]> code.delx.au - gnu-emacs/blob - lisp/nxml/xmltok.el
Merge from emacs--rel--22
[gnu-emacs] / lisp / nxml / xmltok.el
1 ;;; xmltok.el --- XML tokenization
2
3 ;; Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc.
4
5 ;; Author: James Clark
6 ;; Keywords: XML
7
8 ;; This file is part of GNU Emacs.
9
10 ;; GNU Emacs is free software: you can redistribute it and/or modify
11 ;; it under the terms of the GNU General Public License as published by
12 ;; the Free Software Foundation, either version 3 of the License, or
13 ;; (at your option) any later version.
14
15 ;; GNU Emacs is distributed in the hope that it will be useful,
16 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
17 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
18 ;; GNU General Public License for more details.
19
20 ;; You should have received a copy of the GNU General Public License
21 ;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
22
23 ;;; Commentary:
24
25 ;; This implements an XML 1.0 parser. It also implements the XML
26 ;; Namespaces Recommendation. It is designed to be conforming, but it
27 ;; works a bit differently from a normal XML parser. An XML document
28 ;; consists of the prolog and an instance. The prolog is parsed as a
29 ;; single unit using `xmltok-forward-prolog'. The instance is
30 ;; considered as a sequence of tokens, where a token is something like
31 ;; a start-tag, a comment, a chunk of data or a CDATA section. The
32 ;; tokenization of the instance is stateless: the tokenization of one
33 ;; part of the instance does not depend on tokenization of the
34 ;; preceding part of the instance. This allows the instance to be
35 ;; parsed incrementally. The main entry point is `xmltok-forward':
36 ;; this can be called at any point in the instance provided it is
37 ;; between tokens. The other entry point is `xmltok-forward-special'
38 ;; which skips over tokens other comments, processing instructions or
39 ;; CDATA sections (i.e. the constructs in an instance that can contain
40 ;; less than signs that don't start a token).
41 ;;
42 ;; This is a non-validating XML 1.0 processor. It does not resolve
43 ;; parameter entities (including the external DTD subset) and it does
44 ;; not resolve external general entities.
45 ;;
46 ;; It is non-conformant by design in the following respects.
47 ;;
48 ;; 1. It expects the client to detect aspects of well-formedness that
49 ;; are not internal to a single token, specifically checking that
50 ;; end-tags match start-tags and that the instance contains exactly
51 ;; one element.
52 ;;
53 ;; 2. It expects the client to detect duplicate attributes. Detection
54 ;; of duplicate attributes after expansion of namespace prefixes
55 ;; requires the namespace processing state. Detection of duplicate
56 ;; attributes before expansion of namespace prefixes does not, but is
57 ;; redundant given that the client will do detection of duplicate
58 ;; attributes after expansion of namespace prefixes.
59 ;;
60 ;; 3. It allows the client to recover from well-formedness errors.
61 ;; This is essential for use in applications where the document is
62 ;; being parsed during the editing process.
63 ;;
64 ;; 4. It does not support documents that do not conform to the lexical
65 ;; requirements of the XML Namespaces Recommendation (e.g. a document
66 ;; with a colon in an entity name).
67 ;;
68 ;; There are also a number of things that have not yet been
69 ;; implemented that make it non-conformant.
70 ;;
71 ;; 1. It does not implement default attributes. ATTLIST declarations
72 ;; are parsed, but no checking is done on the content of attribute
73 ;; value literals specifying default attribute values, and default
74 ;; attribute values are not reported to the client.
75 ;;
76 ;; 2. It does not implement internal entities containing elements. If
77 ;; an internal entity is referenced and parsing its replacement text
78 ;; yields one or more tags, then it will skip the reference and
79 ;; report this to the client.
80 ;;
81 ;; 3. It does not check the syntax of public identifiers in the DTD.
82 ;;
83 ;; 4. It allows some non-ASCII characters in certain situations where
84 ;; it should not. For example, it only enforces XML 1.0's
85 ;; restrictions on name characters strictly for ASCII characters. The
86 ;; problem here is XML's character model is based squarely on Unicode,
87 ;; whereas Emacs's is not (as of version 21). It is not clear what
88 ;; the right thing to do is.
89
90 ;;; Code:
91
92 (defvar xmltok-type nil)
93 (defvar xmltok-start nil)
94 (defvar xmltok-name-colon nil)
95 (defvar xmltok-name-end nil)
96 (defvar xmltok-replacement nil
97 "String containing replacement for a character or entity reference.")
98
99 (defvar xmltok-attributes nil
100 "List containing attributes of last scanned element.
101 Each member of the list is a vector representing an attribute, which
102 can be accessed using the functions `xmltok-attribute-name-start',
103 `xmltok-attribute-name-colon', `xmltok-attribute-name-end',
104 `xmltok-attribute-value-start', `xmltok-attribute-value-end',
105 `xmltok-attribute-raw-normalized-value', `xmltok-attribute-refs'.")
106
107 (defvar xmltok-namespace-attributes nil
108 "List containing namespace declarations of last scanned element.
109 List has same format as `xmltok-attributes'.")
110
111 (defvar xmltok-dtd nil
112 "Information about the DTD used by `xmltok-forward'.
113 `xmltok-forward-prolog' sets this up.
114
115 It consists of an alist of general entity names vs definitions. The
116 first member of the alist is t if references to entities not in the
117 alist are well-formed \(e.g. because there's an external subset that
118 wasn't parsed).
119
120 Each general entity name is a string. The definition is either nil,
121 a symbol, a string, a cons cell. If the definition is nil, then it
122 means that it's an internal entity but the result of parsing it is
123 unknown. If it is a symbol, then the symbol is either `unparsed',
124 meaning the entity is an unparsed entity, `external', meaning the
125 entity is or references an external entity, `element', meaning the
126 entity includes one or more elements, or `not-well-formed', meaning
127 the replacement text is not well-formed. If the definition is a
128 string, then the replacement text of the entity is that string; this
129 happens only during the parsing of the prolog. If the definition is
130 a cons cell \(ER . AR), then ER specifies the string that results
131 from referencing the entity in element content and AR is either nil,
132 meaning the replacement text included a <, or a string which is the
133 normalized attribute value.")
134
135 (defvar xmltok-dependent-regions nil
136 "List of descriptors of regions that a parsed token depends on.
137
138 A token depends on a region if the region occurs after the token and a
139 change in the region may require the token to be reparsed. This only
140 happens with markup that is not well-formed. For example, if a <?
141 occurs without a matching ?>, then the <? is returned as a
142 not-well-formed token. However, this token is dependent on region
143 from the end of the token to the end of the buffer: if this ever
144 contains ?> then the buffer must be reparsed from the <?.
145
146 A region descriptor is a list (FUN START END ARG ...), where FUN is a
147 function to be called when the region changes, START and END are
148 integers giving the start and end of the region, and ARG... are
149 additional arguments to be passed to FUN. FUN will be called with 5
150 arguments followed by the additional arguments if any: the position of
151 the start of the changed area in the region, the position of the end
152 of the changed area in the region, the length of the changed area
153 before the change, the position of the start of the region, the
154 position of the end of the region. FUN must return non-nil if the
155 region needs reparsing. FUN will be called in a `save-excursion'
156 with match-data saved.
157
158 `xmltok-forward', `xmltok-forward-special' and `xmltok-forward-prolog'
159 may add entries to the beginning of this list, but will not clear it.
160 `xmltok-forward' and `xmltok-forward-special' will only add entries
161 when returning tokens of type not-well-formed.")
162
163 (defvar xmltok-errors nil
164 "List of errors detected by `xmltok-forward' and `xmltok-forward-prolog'.
165 When `xmltok-forward' and `xmltok-forward-prolog' detect a
166 well-formedness error, they will add an entry to the beginning of this
167 list. Each entry is a vector [MESSAGE START END], where MESSAGE is a
168 string giving the error message and START and END are integers
169 indicating the position of the error.")
170
171 (defmacro xmltok-save (&rest body)
172 `(let (xmltok-type
173 xmltok-start
174 xmltok-name-colon
175 xmltok-name-end
176 xmltok-replacement
177 xmltok-attributes
178 xmltok-namespace-attributes
179 xmltok-dependent-regions
180 xmltok-errors)
181 ,@body))
182
183 (put 'xmltok-save 'lisp-indent-function 0)
184 (def-edebug-spec xmltok-save t)
185
186 (defsubst xmltok-attribute-name-start (att)
187 (aref att 0))
188
189 (defsubst xmltok-attribute-name-colon (att)
190 (aref att 1))
191
192 (defsubst xmltok-attribute-name-end (att)
193 (aref att 2))
194
195 (defsubst xmltok-attribute-value-start (att)
196 (aref att 3))
197
198 (defsubst xmltok-attribute-value-end (att)
199 (aref att 4))
200
201 (defsubst xmltok-attribute-raw-normalized-value (att)
202 "Return an object representing the normalized value of ATT.
203 This can be t indicating that the normalized value is the same as
204 the buffer substring from the start to the end of the value, or nil
205 indicating that the value is not well-formed or a string."
206 (aref att 5))
207
208 (defsubst xmltok-attribute-refs (att)
209 "Return a list of the entity and character references in ATT.
210 Each member is a vector [TYPE START END] where TYPE is either char-ref
211 or entity-ref and START and END are integers giving the start and end of
212 the reference. Nested entity references are not included in the list."
213 (aref att 6))
214
215 (defun xmltok-attribute-prefix (att)
216 (let ((colon (xmltok-attribute-name-colon att)))
217 (and colon
218 (buffer-substring-no-properties (xmltok-attribute-name-start att)
219 colon))))
220
221 (defun xmltok-attribute-local-name (att)
222 (let ((colon (xmltok-attribute-name-colon att)))
223 (buffer-substring-no-properties (if colon
224 (1+ colon)
225 (xmltok-attribute-name-start att))
226 (xmltok-attribute-name-end att))))
227
228 (defun xmltok-attribute-value (att)
229 (let ((rnv (xmltok-attribute-raw-normalized-value att)))
230 (and rnv
231 (if (stringp rnv)
232 rnv
233 (buffer-substring-no-properties (xmltok-attribute-value-start att)
234 (xmltok-attribute-value-end att))))))
235
236 (defun xmltok-start-tag-prefix ()
237 (and xmltok-name-colon
238 (buffer-substring-no-properties (1+ xmltok-start)
239 xmltok-name-colon)))
240
241 (defun xmltok-start-tag-local-name ()
242 (buffer-substring-no-properties (1+ (or xmltok-name-colon
243 xmltok-start))
244 xmltok-name-end))
245
246 (defun xmltok-end-tag-prefix ()
247 (and xmltok-name-colon
248 (buffer-substring-no-properties (+ 2 xmltok-start)
249 xmltok-name-colon)))
250
251 (defun xmltok-end-tag-local-name ()
252 (buffer-substring-no-properties (if xmltok-name-colon
253 (1+ xmltok-name-colon)
254 (+ 2 xmltok-start))
255 xmltok-name-end))
256
257 (defun xmltok-start-tag-qname ()
258 (buffer-substring-no-properties (+ xmltok-start 1) xmltok-name-end))
259
260 (defun xmltok-end-tag-qname ()
261 (buffer-substring-no-properties (+ xmltok-start 2) xmltok-name-end))
262
263 (defsubst xmltok-make-attribute (name-begin
264 name-colon
265 name-end
266 &optional
267 value-begin
268 value-end
269 raw-normalized-value)
270 "Make an attribute.
271 RAW-NORMALIZED-VALUE is nil if the value is not well-formed,
272 t if the normalized value is the string between VALUE-BEGIN
273 and VALUE-END, otherwise a STRING giving the value."
274 (vector name-begin
275 name-colon
276 name-end
277 value-begin
278 value-end
279 raw-normalized-value
280 nil))
281
282 (defsubst xmltok-error-message (err)
283 (aref err 0))
284
285 (defsubst xmltok-error-start (err)
286 (aref err 1))
287
288 (defsubst xmltok-error-end (err)
289 (aref err 2))
290
291 (defsubst xmltok-make-error (message start end)
292 (vector message start end))
293
294 (defun xmltok-add-error (message &optional start end)
295 (setq xmltok-errors
296 (cons (xmltok-make-error message
297 (or start xmltok-start)
298 (or end (point)))
299 xmltok-errors)))
300
301 (defun xmltok-add-dependent (fun &optional start end &rest args)
302 (setq xmltok-dependent-regions
303 (cons (cons fun
304 (cons (or start xmltok-start)
305 (cons (or end (point-max))
306 args)))
307 xmltok-dependent-regions)))
308
309 (defun xmltok-forward ()
310 (setq xmltok-start (point))
311 (let* ((case-fold-search nil)
312 (space-count (skip-chars-forward " \t\r\n"))
313 (ch (char-after)))
314 (cond ((eq ch ?\<)
315 (cond ((> space-count 0)
316 (setq xmltok-type 'space))
317 (t
318 (goto-char (1+ (point)))
319 (xmltok-scan-after-lt))))
320 ((eq ch ?\&)
321 (cond ((> space-count 0)
322 (setq xmltok-type 'space))
323 (t
324 (goto-char (1+ (point)))
325 (xmltok-scan-after-amp
326 (lambda (start end)
327 (xmltok-handle-entity start end))))))
328 ((re-search-forward "[<&]\\|\\(]]>\\)" nil t)
329 (cond ((not (match-beginning 1))
330 (goto-char (match-beginning 0))
331 ;; must have got a non-space char
332 (setq xmltok-type 'data))
333 ((= (match-beginning 1) xmltok-start)
334 (xmltok-add-error "Found `]]>' not closing a CDATA section")
335 (setq xmltok-type 'not-well-formed))
336 (t
337 (goto-char (match-beginning 0))
338 (setq xmltok-type
339 (if (= (point) (+ xmltok-start space-count))
340 'space
341 'data)))))
342 ((eq ch nil)
343 (setq xmltok-type
344 (if (> space-count 0)
345 'space
346 nil)))
347 (t
348 (goto-char (point-max))
349 (setq xmltok-type 'data)))))
350
351 (defun xmltok-forward-special (bound)
352 "Scan forward past the first special token starting at or after point.
353 Return nil if there is no special token that starts before BOUND.
354 CDATA sections, processing instructions and comments (and indeed
355 anything starting with < following by ? or !) count as special.
356 Return the type of the token."
357 (when (re-search-forward "<[?!]" (1+ bound) t)
358 (setq xmltok-start (match-beginning 0))
359 (goto-char (1+ xmltok-start))
360 (let ((case-fold-search nil))
361 (xmltok-scan-after-lt))))
362
363 (eval-when-compile
364
365 ;; A symbolic regexp is represented by a list whose CAR is the string
366 ;; containing the regexp and whose cdr is a list of symbolic names
367 ;; for the groups in the string.
368
369 ;; Construct a symbolic regexp from a regexp.
370 (defun xmltok-r (str)
371 (cons str nil))
372
373 ;; Concatenate zero of more regexps and symbolic regexps.
374 (defun xmltok+ (&rest args)
375 (let (strs names)
376 (while args
377 (let ((arg (car args)))
378 (if (stringp arg)
379 (setq strs (cons arg strs))
380 (setq strs (cons (car arg) strs))
381 (setq names (cons (cdr arg) names)))
382 (setq args (cdr args))))
383 (cons (apply 'concat (nreverse strs))
384 (apply 'append (nreverse names))))))
385
386 (eval-when-compile
387 ;; Make a symbolic group named NAME from the regexp R.
388 ;; R may be a symbolic regexp or an ordinary regexp.
389 (defmacro xmltok-g (name &rest r)
390 (let ((sym (make-symbol "r")))
391 `(let ((,sym (xmltok+ ,@r)))
392 (if (stringp ,sym)
393 (cons (concat "\\(" ,sym "\\)") (cons ',name nil))
394 (cons (concat "\\(" (car ,sym) "\\)") (cons ',name (cdr ,sym)))))))
395
396 (defun xmltok-p (&rest r) (xmltok+ "\\(?:"
397 (apply 'xmltok+ r)
398 "\\)"))
399
400 ;; Get the group index of ELEM in a LIST of symbols.
401 (defun xmltok-get-index (elem list)
402 (or elem
403 (error "Missing group name"))
404 (let ((found nil)
405 (i 1))
406 (while list
407 (cond ((eq elem (car list))
408 (setq found i)
409 (setq list nil))
410 (t
411 (setq i (1+ i))
412 (setq list (cdr list)))))
413 (or found
414 (error "Bad group name %s" elem))))
415
416 ;; Define a macro SYM using a symbolic regexp R.
417 ;; SYM can be called in three ways:
418 ;; (SYM regexp)
419 ;; expands to the regexp in R
420 ;; (SYM start G)
421 ;; expands to
422 ;; (match-beginning N)
423 ;; where N is the group index of G in R.
424 ;; (SYM end G)
425 ;; expands to
426 ;; (match-end N)
427 ;; where N is the group index of G in R.
428 (defmacro xmltok-defregexp (sym r)
429 `(defalias ',sym
430 (let ((r ,r))
431 `(macro lambda (action &optional group-name)
432 (cond ((eq action 'regexp)
433 ,(car r))
434 ((or (eq action 'start) (eq action 'beginning))
435 (list 'match-beginning (xmltok-get-index group-name
436 ',(cdr r))))
437 ((eq action 'end)
438 (list 'match-end (xmltok-get-index group-name
439 ',(cdr r))))
440 ((eq action 'string)
441 (list 'match-string
442 (xmltok-get-index group-name ',(cdr r))))
443 ((eq action 'string-no-properties)
444 (list 'match-string-no-properties
445 (xmltok-get-index group-name ',(cdr r))))
446 (t (error "Invalid action: %s" action))))))))
447
448
449 (eval-when-compile
450 (let* ((or "\\|")
451 (open "\\(?:")
452 (gopen "\\(")
453 (close "\\)")
454 (name-start-char "[_[:alpha:]]")
455 (name-continue-not-start-char "[-.[:digit:]]")
456 (name-continue-char "[-._[:alnum:]]")
457 (* "*")
458 (+ "+")
459 (opt "?")
460 (question "\\?")
461 (s "[ \r\t\n]")
462 (s+ (concat s +))
463 (s* (concat s *))
464 (ncname (concat name-start-char name-continue-char *))
465 (entity-ref
466 (xmltok+ (xmltok-g entity-name ncname)
467 (xmltok-g entity-ref-close ";") opt))
468 (decimal-ref
469 (xmltok+ (xmltok-g decimal "[0-9]" +)
470 (xmltok-g decimal-ref-close ";") opt))
471 (hex-ref
472 (xmltok+ "x" open
473 (xmltok-g hex "[0-9a-fA-F]" +)
474 (xmltok-g hex-ref-close ";") opt
475 close opt))
476 (char-ref
477 (xmltok+ (xmltok-g number-sign "#")
478 open decimal-ref or hex-ref close opt))
479 (start-tag-close
480 (xmltok+ open (xmltok-g start-tag-close s* ">")
481 or open (xmltok-g empty-tag-slash s* "/")
482 (xmltok-g empty-tag-close ">") opt close
483 or (xmltok-g start-tag-s s+)
484 close))
485 (start-tag
486 (xmltok+ (xmltok-g start-tag-name
487 ncname (xmltok-g start-tag-colon ":" ncname) opt)
488 start-tag-close opt))
489 (end-tag
490 (xmltok+ (xmltok-g end-tag-slash "/")
491 open (xmltok-g end-tag-name
492 ncname
493 (xmltok-g end-tag-colon ":" ncname) opt)
494 (xmltok-g end-tag-close s* ">") opt
495 close opt))
496 (comment
497 (xmltok+ (xmltok-g markup-declaration "!")
498 (xmltok-g comment-first-dash "-"
499 (xmltok-g comment-open "-") opt) opt))
500 (cdata-section
501 (xmltok+ "!"
502 (xmltok-g marked-section-open "\\[")
503 open "C"
504 open "D"
505 open "A"
506 open "T"
507 open "A"
508 (xmltok-g cdata-section-open "\\[" ) opt
509 close opt ; A
510 close opt ; T
511 close opt ; A
512 close opt ; D
513 close opt)) ; C
514 (processing-instruction
515 (xmltok-g processing-instruction-question question)))
516
517 (xmltok-defregexp xmltok-ncname (xmltok+ open ncname close))
518
519 (xmltok-defregexp xmltok-after-amp
520 (xmltok+ entity-ref or char-ref))
521 (xmltok-defregexp xmltok-after-lt
522 (xmltok+ start-tag
523 or end-tag
524 ;; cdata-section must come before comment
525 ;; because we treat <! as a comment
526 ;; and Emacs doesn't do fully greedy matching
527 ;; by default
528 or cdata-section
529 or comment
530 or processing-instruction))
531 (xmltok-defregexp
532 xmltok-attribute
533 (let* ((lit1
534 (xmltok+ "'"
535 "[^<'&\r\n\t]*"
536 (xmltok-g complex1 "[&\r\n\t][^<']*") opt
537 "'"))
538 (lit2 (cons (replace-regexp-in-string "'" "\"" (car lit1))
539 '(complex2)))
540 (literal (xmltok-g literal lit1 or lit2))
541 (name (xmltok+ open (xmltok-g xmlns "xmlns") or ncname close
542 (xmltok-g colon ":" ncname) opt)))
543 (xmltok+ (xmltok-g name name)
544 s* "="
545 ;; If the literal isn't followed by what it should be,
546 ;; then the closing delimiter is probably really the
547 ;; opening delimiter of another literal, so don't
548 ;; absorb the literal in this case.
549 open s* literal start-tag-close close opt)))
550 (xmltok-defregexp
551 xmltok-xml-declaration
552 (let* ((literal-content "[-._:a-zA-Z0-9]+")
553 (literal
554 (concat open "\"" literal-content "\""
555 or "'" literal-content "'" close))
556 (version-att
557 (xmltok+ open
558 s+ (xmltok-g version-name "version")
559 s* "="
560 s* (xmltok-g version-value literal)
561 close opt))
562 (encoding-att
563 (xmltok+ open
564 s+ (xmltok-g encoding-name "encoding")
565 s* "="
566 s* (xmltok-g encoding-value literal)
567 close opt))
568 (yes-no
569 (concat open "yes" or "no" close))
570 (standalone-att
571 (xmltok+ open
572 s+ (xmltok-g standalone-name "standalone")
573 s* "="
574 s* (xmltok-g standalone-value
575 "\"" yes-no "\"" or "'" yes-no "'")
576 close opt)))
577 (xmltok+ "<" question "xml"
578 version-att
579 encoding-att
580 standalone-att
581 s* question ">")))
582 (xmltok-defregexp
583 xmltok-prolog
584 (let* ((single-char (xmltok-g single-char "[[|,(\"'>]"))
585 (internal-subset-close (xmltok-g internal-subset-close
586 "][ \t\r\n]*>"))
587 (starts-with-close-paren
588 (xmltok-g close-paren
589 ")"
590 (xmltok-p
591 (xmltok-g close-paren-occur "[+?]")
592 or
593 (xmltok-g close-paren-star "\\*"))
594 opt))
595 (starts-with-percent
596 (xmltok-g percent
597 "%" (xmltok-g param-entity-ref
598 ncname
599 (xmltok-g param-entity-ref-close
600 ";") opt) opt))
601 (starts-with-nmtoken-not-name
602 (xmltok-g nmtoken
603 (xmltok-p name-continue-not-start-char or ":")
604 (xmltok-p name-continue-char or ":") *))
605 (nmtoken-after-colon
606 (xmltok+
607 (xmltok-p name-continue-not-start-char or ":")
608 (xmltok-p name-continue-char or ":") *
609 or
610 name-start-char
611 name-continue-char *
612 ":"
613 (xmltok-p name-continue-char or ":") *))
614 (after-ncname
615 (xmltok+ (xmltok-g ncname-nmtoken
616 ":" (xmltok-p nmtoken-after-colon))
617 or (xmltok-p (xmltok-g colon ":" ncname)
618 (xmltok-g colon-name-occur "[?+*]") opt)
619 or (xmltok-g ncname-occur "[?+*]")
620 or (xmltok-g ncname-colon ":")))
621 (starts-with-name
622 (xmltok-g name ncname (xmltok-p after-ncname) opt))
623 (starts-with-hash
624 (xmltok-g pound
625 "#" (xmltok-g hash-name ncname)))
626 (markup-declaration
627 (xmltok-g markup-declaration
628 "!" (xmltok-p (xmltok-g comment-first-dash "-"
629 (xmltok-g comment-open "-") opt)
630 or (xmltok-g named-markup-declaration
631 ncname)) opt))
632 (after-lt
633 (xmltok+ markup-declaration
634 or (xmltok-g processing-instruction-question
635 question)
636 or (xmltok-g instance-start
637 ncname)))
638 (starts-with-lt (xmltok-g less-than "<" (xmltok-p after-lt) opt)))
639 (xmltok+ starts-with-lt
640 or single-char
641 or starts-with-close-paren
642 or starts-with-percent
643 or starts-with-name
644 or starts-with-nmtoken-not-name
645 or starts-with-hash
646 or internal-subset-close)))))
647
648 (defconst xmltok-ncname-regexp (xmltok-ncname regexp))
649
650 (defun xmltok-scan-after-lt ()
651 (cond ((not (looking-at (xmltok-after-lt regexp)))
652 (xmltok-add-error "`<' that is not markup must be entered as `&lt;'")
653 (setq xmltok-type 'not-well-formed))
654 (t
655 (goto-char (match-end 0))
656 (cond ((xmltok-after-lt start start-tag-close)
657 (setq xmltok-name-end
658 (xmltok-after-lt end start-tag-name))
659 (setq xmltok-name-colon
660 (xmltok-after-lt start start-tag-colon))
661 (setq xmltok-attributes nil)
662 (setq xmltok-namespace-attributes nil)
663 (setq xmltok-type 'start-tag))
664 ((xmltok-after-lt start end-tag-close)
665 (setq xmltok-name-end
666 (xmltok-after-lt end end-tag-name))
667 (setq xmltok-name-colon
668 (xmltok-after-lt start end-tag-colon))
669 (setq xmltok-type 'end-tag))
670 ((xmltok-after-lt start start-tag-s)
671 (setq xmltok-name-end
672 (xmltok-after-lt end start-tag-name))
673 (setq xmltok-name-colon
674 (xmltok-after-lt start start-tag-colon))
675 (setq xmltok-namespace-attributes nil)
676 (setq xmltok-attributes nil)
677 (xmltok-scan-attributes)
678 xmltok-type)
679 ((xmltok-after-lt start empty-tag-close)
680 (setq xmltok-name-end
681 (xmltok-after-lt end start-tag-name))
682 (setq xmltok-name-colon
683 (xmltok-after-lt start start-tag-colon))
684 (setq xmltok-attributes nil)
685 (setq xmltok-namespace-attributes nil)
686 (setq xmltok-type 'empty-element))
687 ((xmltok-after-lt start cdata-section-open)
688 (setq xmltok-type
689 (if (search-forward "]]>" nil t)
690 'cdata-section
691 (xmltok-add-error "No closing ]]>")
692 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
693 nil
694 nil
695 "]]>")
696 'not-well-formed)))
697 ((xmltok-after-lt start processing-instruction-question)
698 (xmltok-scan-after-processing-instruction-open))
699 ((xmltok-after-lt start comment-open)
700 (xmltok-scan-after-comment-open))
701 ((xmltok-after-lt start empty-tag-slash)
702 (setq xmltok-name-end
703 (xmltok-after-lt end start-tag-name))
704 (setq xmltok-name-colon
705 (xmltok-after-lt start start-tag-colon))
706 (setq xmltok-attributes nil)
707 (setq xmltok-namespace-attributes nil)
708 (xmltok-add-error "Expected `/>'" (1- (point)))
709 (setq xmltok-type 'partial-empty-element))
710 ((xmltok-after-lt start start-tag-name)
711 (xmltok-add-error "Missing `>'"
712 nil
713 (1+ xmltok-start))
714 (setq xmltok-name-end
715 (xmltok-after-lt end start-tag-name))
716 (setq xmltok-name-colon
717 (xmltok-after-lt start start-tag-colon))
718 (setq xmltok-namespace-attributes nil)
719 (setq xmltok-attributes nil)
720 (setq xmltok-type 'partial-start-tag))
721 ((xmltok-after-lt start end-tag-name)
722 (setq xmltok-name-end (xmltok-after-lt end end-tag-name))
723 (setq xmltok-name-colon
724 (xmltok-after-lt start end-tag-colon))
725 (cond ((and (not xmltok-name-colon)
726 (eq (char-after) ?:))
727 (goto-char (1+ (point)))
728 (xmltok-add-error "Expected name following `:'"
729 (1- (point))))
730 (t
731 (xmltok-add-error "Missing `>'"
732 nil
733 (1+ xmltok-start))))
734 (setq xmltok-type 'partial-end-tag))
735 ((xmltok-after-lt start end-tag-slash)
736 (xmltok-add-error "Expected name following `</'")
737 (setq xmltok-name-end nil)
738 (setq xmltok-name-colon nil)
739 (setq xmltok-type 'partial-end-tag))
740 ((xmltok-after-lt start marked-section-open)
741 (xmltok-add-error "Expected `CDATA[' after `<!['"
742 xmltok-start
743 (+ 3 xmltok-start))
744 (setq xmltok-type 'not-well-formed))
745 ((xmltok-after-lt start comment-first-dash)
746 (xmltok-add-error "Expected `-' after `<!-'"
747 xmltok-start
748 (+ 3 xmltok-start))
749 (setq xmltok-type 'not-well-formed))
750 ((xmltok-after-lt start markup-declaration)
751 (xmltok-add-error "Expected `[CDATA[' or `--' after `<!'"
752 xmltok-start
753 (+ 2 xmltok-start))
754 (setq xmltok-type 'not-well-formed))
755 (t
756 (xmltok-add-error "Not well-formed")
757 (setq xmltok-type 'not-well-formed))))))
758
759 ;; XXX This should be unified with
760 ;; xmltok-scan-prolog-after-processing-instruction-open
761 ;; XXX maybe should include rest of line (up to any <,>) in unclosed PI
762 (defun xmltok-scan-after-processing-instruction-open ()
763 (cond ((not (search-forward "?>" nil t))
764 (xmltok-add-error "No closing ?>"
765 xmltok-start
766 (+ xmltok-start 2))
767 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
768 nil
769 nil
770 "?>")
771 (setq xmltok-type 'not-well-formed))
772 (t
773 (cond ((not (save-excursion
774 (goto-char (+ 2 xmltok-start))
775 (and (looking-at (xmltok-ncname regexp))
776 (setq xmltok-name-end (match-end 0)))))
777 (setq xmltok-name-end (+ xmltok-start 2))
778 (xmltok-add-error "<? not followed by name"
779 (+ xmltok-start 2)
780 (+ xmltok-start 3)))
781 ((not (or (memq (char-after xmltok-name-end)
782 '(?\n ?\t ?\r ? ))
783 (= xmltok-name-end (- (point) 2))))
784 (xmltok-add-error "Target not followed by whitespace"
785 xmltok-name-end
786 (1+ xmltok-name-end)))
787 ((and (= xmltok-name-end (+ xmltok-start 5))
788 (save-excursion
789 (goto-char (+ xmltok-start 2))
790 (let ((case-fold-search t))
791 (looking-at "xml"))))
792 (xmltok-add-error "Processing instruction target is xml"
793 (+ xmltok-start 2)
794 (+ xmltok-start 5))))
795 (setq xmltok-type 'processing-instruction))))
796
797 (defun xmltok-scan-after-comment-open ()
798 (setq xmltok-type
799 (cond ((not (search-forward "--" nil t))
800 (xmltok-add-error "No closing -->")
801 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
802 nil
803 nil
804 ;; not --> because
805 ;; -- is not allowed
806 ;; in comments in XML
807 "--")
808 'not-well-formed)
809 ((eq (char-after) ?>)
810 (goto-char (1+ (point)))
811 'comment)
812 (t
813 (xmltok-add-dependent
814 'xmltok-semi-closed-reparse-p
815 nil
816 (point)
817 "--"
818 2)
819 ;; just include the <!-- in the token
820 (goto-char (+ xmltok-start 4))
821 ;; Need do this after the goto-char because
822 ;; marked error should just apply to <!--
823 (xmltok-add-error "First following `--' not followed by `>'")
824 'not-well-formed))))
825
826 (defun xmltok-scan-attributes ()
827 (let ((recovering nil)
828 (atts-needing-normalization nil))
829 (while (cond ((or (looking-at (xmltok-attribute regexp))
830 ;; use non-greedy group
831 (when (looking-at (concat "[^<>\n]+?"
832 (xmltok-attribute regexp)))
833 (unless recovering
834 (xmltok-add-error "Malformed attribute"
835 (point)
836 (save-excursion
837 (goto-char (xmltok-attribute start
838 name))
839 (skip-chars-backward "\r\n\t ")
840 (point))))
841 t))
842 (setq recovering nil)
843 (goto-char (match-end 0))
844 (let ((att (xmltok-add-attribute)))
845 (when att
846 (setq atts-needing-normalization
847 (cons att atts-needing-normalization))))
848 (cond ((xmltok-attribute start start-tag-s) t)
849 ((xmltok-attribute start start-tag-close)
850 (setq xmltok-type 'start-tag)
851 nil)
852 ((xmltok-attribute start empty-tag-close)
853 (setq xmltok-type 'empty-element)
854 nil)
855 ((xmltok-attribute start empty-tag-slash)
856 (setq xmltok-type 'partial-empty-element)
857 (xmltok-add-error "Expected `/>'"
858 (1- (point)))
859 nil)
860 ((looking-at "[ \t\r\n]*[\"']")
861 (goto-char (match-end 0))
862 (xmltok-add-error "Missing closing delimiter"
863 (1- (point)))
864 (setq recovering t)
865 t)
866 ((looking-at "[ \t]*\\([^ \t\r\n\"'=<>/]+\\)[ \t\r\n/>]")
867 (goto-char (match-end 1))
868 (xmltok-add-error "Attribute value not quoted"
869 (match-beginning 1))
870 (setq recovering t)
871 t)
872 (t
873 (xmltok-add-error "Missing attribute value"
874 (1- (point)))
875 (setq recovering t)
876 t)))
877 ((looking-at "[^<>\n]*/>")
878 (let ((start (point)))
879 (goto-char (match-end 0))
880 (unless recovering
881 (xmltok-add-error "Malformed empty-element"
882 start
883 (- (point) 2))))
884 (setq xmltok-type 'empty-element)
885 nil)
886 ((looking-at "[^<>\n]*>")
887 (let ((start (point)))
888 (goto-char (match-end 0))
889 (unless recovering
890 (xmltok-add-error "Malformed start-tag"
891 start
892 (1- (point)))))
893 (setq xmltok-type 'start-tag)
894 nil)
895 (t
896 (when recovering
897 (skip-chars-forward "^<>\n"))
898 (xmltok-add-error "Missing `>'"
899 xmltok-start
900 (1+ xmltok-start))
901 (setq xmltok-type 'partial-start-tag)
902 nil)))
903 (while atts-needing-normalization
904 (xmltok-normalize-attribute (car atts-needing-normalization))
905 (setq atts-needing-normalization (cdr atts-needing-normalization))))
906 (setq xmltok-attributes
907 (nreverse xmltok-attributes))
908 (setq xmltok-namespace-attributes
909 (nreverse xmltok-namespace-attributes)))
910
911 (defun xmltok-add-attribute ()
912 "Return the attribute if it needs normalizing, otherwise nil."
913 (let* ((needs-normalizing nil)
914 (att
915 (if (xmltok-attribute start literal)
916 (progn
917 (setq needs-normalizing
918 (or (xmltok-attribute start complex1)
919 (xmltok-attribute start complex2)))
920 (xmltok-make-attribute (xmltok-attribute start name)
921 (xmltok-attribute start colon)
922 (xmltok-attribute end name)
923 (1+ (xmltok-attribute start literal))
924 (1- (xmltok-attribute end literal))
925 (not needs-normalizing)))
926 (xmltok-make-attribute (xmltok-attribute start name)
927 (xmltok-attribute start colon)
928 (xmltok-attribute end name)))))
929 (if (xmltok-attribute start xmlns)
930 (setq xmltok-namespace-attributes
931 (cons att xmltok-namespace-attributes))
932 (setq xmltok-attributes
933 (cons att xmltok-attributes)))
934 (and needs-normalizing
935 att)))
936
937 (defun xmltok-normalize-attribute (att)
938 (let ((end (xmltok-attribute-value-end att))
939 (well-formed t)
940 (value-parts nil)
941 (refs nil))
942 (save-excursion
943 (goto-char (xmltok-attribute-value-start att))
944 (while (progn
945 (let ((n (skip-chars-forward "^\r\t\n&" end)))
946 (when (> n 0)
947 (setq value-parts
948 (cons (buffer-substring-no-properties (- (point) n)
949 (point))
950 value-parts))))
951 (when (< (point) end)
952 (goto-char (1+ (point)))
953 (cond ((eq (char-before) ?\&)
954 (let ((xmltok-start (1- (point)))
955 xmltok-type xmltok-replacement)
956 (xmltok-scan-after-amp
957 (lambda (start end)
958 (xmltok-handle-entity start end t)))
959 (cond ((or (eq xmltok-type 'char-ref)
960 (eq xmltok-type 'entity-ref))
961 (setq refs
962 (cons (vector xmltok-type
963 xmltok-start
964 (point))
965 refs))
966 (if xmltok-replacement
967 (setq value-parts
968 (cons xmltok-replacement
969 value-parts))
970 (setq well-formed nil)))
971 (t (setq well-formed nil)))))
972 (t (setq value-parts
973 (cons " " value-parts)))))
974 (< (point) end))))
975 (when well-formed
976 (aset att 5 (apply 'concat (nreverse value-parts))))
977 (aset att 6 (nreverse refs))))
978
979 (defun xmltok-scan-after-amp (entity-handler)
980 (cond ((not (looking-at (xmltok-after-amp regexp)))
981 (xmltok-add-error "`&' that is not markup must be entered as `&amp;'")
982 (setq xmltok-type 'not-well-formed))
983 (t
984 (goto-char (match-end 0))
985 (cond ((xmltok-after-amp start entity-ref-close)
986 (funcall entity-handler
987 (xmltok-after-amp start entity-name)
988 (xmltok-after-amp end entity-name))
989 (setq xmltok-type 'entity-ref))
990 ((xmltok-after-amp start decimal-ref-close)
991 (xmltok-scan-char-ref (xmltok-after-amp start decimal)
992 (xmltok-after-amp end decimal)
993 10))
994 ((xmltok-after-amp start hex-ref-close)
995 (xmltok-scan-char-ref (xmltok-after-amp start hex)
996 (xmltok-after-amp end hex)
997 16))
998 ((xmltok-after-amp start number-sign)
999 (xmltok-add-error "Missing character number")
1000 (setq xmltok-type 'not-well-formed))
1001 (t
1002 (xmltok-add-error "Missing closing `;'")
1003 (setq xmltok-type 'not-well-formed))))))
1004
1005 (defconst xmltok-entity-error-messages
1006 '((unparsed . "Referenced entity is unparsed")
1007 (not-well-formed . "Referenced entity is not well-formed")
1008 (external nil . "Referenced entity is external")
1009 (element nil . "Referenced entity contains <")))
1010
1011 (defun xmltok-handle-entity (start end &optional attributep)
1012 (let* ((name (buffer-substring-no-properties start end))
1013 (name-def (assoc name xmltok-dtd))
1014 (def (cdr name-def)))
1015 (cond ((setq xmltok-replacement (and (consp def)
1016 (if attributep
1017 (cdr def)
1018 (car def)))))
1019 ((null name-def)
1020 (unless (eq (car xmltok-dtd) t)
1021 (xmltok-add-error "Referenced entity has not been defined"
1022 start
1023 end)))
1024 ((and attributep (consp def))
1025 (xmltok-add-error "Referenced entity contains <"
1026 start
1027 end))
1028 (t
1029 (let ((err (cdr (assq def xmltok-entity-error-messages))))
1030 (when (consp err)
1031 (setq err (if attributep (cdr err) (car err))))
1032 (when err
1033 (xmltok-add-error err start end)))))))
1034
1035 (defun xmltok-scan-char-ref (start end base)
1036 (setq xmltok-replacement
1037 (let ((n (string-to-number (buffer-substring-no-properties start end)
1038 base)))
1039 (cond ((and (integerp n) (xmltok-valid-char-p n))
1040 (setq n (xmltok-unicode-to-char n))
1041 (and n (string n)))
1042 (t
1043 (xmltok-add-error "Invalid character code" start end)
1044 nil))))
1045 (setq xmltok-type 'char-ref))
1046
1047 (defun xmltok-char-number (start end)
1048 (let* ((base (if (eq (char-after (+ start 2)) ?x)
1049 16
1050 10))
1051 (n (string-to-number
1052 (buffer-substring-no-properties (+ start (if (= base 16) 3 2))
1053 (1- end))
1054 base)))
1055 (and (integerp n)
1056 (xmltok-valid-char-p n)
1057 n)))
1058
1059 (defun xmltok-unclosed-reparse-p (change-start
1060 change-end
1061 pre-change-length
1062 start
1063 end
1064 delimiter)
1065 (let ((len-1 (1- (length delimiter))))
1066 (goto-char (max start (- change-start len-1)))
1067 (search-forward delimiter (min end (+ change-end len-1)) t)))
1068
1069 ;; Handles a <!-- with the next -- not followed by >
1070
1071 (defun xmltok-semi-closed-reparse-p (change-start
1072 change-end
1073 pre-change-length
1074 start
1075 end
1076 delimiter
1077 delimiter-length)
1078 (or (<= (- end delimiter-length) change-end)
1079 (xmltok-unclosed-reparse-p change-start
1080 change-end
1081 pre-change-length
1082 start
1083 end
1084 delimiter)))
1085
1086 (defun xmltok-valid-char-p (n)
1087 "Return non-nil if N is the Unicode code of a valid XML character."
1088 (cond ((< n #x20) (memq n '(#xA #xD #x9)))
1089 ((< n #xD800) t)
1090 ((< n #xE000) nil)
1091 ((< n #xFFFE) t)
1092 (t (and (> n #xFFFF)
1093 (< n #x110000)))))
1094
1095 (defun xmltok-unicode-to-char (n)
1096 "Return the character corresponding to Unicode scalar value N.
1097 Return nil if unsupported in Emacs."
1098 (decode-char 'ucs n))
1099
1100 ;;; Prolog parsing
1101
1102 (defvar xmltok-contains-doctype nil)
1103 (defvar xmltok-doctype-external-subset-flag nil)
1104 (defvar xmltok-internal-subset-start nil)
1105 (defvar xmltok-had-param-entity-ref nil)
1106 (defvar xmltok-prolog-regions nil)
1107 (defvar xmltok-standalone nil
1108 "Non-nil if there was an XML declaration specifying standalone=\"yes\".")
1109 (defvar xmltok-markup-declaration-doctype-flag nil)
1110
1111 (defconst xmltok-predefined-entity-alist
1112 '(("lt" "<" . "<")
1113 ("gt" ">" . ">")
1114 ("amp" "&" . "&")
1115 ("apos" "'" . "'")
1116 ("quot" "\"" . "\"")))
1117
1118 (defun xmltok-forward-prolog ()
1119 "Move forward to the end of the XML prolog.
1120
1121 Returns a list of vectors [TYPE START END] where TYPE is a symbol and
1122 START and END are integers giving the start and end of the region of
1123 that type. TYPE can be one of xml-declaration,
1124 xml-declaration-attribute-name, xml-declaration-attribute-value,
1125 comment, processing-instruction-left, processing-instruction-right,
1126 markup-declaration-open, markup-declaration-close,
1127 internal-subset-open, internal-subset-close, hash-name, keyword,
1128 literal, encoding-name.
1129 Adds to `xmltok-errors' and `xmltok-dependent-regions' as appropriate."
1130 (let ((case-fold-search nil)
1131 xmltok-start
1132 xmltok-type
1133 xmltok-prolog-regions
1134 xmltok-contains-doctype
1135 xmltok-internal-subset-start
1136 xmltok-had-param-entity-ref
1137 xmltok-standalone
1138 xmltok-doctype-external-subset-flag
1139 xmltok-markup-declaration-doctype-flag)
1140 (setq xmltok-dtd xmltok-predefined-entity-alist)
1141 (xmltok-scan-xml-declaration)
1142 (xmltok-next-prolog-token)
1143 (while (condition-case err
1144 (when (xmltok-parse-prolog-item)
1145 (xmltok-next-prolog-token))
1146 (xmltok-markup-declaration-parse-error
1147 (xmltok-skip-markup-declaration))))
1148 (when xmltok-internal-subset-start
1149 (xmltok-add-error "No closing ]"
1150 (1- xmltok-internal-subset-start)
1151 xmltok-internal-subset-start))
1152 (xmltok-parse-entities)
1153 ;; XXX prune dependent-regions for those entirely in prolog
1154 (nreverse xmltok-prolog-regions)))
1155
1156 (defconst xmltok-bad-xml-decl-regexp
1157 "[ \t\r\n]*<\\?xml\\(?:[ \t\r\n]\\|\\?>\\)")
1158
1159 ;;;###autoload
1160 (defun xmltok-get-declared-encoding-position (&optional limit)
1161 "Return the position of the encoding in the XML declaration at point.
1162 If there is a well-formed XML declaration starting at point and it
1163 contains an encoding declaration, then return (START . END)
1164 where START and END are the positions of the start and the end
1165 of the encoding name; if there is no encoding declaration return
1166 the position where and encoding declaration could be inserted.
1167 If there is XML that is not well-formed that looks like an XML
1168 declaration, return nil. Otherwise, return t.
1169 If LIMIT is non-nil, then do not consider characters beyond LIMIT."
1170 (cond ((let ((case-fold-search nil))
1171 (and (looking-at (xmltok-xml-declaration regexp))
1172 (or (not limit) (<= (match-end 0) limit))))
1173 (let ((end (xmltok-xml-declaration end encoding-value)))
1174 (if end
1175 (cons (1+ (xmltok-xml-declaration start encoding-value))
1176 (1- end))
1177 (or (xmltok-xml-declaration end version-value)
1178 (+ (point) 5)))))
1179 ((not (let ((case-fold-search t))
1180 (looking-at xmltok-bad-xml-decl-regexp))))))
1181
1182 (defun xmltok-scan-xml-declaration ()
1183 (when (looking-at (xmltok-xml-declaration regexp))
1184 (xmltok-add-prolog-region 'xml-declaration (point) (match-end 0))
1185 (goto-char (match-end 0))
1186 (when (xmltok-xml-declaration start version-name)
1187 (xmltok-add-prolog-region 'xml-declaration-attribute-name
1188 (xmltok-xml-declaration start version-name)
1189 (xmltok-xml-declaration end version-name))
1190 (let ((start (xmltok-xml-declaration start version-value))
1191 (end (xmltok-xml-declaration end version-value)))
1192 (xmltok-add-prolog-region 'xml-declaration-attribute-value
1193 start
1194 end)))
1195 ;; XXX need to check encoding name
1196 ;; Should start with letter, not contain colon
1197 (when (xmltok-xml-declaration start encoding-name)
1198 (xmltok-add-prolog-region 'xml-declaration-attribute-name
1199 (xmltok-xml-declaration start encoding-name)
1200 (xmltok-xml-declaration end encoding-name))
1201 (let ((start (xmltok-xml-declaration start encoding-value))
1202 (end (xmltok-xml-declaration end encoding-value)))
1203 (xmltok-add-prolog-region 'encoding-name
1204 (1+ start)
1205 (1- end))
1206 (xmltok-add-prolog-region 'xml-declaration-attribute-value
1207 start
1208 end)))
1209 (when (xmltok-xml-declaration start standalone-name)
1210 (xmltok-add-prolog-region 'xml-declaration-attribute-name
1211 (xmltok-xml-declaration start standalone-name)
1212 (xmltok-xml-declaration end standalone-name))
1213 (let ((start (xmltok-xml-declaration start standalone-value))
1214 (end (xmltok-xml-declaration end standalone-value)))
1215 (xmltok-add-prolog-region 'xml-declaration-attribute-value
1216 start
1217 end)
1218 (setq xmltok-standalone
1219 (string= (buffer-substring-no-properties (1+ start) (1- end))
1220 "yes"))))
1221 t))
1222
1223 (defconst xmltok-markup-declaration-alist
1224 '(("ELEMENT" . xmltok-parse-element-declaration)
1225 ("ATTLIST" . xmltok-parse-attlist-declaration)
1226 ("ENTITY" . xmltok-parse-entity-declaration)
1227 ("NOTATION" . xmltok-parse-notation-declaration)))
1228
1229 (defun xmltok-parse-prolog-item ()
1230 (cond ((eq xmltok-type 'comment)
1231 (xmltok-add-prolog-region 'comment
1232 xmltok-start
1233 (point))
1234 t)
1235 ((eq xmltok-type 'processing-instruction))
1236 ((eq xmltok-type 'named-markup-declaration)
1237 (setq xmltok-markup-declaration-doctype-flag nil)
1238 (xmltok-add-prolog-region 'markup-declaration-open
1239 xmltok-start
1240 (point))
1241 (let* ((name (buffer-substring-no-properties
1242 (+ xmltok-start 2)
1243 (point)))
1244 (fun (cdr (assoc name xmltok-markup-declaration-alist))))
1245 (cond (fun
1246 (unless xmltok-internal-subset-start
1247 (xmltok-add-error
1248 "Declaration allowed only in internal subset"))
1249 (funcall fun))
1250 ((string= name "DOCTYPE")
1251 (xmltok-parse-doctype))
1252 (t
1253 (xmltok-add-error "Unknown markup declaration"
1254 (+ xmltok-start 2))
1255 (xmltok-next-prolog-token)
1256 (xmltok-markup-declaration-parse-error))))
1257 t)
1258 ((or (eq xmltok-type 'end-prolog)
1259 (not xmltok-type))
1260 nil)
1261 ((eq xmltok-type 'internal-subset-close)
1262 (xmltok-add-prolog-region 'internal-subset-close
1263 xmltok-start
1264 (1+ xmltok-start))
1265 (xmltok-add-prolog-region 'markup-declaration-close
1266 (1- (point))
1267 (point))
1268 (if xmltok-internal-subset-start
1269 (setq xmltok-internal-subset-start nil)
1270 (xmltok-add-error "]> outside internal subset"))
1271 t)
1272 ((eq xmltok-type 'param-entity-ref)
1273 (if xmltok-internal-subset-start
1274 (setq xmltok-had-param-entity-ref t)
1275 (xmltok-add-error "Parameter entity reference outside document type declaration"))
1276 t)
1277 ;; If we don't do this, we can get thousands of errors when
1278 ;; a plain text file is parsed.
1279 ((not xmltok-internal-subset-start)
1280 (when (let ((err (car xmltok-errors)))
1281 (or (not err)
1282 (<= (xmltok-error-end err) xmltok-start)))
1283 (goto-char xmltok-start))
1284 nil)
1285 ((eq xmltok-type 'not-well-formed) t)
1286 (t
1287 (xmltok-add-error "Token allowed only inside markup declaration")
1288 t)))
1289
1290 (defun xmltok-parse-doctype ()
1291 (setq xmltok-markup-declaration-doctype-flag t)
1292 (xmltok-next-prolog-token)
1293 (when xmltok-internal-subset-start
1294 (xmltok-add-error "DOCTYPE declaration not allowed in internal subset")
1295 (xmltok-markup-declaration-parse-error))
1296 (when xmltok-contains-doctype
1297 (xmltok-add-error "Duplicate DOCTYPE declaration")
1298 (xmltok-markup-declaration-parse-error))
1299 (setq xmltok-contains-doctype t)
1300 (xmltok-require-token 'name 'prefixed-name)
1301 (xmltok-require-next-token "SYSTEM" "PUBLIC" ?\[ ?>)
1302 (cond ((eq xmltok-type ?\[)
1303 (setq xmltok-internal-subset-start (point)))
1304 ((eq xmltok-type ?>))
1305 (t
1306 (setq xmltok-doctype-external-subset-flag t)
1307 (xmltok-parse-external-id)
1308 (xmltok-require-token ?\[ ?>)
1309 (when (eq xmltok-type ?\[)
1310 (setq xmltok-internal-subset-start (point))))))
1311
1312 (defun xmltok-parse-attlist-declaration ()
1313 (xmltok-require-next-token 'prefixed-name 'name)
1314 (while (progn
1315 (xmltok-require-next-token ?> 'name 'prefixed-name)
1316 (if (eq xmltok-type ?>)
1317 nil
1318 (xmltok-require-next-token ?\(
1319 "CDATA"
1320 "ID"
1321 "IDREF"
1322 "IDREFS"
1323 "ENTITY"
1324 "ENTITIES"
1325 "NMTOKEN"
1326 "NMTOKENS"
1327 "NOTATION")
1328 (cond ((eq xmltok-type ?\()
1329 (xmltok-parse-nmtoken-group))
1330 ((string= (xmltok-current-token-string)
1331 "NOTATION")
1332 (xmltok-require-next-token ?\()
1333 (xmltok-parse-nmtoken-group)))
1334 (xmltok-require-next-token "#IMPLIED"
1335 "#REQUIRED"
1336 "#FIXED"
1337 'literal)
1338 (when (string= (xmltok-current-token-string) "#FIXED")
1339 (xmltok-require-next-token 'literal))
1340 t))))
1341
1342 (defun xmltok-parse-nmtoken-group ()
1343 (while (progn
1344 (xmltok-require-next-token 'nmtoken 'prefixed-name 'name)
1345 (xmltok-require-next-token ?| ?\))
1346 (eq xmltok-type ?|))))
1347
1348 (defun xmltok-parse-element-declaration ()
1349 (xmltok-require-next-token 'name 'prefixed-name)
1350 (xmltok-require-next-token "EMPTY" "ANY" ?\()
1351 (when (eq xmltok-type ?\()
1352 (xmltok-require-next-token "#PCDATA"
1353 'name
1354 'prefixed-name
1355 'name-occur
1356 ?\()
1357 (cond ((eq xmltok-type 'hash-name)
1358 (xmltok-require-next-token ?| ?\) 'close-paren-star)
1359 (while (eq xmltok-type ?|)
1360 (xmltok-require-next-token 'name 'prefixed-name)
1361 (xmltok-require-next-token 'close-paren-star ?|)))
1362 (t (xmltok-parse-model-group))))
1363 (xmltok-require-next-token ?>))
1364
1365 (defun xmltok-parse-model-group ()
1366 (xmltok-parse-model-group-member)
1367 (xmltok-require-next-token ?|
1368 ?,
1369 ?\)
1370 'close-paren-star
1371 'close-paren-occur)
1372 (when (memq xmltok-type '(?, ?|))
1373 (let ((connector xmltok-type))
1374 (while (progn
1375 (xmltok-next-prolog-token)
1376 (xmltok-parse-model-group-member)
1377 (xmltok-require-next-token connector
1378 ?\)
1379 'close-paren-star
1380 'close-paren-occur)
1381 (eq xmltok-type connector))))))
1382
1383 (defun xmltok-parse-model-group-member ()
1384 (xmltok-require-token 'name
1385 'prefixed-name
1386 'name-occur
1387 ?\()
1388 (when (eq xmltok-type ?\()
1389 (xmltok-next-prolog-token)
1390 (xmltok-parse-model-group)))
1391
1392 (defun xmltok-parse-entity-declaration ()
1393 (let (paramp name)
1394 (xmltok-require-next-token 'name ?%)
1395 (when (eq xmltok-type ?%)
1396 (setq paramp t)
1397 (xmltok-require-next-token 'name))
1398 (setq name (xmltok-current-token-string))
1399 (xmltok-require-next-token 'literal "SYSTEM" "PUBLIC")
1400 (cond ((eq xmltok-type 'literal)
1401 (let ((replacement (xmltok-parse-entity-value)))
1402 (unless paramp
1403 (xmltok-define-entity name replacement)))
1404 (xmltok-require-next-token ?>))
1405 (t
1406 (xmltok-parse-external-id)
1407 (if paramp
1408 (xmltok-require-token ?>)
1409 (xmltok-require-token ?> "NDATA")
1410 (if (eq xmltok-type ?>)
1411 (xmltok-define-entity name 'external)
1412 (xmltok-require-next-token 'name)
1413 (xmltok-require-next-token ?>)
1414 (xmltok-define-entity name 'unparsed)))))))
1415
1416 (defun xmltok-define-entity (name value)
1417 (when (and (or (not xmltok-had-param-entity-ref)
1418 xmltok-standalone)
1419 (not (assoc name xmltok-dtd)))
1420 (setq xmltok-dtd
1421 (cons (cons name value) xmltok-dtd))))
1422
1423 (defun xmltok-parse-entity-value ()
1424 (let ((lim (1- (point)))
1425 (well-formed t)
1426 value-parts
1427 start)
1428 (save-excursion
1429 (goto-char (1+ xmltok-start))
1430 (setq start (point))
1431 (while (progn
1432 (skip-chars-forward "^%&" lim)
1433 (when (< (point) lim)
1434 (goto-char (1+ (point)))
1435 (cond ((eq (char-before) ?%)
1436 (xmltok-add-error "Parameter entity references are not allowed in the internal subset"
1437 (1- (point))
1438 (point))
1439 (setq well-formed nil))
1440 (t
1441 (let ((xmltok-start (1- (point)))
1442 xmltok-type xmltok-replacement)
1443 (xmltok-scan-after-amp (lambda (start end)))
1444 (cond ((eq xmltok-type 'char-ref)
1445 (setq value-parts
1446 (cons (buffer-substring-no-properties
1447 start
1448 xmltok-start)
1449 value-parts))
1450 (setq value-parts
1451 (cons xmltok-replacement
1452 value-parts))
1453 (setq start (point)))
1454 ((eq xmltok-type 'not-well-formed)
1455 (setq well-formed nil))))))
1456 t))))
1457 (if (not well-formed)
1458 nil
1459 (apply 'concat
1460 (nreverse (cons (buffer-substring-no-properties start lim)
1461 value-parts))))))
1462
1463 (defun xmltok-parse-notation-declaration ()
1464 (xmltok-require-next-token 'name)
1465 (xmltok-require-next-token "SYSTEM" "PUBLIC")
1466 (let ((publicp (string= (xmltok-current-token-string) "PUBLIC")))
1467 (xmltok-require-next-token 'literal)
1468 (cond (publicp
1469 (xmltok-require-next-token 'literal ?>)
1470 (unless (eq xmltok-type ?>)
1471 (xmltok-require-next-token ?>)))
1472 (t (xmltok-require-next-token ?>)))))
1473
1474 (defun xmltok-parse-external-id ()
1475 (xmltok-require-token "SYSTEM" "PUBLIC")
1476 (let ((publicp (string= (xmltok-current-token-string) "PUBLIC")))
1477 (xmltok-require-next-token 'literal)
1478 (when publicp
1479 (xmltok-require-next-token 'literal)))
1480 (xmltok-next-prolog-token))
1481
1482 (defun xmltok-require-next-token (&rest types)
1483 (xmltok-next-prolog-token)
1484 (apply 'xmltok-require-token types))
1485
1486 (defun xmltok-require-token (&rest types)
1487 ;; XXX Generate a more helpful error message
1488 (while (and (not (let ((type (car types)))
1489 (if (stringp (car types))
1490 (string= (xmltok-current-token-string) type)
1491 (eq type xmltok-type))))
1492 (setq types (cdr types))))
1493 (unless types
1494 (when (and xmltok-type
1495 (not (eq xmltok-type 'not-well-formed)))
1496 (xmltok-add-error "Unexpected token"))
1497 (xmltok-markup-declaration-parse-error))
1498 (let ((region-type (xmltok-prolog-region-type (car types))))
1499 (when region-type
1500 (xmltok-add-prolog-region region-type
1501 xmltok-start
1502 (point)))))
1503
1504 (defun xmltok-current-token-string ()
1505 (buffer-substring-no-properties xmltok-start (point)))
1506
1507 (put 'xmltok-markup-declaration-parse-error
1508 'error-conditions
1509 '(error xmltok-markup-declaration-parse-error))
1510
1511 (put 'xmltok-markup-declaration-parse-error
1512 'error-message
1513 "Syntax error in markup declaration")
1514
1515 (defun xmltok-markup-declaration-parse-error ()
1516 (signal 'xmltok-markup-declaration-parse-error nil))
1517
1518 (defun xmltok-skip-markup-declaration ()
1519 (while (cond ((eq xmltok-type ?>)
1520 (xmltok-next-prolog-token)
1521 nil)
1522 ((and xmltok-markup-declaration-doctype-flag
1523 (eq xmltok-type ?\[))
1524 (setq xmltok-internal-subset-start (point))
1525 (xmltok-next-prolog-token)
1526 nil)
1527 ((memq xmltok-type '(nil
1528 end-prolog
1529 named-markup-declaration
1530 comment
1531 processing-instruction))
1532 nil)
1533 ((and xmltok-internal-subset-start
1534 (eq xmltok-type 'internal-subset-close))
1535 nil)
1536 (t (xmltok-next-prolog-token) t)))
1537 xmltok-type)
1538
1539 (defun xmltok-prolog-region-type (required)
1540 (cond ((cdr (assq xmltok-type
1541 '((literal . literal)
1542 (?> . markup-declaration-close)
1543 (?\[ . internal-subset-open)
1544 (hash-name . hash-name)))))
1545 ((and (stringp required) (eq xmltok-type 'name))
1546 'keyword)))
1547
1548 ;; Return new token type.
1549
1550 (defun xmltok-next-prolog-token ()
1551 (skip-chars-forward " \t\r\n")
1552 (setq xmltok-start (point))
1553 (cond ((not (and (looking-at (xmltok-prolog regexp))
1554 (goto-char (match-end 0))))
1555 (let ((ch (char-after)))
1556 (cond (ch
1557 (goto-char (1+ (point)))
1558 (xmltok-add-error "Illegal char in prolog")
1559 (setq xmltok-type 'not-well-formed))
1560 (t (setq xmltok-type nil)))))
1561 ((or (xmltok-prolog start ncname-occur)
1562 (xmltok-prolog start colon-name-occur))
1563 (setq xmltok-name-end (1- (point)))
1564 (setq xmltok-name-colon (xmltok-prolog start colon))
1565 (setq xmltok-type 'name-occur))
1566 ((xmltok-prolog start colon)
1567 (setq xmltok-name-end (point))
1568 (setq xmltok-name-colon (xmltok-prolog start colon))
1569 (unless (looking-at "[ \t\r\n>),|[%]")
1570 (xmltok-add-error "Missing space after name"))
1571 (setq xmltok-type 'prefixed-name))
1572 ((or (xmltok-prolog start ncname-nmtoken)
1573 (xmltok-prolog start ncname-colon))
1574 (unless (looking-at "[ \t\r\n>),|[%]")
1575 (xmltok-add-error "Missing space after name token"))
1576 (setq xmltok-type 'nmtoken))
1577 ((xmltok-prolog start name)
1578 (setq xmltok-name-end (point))
1579 (setq xmltok-name-colon nil)
1580 (unless (looking-at "[ \t\r\n>),|[%]")
1581 (xmltok-add-error "Missing space after name"))
1582 (setq xmltok-type 'name))
1583 ((xmltok-prolog start hash-name)
1584 (setq xmltok-name-end (point))
1585 (unless (looking-at "[ \t\r\n>)|%]")
1586 (xmltok-add-error "Missing space after name"))
1587 (setq xmltok-type 'hash-name))
1588 ((xmltok-prolog start processing-instruction-question)
1589 (xmltok-scan-prolog-after-processing-instruction-open))
1590 ((xmltok-prolog start comment-open)
1591 ;; XXX if not-well-formed, ignore some stuff
1592 (xmltok-scan-after-comment-open))
1593 ((xmltok-prolog start named-markup-declaration)
1594 (setq xmltok-type 'named-markup-declaration))
1595 ((xmltok-prolog start instance-start)
1596 (goto-char xmltok-start)
1597 (setq xmltok-type 'end-prolog))
1598 ((xmltok-prolog start close-paren-star)
1599 (setq xmltok-type 'close-paren-star))
1600 ((xmltok-prolog start close-paren-occur)
1601 (setq xmltok-type 'close-paren-occur))
1602 ((xmltok-prolog start close-paren)
1603 (unless (looking-at "[ \t\r\n>,|)]")
1604 (xmltok-add-error "Missing space after )"))
1605 (setq xmltok-type ?\)))
1606 ((xmltok-prolog start single-char)
1607 (let ((ch (char-before)))
1608 (cond ((memq ch '(?\" ?\'))
1609 (xmltok-scan-prolog-literal))
1610 (t (setq xmltok-type ch)))))
1611 ((xmltok-prolog start percent)
1612 (cond ((xmltok-prolog start param-entity-ref-close)
1613 (setq xmltok-name-end (1- (point)))
1614 (setq xmltok-type 'param-entity-ref))
1615 ((xmltok-prolog start param-entity-ref)
1616 (xmltok-add-error "Missing ;")
1617 (setq xmltok-name-end (point))
1618 (setq xmltok-type 'param-entity-ref))
1619 ((looking-at "[ \t\r\n%]")
1620 (setq xmltok-type ?%))
1621 (t
1622 (xmltok-add-error "Expected name after %")
1623 (setq xmltok-type 'not-well-formed))))
1624 ((xmltok-prolog start nmtoken)
1625 (unless (looking-at "[ \t\r\n>),|[%]")
1626 (xmltok-add-error "Missing space after name token"))
1627 (setq xmltok-type 'nmtoken))
1628 ((xmltok-prolog start internal-subset-close)
1629 (setq xmltok-type 'internal-subset-close))
1630 ((xmltok-prolog start pound)
1631 (xmltok-add-error "Expected name after #")
1632 (setq xmltok-type 'not-well-formed))
1633 ((xmltok-prolog start markup-declaration)
1634 (xmltok-add-error "Expected name or -- after <!")
1635 (setq xmltok-type 'not-well-formed))
1636 ((xmltok-prolog start comment-first-dash)
1637 (xmltok-add-error "Expected <!--")
1638 (setq xmltok-type 'not-well-formed))
1639 ((xmltok-prolog start less-than)
1640 (xmltok-add-error "Incomplete markup")
1641 (setq xmltok-type 'not-well-formed))
1642 (t (error "Unhandled token in prolog %s"
1643 (match-string-no-properties 0)))))
1644
1645 (defun xmltok-scan-prolog-literal ()
1646 (let* ((delim (string (char-before)))
1647 (safe-end (save-excursion
1648 (skip-chars-forward (concat "^<>[]" delim))
1649 (point)))
1650 (end (save-excursion
1651 (goto-char safe-end)
1652 (search-forward delim nil t))))
1653 (or (cond ((not end)
1654 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
1655 nil
1656 nil
1657 delim)
1658 nil)
1659 ((save-excursion
1660 (goto-char end)
1661 (looking-at "[ \t\r\n>%[]"))
1662 (goto-char end)
1663 (setq xmltok-type 'literal))
1664 ((eq (1+ safe-end) end)
1665 (goto-char end)
1666 (xmltok-add-error (format "Missing space after %s" delim)
1667 safe-end)
1668 (setq xmltok-type 'literal))
1669 (t
1670 (xmltok-add-dependent 'xmltok-semi-closed-reparse-p
1671 xmltok-start
1672 (1+ end)
1673 delim
1674 1)
1675 nil))
1676 (progn
1677 (xmltok-add-error (format "Missing closing %s" delim))
1678 (goto-char safe-end)
1679 (skip-chars-backward " \t\r\n")
1680 (setq xmltok-type 'not-well-formed)))))
1681
1682 (defun xmltok-scan-prolog-after-processing-instruction-open ()
1683 (cond ((not (search-forward "?>" nil t))
1684 (xmltok-add-error "No closing ?>"
1685 xmltok-start
1686 (+ xmltok-start 2))
1687 (xmltok-add-dependent 'xmltok-unclosed-reparse-p
1688 nil
1689 nil
1690 "?>")
1691 (setq xmltok-type 'not-well-formed))
1692 (t
1693 (let* ((end (point))
1694 (target
1695 (save-excursion
1696 (goto-char (+ xmltok-start 2))
1697 (and (looking-at (xmltok-ncname regexp))
1698 (or (memq (char-after (match-end 0))
1699 '(?\n ?\t ?\r ? ))
1700 (= (match-end 0) (- end 2)))
1701 (match-string-no-properties 0)))))
1702 (cond ((not target)
1703 (xmltok-add-error "\
1704 Processing instruction does not start with a name"
1705 (+ xmltok-start 2)
1706 (+ xmltok-start 3)))
1707 ((not (and (= (length target) 3)
1708 (let ((case-fold-search t))
1709 (string-match "xml" target)))))
1710 ((= xmltok-start 1)
1711 (xmltok-add-error "Invalid XML declaration"
1712 xmltok-start
1713 (point)))
1714 ((save-excursion
1715 (goto-char xmltok-start)
1716 (looking-at (xmltok-xml-declaration regexp)))
1717 (xmltok-add-error "XML declaration not at beginning of file"
1718 xmltok-start
1719 (point)))
1720 (t
1721 (xmltok-add-error "Processing instruction has target of xml"
1722 (+ xmltok-start 2)
1723 (+ xmltok-start 5))))
1724 (xmltok-add-prolog-region 'processing-instruction-left
1725 xmltok-start
1726 (+ xmltok-start
1727 2
1728 (if target
1729 (length target)
1730 0)))
1731 (xmltok-add-prolog-region 'processing-instruction-right
1732 (if target
1733 (save-excursion
1734 (goto-char (+ xmltok-start
1735 (length target)
1736 2))
1737 (skip-chars-forward " \t\r\n")
1738 (point))
1739 (+ xmltok-start 2))
1740 (point)))
1741 (setq xmltok-type 'processing-instruction))))
1742
1743 (defun xmltok-parse-entities ()
1744 (let ((todo xmltok-dtd))
1745 (when (and (or xmltok-had-param-entity-ref
1746 xmltok-doctype-external-subset-flag)
1747 (not xmltok-standalone))
1748 (setq xmltok-dtd (cons t xmltok-dtd)))
1749 (while todo
1750 (xmltok-parse-entity (car todo))
1751 (setq todo (cdr todo)))))
1752
1753 (defun xmltok-parse-entity (name-def)
1754 (let ((def (cdr name-def))
1755 ;; in case its value is buffer local
1756 (xmltok-dtd xmltok-dtd)
1757 buf)
1758 (when (stringp def)
1759 (if (string-match "\\`[^&<\t\r\n]*\\'" def)
1760 (setcdr name-def (cons def def))
1761 (setcdr name-def 'not-well-formed) ; avoid infinite expansion loops
1762 (setq buf (get-buffer-create
1763 (format " *Entity %s*" (car name-def))))
1764 (save-excursion
1765 (set-buffer buf)
1766 (erase-buffer)
1767 (insert def)
1768 (goto-char (point-min))
1769 (setcdr name-def
1770 (xmltok-parse-entity-replacement)))
1771 (kill-buffer buf)))))
1772
1773 (defun xmltok-parse-entity-replacement ()
1774 (let ((def (cons "" "")))
1775 (while (let* ((start (point))
1776 (found (re-search-forward "[<&\t\r\n]\\|]]>" nil t))
1777 (ch (and found (char-before)))
1778 (str (buffer-substring-no-properties
1779 start
1780 (if found
1781 (match-beginning 0)
1782 (point-max)))))
1783 (setq def
1784 (xmltok-append-entity-def def
1785 (cons str str)))
1786 (cond ((not found) nil)
1787 ((eq ch ?>)
1788 (setq def 'not-well-formed)
1789 nil)
1790 ((eq ch ?<)
1791 (xmltok-save
1792 (setq xmltok-start (1- (point)))
1793 (xmltok-scan-after-lt)
1794 (setq def
1795 (xmltok-append-entity-def
1796 def
1797 (cond ((memq xmltok-type
1798 '(start-tag
1799 end-tag
1800 empty-element))
1801 'element)
1802 ((memq xmltok-type
1803 '(comment
1804 processing-instruction))
1805 (cons "" nil))
1806 ((eq xmltok-type
1807 'cdata-section)
1808 (cons (buffer-substring-no-properties
1809 (+ xmltok-start 9)
1810 (- (point) 3))
1811 nil))
1812 (t 'not-well-formed)))))
1813 t)
1814 ((eq ch ?&)
1815 (let ((xmltok-start (1- (point)))
1816 xmltok-type
1817 xmltok-replacement
1818 xmltok-errors)
1819 (xmltok-scan-after-amp 'xmltok-handle-nested-entity)
1820 (cond ((eq xmltok-type 'entity-ref)
1821 (setq def
1822 (xmltok-append-entity-def
1823 def
1824 xmltok-replacement)))
1825 ((eq xmltok-type 'char-ref)
1826 (setq def
1827 (xmltok-append-entity-def
1828 def
1829 (if xmltok-replacement
1830 (cons xmltok-replacement
1831 xmltok-replacement)
1832 (and xmltok-errors 'not-well-formed)))))
1833 (t
1834 (setq def 'not-well-formed))))
1835 t)
1836 (t
1837 (setq def
1838 (xmltok-append-entity-def
1839 def
1840 (cons (match-string-no-properties 0)
1841 " ")))
1842 t))))
1843 def))
1844
1845 (defun xmltok-handle-nested-entity (start end)
1846 (let* ((name-def (assoc (buffer-substring-no-properties start end)
1847 xmltok-dtd))
1848 (def (cdr name-def)))
1849 (when (stringp def)
1850 (xmltok-parse-entity name-def)
1851 (setq def (cdr name-def)))
1852 (setq xmltok-replacement
1853 (cond ((null name-def)
1854 (if (eq (car xmltok-dtd) t)
1855 nil
1856 'not-well-formed))
1857 ((eq def 'unparsed) 'not-well-formed)
1858 (t def)))))
1859
1860 (defun xmltok-append-entity-def (d1 d2)
1861 (cond ((consp d1)
1862 (if (consp d2)
1863 (cons (concat (car d1) (car d2))
1864 (and (cdr d1)
1865 (cdr d2)
1866 (concat (cdr d1) (cdr d2))))
1867 d2))
1868 ((consp d2) d1)
1869 (t
1870 (let ((defs '(not-well-formed external element)))
1871 (while (not (or (eq (car defs) d1)
1872 (eq (car defs) d2)))
1873 (setq defs (cdr defs)))
1874 (car defs)))))
1875
1876 (defun xmltok-add-prolog-region (type start end)
1877 (setq xmltok-prolog-regions
1878 (cons (vector type start end)
1879 xmltok-prolog-regions)))
1880
1881 (defun xmltok-merge-attributes ()
1882 "Return a list merging `xmltok-attributes' and `xmltok-namespace-attributes'.
1883 The members of the merged list are in order of occurrence in the
1884 document. The list may share list structure with `xmltok-attributes'
1885 and `xmltok-namespace-attributes'."
1886 (cond ((not xmltok-namespace-attributes)
1887 xmltok-attributes)
1888 ((not xmltok-attributes)
1889 xmltok-namespace-attributes)
1890 (t
1891 (let ((atts1 xmltok-attributes)
1892 (atts2 xmltok-namespace-attributes)
1893 merged)
1894 (while (and atts1 atts2)
1895 (cond ((< (xmltok-attribute-name-start (car atts1))
1896 (xmltok-attribute-name-start (car atts2)))
1897 (setq merged (cons (car atts1) merged))
1898 (setq atts1 (cdr atts1)))
1899 (t
1900 (setq merged (cons (car atts2) merged))
1901 (setq atts2 (cdr atts2)))))
1902 (setq merged (nreverse merged))
1903 (cond (atts1 (setq merged (nconc merged atts1)))
1904 (atts2 (setq merged (nconc merged atts2))))
1905 merged))))
1906
1907 ;;; Testing
1908
1909 (defun xmltok-forward-test ()
1910 (interactive)
1911 (if (xmltok-forward)
1912 (message "Scanned %s" xmltok-type)
1913 (message "Scanned nothing")))
1914
1915 (defun xmltok-next-prolog-token-test ()
1916 (interactive)
1917 (if (xmltok-next-prolog-token)
1918 (message "Scanned %s"
1919 (if (integerp xmltok-type)
1920 (string xmltok-type)
1921 xmltok-type))
1922 (message "Scanned end of file")))
1923
1924 (provide 'xmltok)
1925
1926 ;; arch-tag: 747e5f3a-6fc3-4f8d-bd96-89f05aa99f5e
1927 ;;; xmltok.el ends here