X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/c1473b4cfeb477ced05d457868c5e1eb97a58eb0..95a2cb24b0697558e6629460d8bc693b394f0138:/lisp/nxml/xmltok.el?ds=sidebyside diff --git a/lisp/nxml/xmltok.el b/lisp/nxml/xmltok.el index 1c20d63d9f..fe6a6050be 100644 --- a/lisp/nxml/xmltok.el +++ b/lisp/nxml/xmltok.el @@ -1,9 +1,9 @@ -;;; xmltok.el --- XML tokenization +;;; xmltok.el --- XML tokenization -*- lexical-binding:t -*- -;; Copyright (C) 2003, 2007, 2008 Free Software Foundation, Inc. +;; Copyright (C) 2003, 2007-2015 Free Software Foundation, Inc. ;; Author: James Clark -;; Keywords: XML +;; Keywords: wp, hypermedia, languages, XML ;; This file is part of GNU Emacs. @@ -117,8 +117,8 @@ first member of the alist is t if references to entities not in the alist are well-formed \(e.g. because there's an external subset that wasn't parsed). -Each general entity name is a string. The definition is either nil, a -symbol, a string, a cons cell. If the definition is nil, then it +Each general entity name is a string. The definition is either nil, +a symbol, a string, a cons cell. If the definition is nil, then it means that it's an internal entity but the result of parsing it is unknown. If it is a symbol, then the symbol is either `unparsed', meaning the entity is an unparsed entity, `external', meaning the @@ -126,39 +126,12 @@ entity is or references an external entity, `element', meaning the entity includes one or more elements, or `not-well-formed', meaning the replacement text is not well-formed. If the definition is a string, then the replacement text of the entity is that string; this -happens only during the parsing of the prolog. If the definition is a -cons cell \(ER . AR), then ER specifies the string that results from -referencing the entity in element content and AR is either nil, +happens only during the parsing of the prolog. If the definition is +a cons cell \(ER . AR), then ER specifies the string that results +from referencing the entity in element content and AR is either nil, meaning the replacement text included a <, or a string which is the normalized attribute value.") -(defvar xmltok-dependent-regions nil - "List of descriptors of regions that a parsed token depends on. - -A token depends on a region if the region occurs after the token and a -change in the region may require the token to be reparsed. This only -happens with markup that is not well-formed. For example, if a , then the then the buffer must be reparsed from the space-count 0) (setq xmltok-type 'space)) (t - (goto-char (1+ (point))) + (forward-char 1) (xmltok-scan-after-lt)))) ((eq ch ?\&) (cond ((> space-count 0) (setq xmltok-type 'space)) (t - (goto-char (1+ (point))) - (xmltok-scan-after-amp - (lambda (start end) - (xmltok-handle-entity start end)))))) + (forward-char 1) + (xmltok-scan-after-amp 'xmltok-handle-entity)))) ((re-search-forward "[<&]\\|\\(]]>\\)" nil t) (cond ((not (match-beginning 1)) (goto-char (match-beginning 0)) @@ -351,8 +312,8 @@ VALUE-BEGIN and VALUE-END, otherwise a STRING giving the value." "Scan forward past the first special token starting at or after point. Return nil if there is no special token that starts before BOUND. CDATA sections, processing instructions and comments (and indeed -anything starting with < following by ? or !) count -as special. Return the type of the token." +anything starting with < following by ? or !) count as special. +Return the type of the token." (when (re-search-forward "<[?!]" (1+ bound) t) (setq xmltok-start (match-beginning 0)) (goto-char (1+ xmltok-start)) @@ -392,7 +353,7 @@ as special. Return the type of the token." (cons (concat "\\(" ,sym "\\)") (cons ',name nil)) (cons (concat "\\(" (car ,sym) "\\)") (cons ',name (cdr ,sym))))))) - (defun xmltok-p (&rest r) (xmltok+ "\\(?:" + (defun xmltok-p (&rest r) (xmltok+ "\\(?:" (apply 'xmltok+ r) "\\)")) @@ -443,12 +404,11 @@ as special. Return the type of the token." (list 'match-string-no-properties (xmltok-get-index group-name ',(cdr r)))) (t (error "Invalid action: %s" action)))))))) - + (eval-when-compile (let* ((or "\\|") (open "\\(?:") - (gopen "\\(") (close "\\)") (name-start-char "[_[:alpha:]]") (name-continue-not-start-char "[-.[:digit:]]") @@ -685,14 +645,8 @@ as special. Return the type of the token." (setq xmltok-type 'empty-element)) ((xmltok-after-lt start cdata-section-open) (setq xmltok-type - (if (search-forward "]]>" nil t) - 'cdata-section - (xmltok-add-error "No closing ]]>") - (xmltok-add-dependent 'xmltok-unclosed-reparse-p - nil - nil - "]]>") - 'not-well-formed))) + (progn (search-forward "]]>" nil 'move) + 'cdata-section))) ((xmltok-after-lt start processing-instruction-question) (xmltok-scan-after-processing-instruction-open)) ((xmltok-after-lt start comment-open) @@ -759,68 +713,45 @@ as special. Return the type of the token." ;; xmltok-scan-prolog-after-processing-instruction-open ;; XXX maybe should include rest of line (up to any <,>) in unclosed PI (defun xmltok-scan-after-processing-instruction-open () - (cond ((not (search-forward "?>" nil t)) - (xmltok-add-error "No closing ?>" - xmltok-start - (+ xmltok-start 2)) - (xmltok-add-dependent 'xmltok-unclosed-reparse-p - nil - nil - "?>") - (setq xmltok-type 'not-well-formed)) - (t - (cond ((not (save-excursion - (goto-char (+ 2 xmltok-start)) - (and (looking-at (xmltok-ncname regexp)) - (setq xmltok-name-end (match-end 0))))) - (setq xmltok-name-end (+ xmltok-start 2)) - (xmltok-add-error "" nil 'move) + (cond ((not (save-excursion + (goto-char (+ 2 xmltok-start)) + (and (looking-at (xmltok-ncname regexp)) + (setq xmltok-name-end (match-end 0))))) + (setq xmltok-name-end (+ xmltok-start 2)) + (xmltok-add-error "") - (xmltok-add-dependent 'xmltok-unclosed-reparse-p - nil - nil - ;; not --> because - ;; -- is not allowed - ;; in comments in XML - "--") - 'not-well-formed) - ((eq (char-after) ?>) - (goto-char (1+ (point))) - 'comment) - (t - (xmltok-add-dependent - 'xmltok-semi-closed-reparse-p - nil - (point) - "--" - 2) - ;; just include the