X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/2e9bdf15e6c7ffbd950a727a92ef7c7e792040e1..b0c9a334c2f0eb881eff47f590997e746cc3bdb3:/lisp/xml.el diff --git a/lisp/xml.el b/lisp/xml.el index 61a79b3710..5fdb6f33b1 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -1,6 +1,7 @@ ;;; xml.el --- XML parser -;; Copyright (C) 2000, 01, 03, 2004 Free Software Foundation, Inc. +;; Copyright (C) 2000, 2001, 2002, 2003, 2004, +;; 2005 Free Software Foundation, Inc. ;; Author: Emmanuel Briot ;; Maintainer: Mark A. Hershberger @@ -20,20 +21,20 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Commentary: ;; This file contains a somewhat incomplete non-validating XML parser. It ;; parses a file, and returns a list that can be used internally by -;; any other lisp libraries. +;; any other Lisp libraries. ;;; FILE FORMAT ;; The document type declaration may either be ignored or (optionally) ;; parsed, but currently the parsing will only accept element -;; declarations. The XML file is assumed to be well-formed. In case +;; declarations. The XML file is assumed to be well-formed. In case ;; of error, the parsing stops and the XML file is shown where the ;; parsing stopped. ;; @@ -44,7 +45,7 @@ ;; value2 ;; value3 ;; -;; Of course, the name of the nodes and attributes can be anything. There can +;; Of course, the name of the nodes and attributes can be anything. There can ;; be any number of attributes (or none), as well as any number of children ;; below the nodes. ;; @@ -52,15 +53,15 @@ ;;; LIST FORMAT -;; The functions `xml-parse-file' and `xml-parse-tag' return a list with -;; the following format: +;; The functions `xml-parse-file', `xml-parse-region' and +;; `xml-parse-tag' return a list with the following format: ;; ;; xml-list ::= (node node ...) -;; node ::= (tag_name attribute-list . child_node_list) +;; node ::= (qname attribute-list . child_node_list) ;; child_node_list ::= child_node child_node ... ;; child_node ::= node | string -;; tag_name ::= string -;; attribute_list ::= (("attribute" . "value") ("attribute" . "value") ...) +;; qname ::= (:namespace-uri . "name") | "name" +;; attribute_list ::= ((qname . "value") (qname . "value") ...) ;; | nil ;; string ::= "..." ;; @@ -68,6 +69,11 @@ ;; Whitespace is preserved. Fixme: There should be a tree-walker that ;; can remove it. +;; TODO: +;; * xml:base, xml:space support +;; * more complete DOCTYPE parsing +;; * pi support + ;;; Code: ;; Note that {buffer-substring,match-string}-no-properties were @@ -79,9 +85,37 @@ ;;** ;;******************************************************************* +(defconst xml-undefined-entity "?" + "What to substitute for undefined entities") + +(defvar xml-entity-alist + '(("lt" . "<") + ("gt" . ">") + ("apos" . "'") + ("quot" . "\"") + ("amp" . "&")) + "The defined entities. Entities are added to this when the DTD is parsed.") + +(defvar xml-sub-parser nil + "Dynamically set this to a non-nil value if you want to parse an XML fragment.") + +(defvar xml-validating-parser nil + "Set to non-nil to get validity checking.") + (defsubst xml-node-name (node) "Return the tag associated with NODE. -The tag is a lower-case symbol." +Without namespace-aware parsing, the tag is a symbol. + +With namespace-aware parsing, the tag is a cons of a string +representing the uri of the namespace with the local name of the +tag. For example, + + + +would be represented by + + '(\"\" . \"foo\")." + (car node)) (defsubst xml-node-attributes (node) @@ -96,17 +130,17 @@ This is a list of nodes, and it can be nil." (defun xml-get-children (node child-name) "Return the children of NODE whose tag is CHILD-NAME. -CHILD-NAME should be a lower case symbol." +CHILD-NAME should match the value returned by `xml-node-name'." (let ((match ())) (dolist (child (xml-node-children node)) - (if child - (if (equal (xml-node-name child) child-name) - (push child match)))) + (if (and (listp child) + (equal (xml-node-name child) child-name)) + (push child match))) (nreverse match))) (defun xml-get-attribute-or-nil (node attribute) "Get from NODE the value of ATTRIBUTE. -Return `nil' if the attribute was not found. +Return nil if the attribute was not found. See also `xml-get-attribute'." (cdr (assoc attribute (xml-node-attributes node)))) @@ -148,6 +182,80 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (kill-buffer (current-buffer))) xml))) + +(defvar xml-name-re) +(defvar xml-entity-value-re) +(defvar xml-att-def-re) +(let* ((start-chars (concat "[:alpha:]:_")) + (name-chars (concat "-[:digit:]." start-chars)) +;;[3] S ::= (#x20 | #x9 | #xD | #xA)+ + (whitespace "[ \t\n\r]")) +;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] +;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] +;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] +;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] + (defvar xml-name-start-char-re (concat "[" start-chars "]")) +;;[4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040] + (defvar xml-name-char-re (concat "[" name-chars "]")) +;;[5] Name ::= NameStartChar (NameChar)* + (defvar xml-name-re (concat xml-name-start-char-re xml-name-char-re "*")) +;;[6] Names ::= Name (#x20 Name)* + (defvar xml-names-re (concat xml-name-re "\\(?: " xml-name-re "\\)*")) +;;[7] Nmtoken ::= (NameChar)+ + (defvar xml-nmtoken-re (concat xml-name-char-re "+")) +;;[8] Nmtokens ::= Nmtoken (#x20 Nmtoken)* + (defvar xml-nmtokens-re (concat xml-nmtoken-re "\\(?: " xml-name-re "\\)*")) +;;[66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';' + (defvar xml-char-ref-re "\\(?:&#[0-9]+;\\|&#x[0-9a-fA-F]+;\\)") +;;[68] EntityRef ::= '&' Name ';' + (defvar xml-entity-ref (concat "&" xml-name-re ";")) +;;[69] PEReference ::= '%' Name ';' + (defvar xml-pe-reference-re (concat "%" xml-name-re ";")) +;;[67] Reference ::= EntityRef | CharRef + (defvar xml-reference-re (concat "\\(?:" xml-entity-ref "\\|" xml-char-ref-re "\\)")) +;;[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" + (defvar xml-att-value-re (concat "\\(?:\"\\(?:[^&\"]\\|" xml-reference-re "\\)*\"\\|" + "'\\(?:[^&']\\|" xml-reference-re "\\)*'\\)")) +;;[56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default] +;; | 'IDREF' [VC: IDREF] +;; | 'IDREFS' [VC: IDREF] +;; | 'ENTITY' [VC: Entity Name] +;; | 'ENTITIES' [VC: Entity Name] +;; | 'NMTOKEN' [VC: Name Token] +;; | 'NMTOKENS' [VC: Name Token] + (defvar xml-tokenized-type-re "\\(?:ID\\|IDREF\\|IDREFS\\|ENTITY\\|ENTITIES\\|NMTOKEN\\|NMTOKENS\\)") +;;[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' + (defvar xml-notation-type-re (concat "\\(?:NOTATION" whitespace "(" whitespace "*" xml-name-re + "\\(?:" whitespace "*|" whitespace "*" xml-name-re "\\)*" whitespace "*)\\)")) +;;[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] [VC: No Duplicate Tokens] + (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re + "\\(?:" whitespace "*|" whitespace "*" xml-nmtoken-re "\\)*" + whitespace ")\\)")) +;;[57] EnumeratedType ::= NotationType | Enumeration + (defvar xml-enumerated-type-re (concat "\\(?:" xml-notation-type-re "\\|" xml-enumeration-re "\\)")) +;;[54] AttType ::= StringType | TokenizedType | EnumeratedType +;;[55] StringType ::= 'CDATA' + (defvar xml-att-type-re (concat "\\(?:CDATA\\|" xml-tokenized-type-re "\\|" xml-notation-type-re"\\|" xml-enumerated-type-re "\\)")) +;;[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) + (defvar xml-default-decl-re (concat "\\(?:#REQUIRED\\|#IMPLIED\\|\\(?:#FIXED" whitespace "\\)*" xml-att-value-re "\\)")) +;;[53] AttDef ::= S Name S AttType S DefaultDecl + (defvar xml-att-def-re (concat "\\(?:" whitespace "*" xml-name-re + whitespace "*" xml-att-type-re + whitespace "*" xml-default-decl-re "\\)")) +;;[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' +;; | "'" ([^%&'] | PEReference | Reference)* "'" + (defvar xml-entity-value-re (concat "\\(?:\"\\(?:[^%&\"]\\|" xml-pe-reference-re + "\\|" xml-reference-re "\\)*\"\\|'\\(?:[^%&']\\|" + xml-pe-reference-re "\\|" xml-reference-re "\\)*'\\)"))) +;;[75] ExternalID ::= 'SYSTEM' S SystemLiteral +;; | 'PUBLIC' S PubidLiteral S SystemLiteral +;;[76] NDataDecl ::= S 'NDATA' S +;;[73] EntityDef ::= EntityValue| (ExternalID NDataDecl?) +;;[71] GEDecl ::= '' +;;[74] PEDef ::= EntityValue | ExternalID +;;[72] PEDecl ::= '' +;;[70] EntityDecl ::= GEDecl | PEDecl + ;; Note that this is setup so that we can do whitespace-skipping with ;; `(skip-syntax-forward " ")', inter alia. Previously this was slow ;; compared with `re-search-forward', but that has been fixed. Also @@ -213,9 +321,9 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (progn (forward-char -1) (setq result (xml-parse-tag parse-dtd parse-ns)) - (if (and xml result) + (if (and xml result (not xml-sub-parser)) ;; translation of rule [1] of XML specifications - (error "XML files can have only one toplevel tag") + (error "XML: (Not Well-Formed) Only one root tag allowed") (cond ((null result)) ((and (listp (car result)) @@ -230,72 +338,42 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (cons dtd (nreverse xml)) (nreverse xml))))))) -(defun xml-ns-parse-ns-attrs (attr-list &optional xml-ns) - "Parse the namespace attributes and return a list of cons in the form: -\(namespace . prefix)" - - (mapcar - (lambda (attr) - (let* ((splitup (split-string (car attr) ":")) - (prefix (nth 0 splitup)) - (lname (nth 1 splitup))) - (when (string= "xmlns" prefix) - (push (cons (if lname - lname - "") - (cdr attr)) - xml-ns)))) attr-list) - xml-ns) - -;; expand element names -(defun xml-ns-expand-el (el xml-ns) - "Expand the XML elements from \"prefix:local-name\" to a cons in the form -\"(namespace . local-name)\"." - - (let* ((splitup (split-string el ":")) - (lname (or (nth 1 splitup) - (nth 0 splitup))) - (prefix (if (nth 1 splitup) - (nth 0 splitup) - (if (string= lname "xmlns") - "xmlns" - ""))) - (ns (cdr (assoc-string prefix xml-ns)))) - (if (string= "" ns) - lname - (cons (intern (concat ":" ns)) - lname)))) - -;; expand attribute names -(defun xml-ns-expand-attr (attr-list xml-ns) - "Expand the attribute list for a particular element from the form -\"prefix:local-name\" to the form \"{namespace}:local-name\"." - - (mapcar - (lambda (attr) - (let* ((splitup (split-string (car attr) ":")) - (lname (or (nth 1 splitup) - (nth 0 splitup))) - (prefix (if (nth 1 splitup) - (nth 0 splitup) - (if (string= (car attr) "xmlns") - "xmlns" - ""))) - (ns (cdr (assoc-string prefix xml-ns)))) - (setcar attr - (if (string= "" ns) - lname - (cons (intern (concat ":" ns)) - lname))))) - attr-list) - attr-list) - -(defun xml-intern-attrlist (attr-list) - "Convert attribute names to symbols for backward compatibility." - (mapcar (lambda (attr) - (setcar attr (intern (car attr)))) - attr-list) - attr-list) +(defun xml-maybe-do-ns (name default xml-ns) + "Perform any namespace expansion. +NAME is the name to perform the expansion on. +DEFAULT is the default namespace. XML-NS is a cons of namespace +names to uris. When namespace-aware parsing is off, then XML-NS +is nil. + +During namespace-aware parsing, any name without a namespace is +put into the namespace identified by DEFAULT. nil is used to +specify that the name shouldn't be given a namespace." + (if (consp xml-ns) + (let* ((nsp (string-match ":" name)) + (lname (if nsp (substring name (match-end 0)) name)) + (prefix (if nsp (substring name 0 (match-beginning 0)) default)) + (special (and (string-equal lname "xmlns") (not prefix))) + ;; Setting default to nil will insure that there is not + ;; matching cons in xml-ns. In which case we + (ns (or (cdr (assoc (if special "xmlns" prefix) + xml-ns)) + ""))) + (cons ns (if special "" lname))) + (intern name))) + +(defun xml-parse-fragment (&optional parse-dtd parse-ns) + "Parse xml-like fragments." + (let ((xml-sub-parser t) + children) + (while (not (eobp)) + (let ((bit (xml-parse-tag + parse-dtd parse-ns))) + (if children + (setq children (append (list bit) children)) + (if (stringp bit) + (setq children (list bit)) + (setq children bit))))) + (reverse children))) (defun xml-parse-tag (&optional parse-dtd parse-ns) "Parse the tag at point. @@ -306,12 +384,15 @@ Returns one of: - a list : the matching node - nil : the point is not looking at a tag. - a pair : the first element is the DTD, the second is the node." - (let ((xml-ns (if (consp parse-ns) + (let ((xml-validating-parser (or parse-dtd xml-validating-parser)) + (xml-ns (if (consp parse-ns) parse-ns (if parse-ns (list - ;; Default no namespace - (cons "" "") + ;; Default for empty prefix is no namespace + (cons "" "") + ;; "xml" namespace + (cons "xml" "http://www.w3.org/XML/1998/namespace") ;; We need to seed the xmlns namespace (cons "xmlns" "http://www.w3.org/2000/xmlns/")))))) (cond @@ -325,18 +406,17 @@ Returns one of: ((looking-at "" nil t) - (error "CDATA section does not end anywhere in the document")) - (buffer-substring pos (match-beginning 0)))) + (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) + (concat + (buffer-substring pos (match-beginning 0)) + (xml-parse-string)))) ;; DTD for the document ((looking-at "") @@ -350,82 +430,96 @@ Returns one of: ;; Parse this node (let* ((node-name (match-string 1)) - (attr-list (xml-parse-attlist)) - (children (if (consp xml-ns) ;; take care of namespace parsing - (progn - (setq xml-ns (xml-ns-parse-ns-attrs - attr-list xml-ns)) - (list (xml-ns-expand-attr - attr-list xml-ns) - (xml-ns-expand-el - node-name xml-ns))) - (list (xml-intern-attrlist attr-list) - (intern node-name)))) - pos) + ;; Parse the attribute list. + (attrs (xml-parse-attlist xml-ns)) + children pos) + + ;; add the xmlns:* attrs to our cache + (when (consp xml-ns) + (dolist (attr attrs) + (when (and (consp (car attr)) + (equal "http://www.w3.org/2000/xmlns/" + (caar attr))) + (push (cons (cdar attr) (cdr attr)) + xml-ns)))) + + (setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns))) ;; is this an empty element ? (if (looking-at "/>") - (progn - (forward-char 2) - (nreverse children)) - - ;; is this a valid start tag ? - (if (eq (char-after) ?>) (progn - (forward-char 1) - ;; Now check that we have the right end-tag. Note that this - ;; one might contain spaces after the tag name - (let ((end (concat ""))) - (while (not (looking-at end)) - (cond - ((looking-at ") + (progn + (forward-char 1) + ;; Now check that we have the right end-tag. Note that this + ;; one might contain spaces after the tag name + (let ((end (concat ""))) + (while (not (looking-at end)) + (cond + ((looking-at "", but didn't see it.) + (error "XML: (Well-Formed) Couldn't parse tag: %s" + (buffer-substring (- (point) 10) (+ (point) 1))))))) + (t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag) + (unless xml-sub-parser ; Usually, we error out. + (error "XML: (Well-Formed) Invalid character")) + + ;; However, if we're parsing incrementally, then we need to deal + ;; with stray CDATA. + (xml-parse-string))))) + +(defun xml-parse-string () + "Parse the next whatever. Could be a string, or an element." + (let* ((pos (point)) + (string (progn (if (search-forward "<" nil t) + (forward-char -1) + (goto-char (point-max))) + (buffer-substring pos (point))))) + ;; Clean up the string. As per XML specifications, the XML + ;; processor should always pass the whole string to the + ;; application. But \r's should be replaced: + ;; http://www.w3.org/TR/2000/REC-xml-20001006#sec-line-ends + (setq pos 0) + (while (string-match "\r\n?" string pos) + (setq string (replace-match "\n" t t string)) + (setq pos (1+ (match-beginning 0)))) + + (xml-substitute-special string))) + +(defun xml-parse-attlist (&optional xml-ns) + "Return the attribute-list after point. +Leave point at the first non-blank character after the tag." (let ((attlist ()) end-pos name) (skip-syntax-forward " ") (while (looking-at (eval-when-compile (concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*"))) - (setq name (match-string 1)) - (goto-char (match-end 0)) + (setq end-pos (match-end 0)) + (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns)) + (goto-char end-pos) ;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize @@ -435,18 +529,23 @@ first non-blank character after the tag." (setq end-pos (match-end 0)) (if (looking-at "'\\([^']*\\)'") (setq end-pos (match-end 0)) - (error "XML: Attribute values must be given between quotes"))) + (error "XML: (Not Well-Formed) Attribute values must be given between quotes"))) ;; Each attribute must be unique within a given element (if (assoc name attlist) - (error "XML: each attribute must be unique within an element")) + (error "XML: (Not Well-Formed) Each attribute must be unique within an element")) ;; Multiple whitespace characters should be replaced with a single one ;; in the attributes (let ((string (match-string 1)) (pos 0)) (replace-regexp-in-string "\\s-\\{2,\\}" " " string) - (push (cons name (xml-substitute-special string)) attlist)) + (let ((expansion (xml-substitute-special string))) + (unless (stringp expansion) + ; We say this is the constraint. It is acctually that + ; external entities nor "<" can be in an attribute value. + (error "XML: (Not Well-Formed) Entities in attributes cannot expand into elements")) + (push (cons name expansion) attlist))) (goto-char end-pos) (skip-syntax-forward " ")) @@ -465,24 +564,16 @@ first non-blank character after the tag." (defun xml-skip-dtd () "Skip the DTD at point. This follows the rule [28] in the XML specifications." - (forward-char (length "") - (error "XML: invalid DTD (excepting name of the document)")) - (condition-case nil - (progn - (forward-sexp) - (skip-syntax-forward " ") - (if (looking-at "\\[") - (re-search-forward "]\\s-*>") - (search-forward ">"))) - (error (error "XML: No end to the DTD")))) + (let ((xml-validating-parser nil)) + (xml-parse-dtd))) -(defun xml-parse-dtd () +(defun xml-parse-dtd (&optional parse-ns) "Parse the DTD at point." (forward-char (eval-when-compile (length "") - (error "XML: invalid DTD (excepting name of the document)")) + (if (and (looking-at ">") + xml-validating-parser) + (error "XML: (Validity) Invalid DTD (expecting name of the document)")) ;; Get the name of the document (looking-at xml-name-regexp) @@ -500,27 +591,27 @@ This follows the rule [28] in the XML specifications." (re-search-forward "\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'" nil t)) - (error "XML: missing public id")) + (error "XML: Missing Public ID")) (let ((pubid (match-string 1))) + (skip-syntax-forward " ") (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) - (error "XML: missing system id")) + (error "XML: Missing System ID")) (push (list pubid (match-string 1) 'public) dtd))) ((looking-at "SYSTEM\\s-+") (goto-char (match-end 0)) (unless (or (re-search-forward "\\='\\([^']*\\)'" nil t) (re-search-forward "\\=\"\\([^\"]*\\)\"" nil t)) - (error "XML: missing system id")) + (error "XML: Missing System ID")) (push (list (match-string 1) 'system) dtd))) (skip-syntax-forward " ") (if (eq ?> (char-after)) (forward-char) - (skip-syntax-forward " ") (if (not (eq (char-after) ?\[)) - (error "XML: bad DTD") + (error "XML: Bad DTD") (forward-char) ;; Parse the rest of the DTD - ;; Fixme: Deal with ENTITY, ATTLIST, NOTATION, PIs. + ;; Fixme: Deal with NOTATION, PIs. (while (not (looking-at "\\s-*\\]")) (skip-syntax-forward " ") (cond @@ -544,24 +635,87 @@ This follows the rule [28] in the XML specifications." ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution nil) (t - (error "XML: Invalid element type in the DTD"))) + (if xml-validating-parser + (error "XML: (Validity) Invalid element type in the DTD")))) ;; rule [45]: the element declaration must be unique - (if (assoc element dtd) - (error "XML: element declarations must be unique in a DTD (<%s>)" + (if (and (assoc element dtd) + xml-validating-parser) + (error "XML: (Validity) Element declarations must be unique in a DTD (<%s>)" element)) ;; Store the element in the DTD (push (list element type) dtd) (goto-char end-pos)) + + ;; Translation of rule [52] of XML specifications + ((looking-at (concat "")) + + ;; We don't do anything with ATTLIST currently + (goto-char (match-end 0))) + ((looking-at "")) - + ((looking-at (concat "")) + (let ((name (match-string 1)) + (value (substring (match-string 2) 1 + (- (length (match-string 2)) 1)))) + (goto-char (match-end 0)) + (setq xml-entity-alist + (append xml-entity-alist + (list (cons name + (with-temp-buffer + (insert value) + (goto-char (point-min)) + (xml-parse-fragment + xml-validating-parser + parse-ns)))))))) + ((or (looking-at (concat "")) + (looking-at (concat ""))) + (let ((name (match-string 1)) + (file (substring (match-string 2) 1 + (- (length (match-string 2)) 1)))) + (goto-char (match-end 0)) + (setq xml-entity-alist + (append xml-entity-alist + (list (cons name (with-temp-buffer + (insert-file-contents file) + (goto-char (point-min)) + (xml-parse-fragment + xml-validating-parser + parse-ns)))))))) + ;; skip parameter entity declarations + ((or (looking-at (concat "")) + (looking-at (concat ""))) + (goto-char (match-end 0))) + ;; skip parameter entities + ((looking-at (concat "%" xml-name-re ";")) + (goto-char (match-end 0))) (t - (error "XML: Invalid DTD item"))) - - ;; Skip the end of the DTD - (search-forward ">")))) + (when xml-validating-parser + (error "XML: (Validity) Invalid DTD item")))))) + (if (looking-at "\\s-*]>") + (goto-char (match-end 0)))) (nreverse dtd))) (defun xml-parse-elem-type (string) @@ -603,40 +757,73 @@ This follows the rule [28] in the XML specifications." ;;** ;;******************************************************************* -(eval-when-compile - (defvar str)) ; dynamic from replace-regexp-in-string - -;; Fixme: Take declared entities from the DTD when they're available. -(defun xml-substitute-entity (match) - "Subroutine of xml-substitute-special." - (save-match-data - (let ((match1 (match-string 1 str))) - (cond ((string= match1 "lt") "<") - ((string= match1 "gt") ">") - ((string= match1 "apos") "'") - ((string= match1 "quot") "\"") - ((string= match1 "amp") "&") - ((and (string-match "#\\([0-9]+\\)" match1) - (let ((c (decode-char - 'ucs - (string-to-number (match-string 1 match1))))) - (if c (string c))))) ; else unrepresentable - ((and (string-match "#x\\([[:xdigit:]]+\\)" match1) - (let ((c (decode-char - 'ucs - (string-to-number (match-string 1 match1) 16)))) - (if c (string c))))) - ;; Default to asis. Arguably, unrepresentable code points - ;; might be best replaced with U+FFFD. - (t match))))) - (defun xml-substitute-special (string) "Return STRING, after subsituting entity references." ;; This originally made repeated passes through the string from the ;; beginning, which isn't correct, since then either "&amp;" or ;; "&amp;" won't DTRT. - (replace-regexp-in-string "&\\([^;]+\\);" - #'xml-substitute-entity string t t)) + + (let ((point 0) + children end-point) + (while (string-match "&\\([^;]*\\);" string point) + (setq end-point (match-end 0)) + (let* ((this-part (match-string 1 string)) + (prev-part (substring string point (match-beginning 0))) + (entity (assoc this-part xml-entity-alist)) + (expansion + (cond ((string-match "#\\([0-9]+\\)" this-part) + (let ((c (decode-char + 'ucs + (string-to-number (match-string 1 this-part))))) + (if c (string c)))) + ((string-match "#x\\([[:xdigit:]]+\\)" this-part) + (let ((c (decode-char + 'ucs + (string-to-number (match-string 1 this-part) 16)))) + (if c (string c)))) + (entity + (cdr entity)) + ((eq (length this-part) 0) + (error "XML: (Not Well-Formed) No entity given")) + (t + (if xml-validating-parser + (error "XML: (Validity) Undefined entity `%s'" + this-part) + xml-undefined-entity))))) + + (cond ((null children) + ;; FIXME: If we have an entity that expands into XML, this won't work. + (setq children + (concat prev-part expansion))) + ((stringp children) + (if (stringp expansion) + (setq children (concat children prev-part expansion)) + (setq children (list expansion (concat prev-part children))))) + ((and (stringp expansion) + (stringp (car children))) + (setcar children (concat prev-part expansion (car children)))) + ((stringp expansion) + (setq children (append (concat prev-part expansion) + children))) + ((stringp (car children)) + (setcar children (concat (car children) prev-part)) + (setq children (append expansion children))) + (t + (setq children (list expansion + prev-part + children)))) + (setq point end-point))) + (cond ((stringp children) + (concat children (substring string point))) + ((stringp (car (last children))) + (concat (car (last children)) (substring string point))) + ((null children) + string) + (t + (concat (mapconcat 'identity + (nreverse children) + "") + (substring string point)))))) ;;******************************************************************* ;;** @@ -645,9 +832,15 @@ This follows the rule [28] in the XML specifications." ;;** ;;******************************************************************* -(defun xml-debug-print (xml) +(defun xml-debug-print (xml &optional indent-string) + "Outputs the XML in the current buffer. +XML can be a tree or a list of nodes. +The first line is indented with the optional INDENT-STRING." + (setq indent-string (or indent-string "")) (dolist (node xml) - (xml-debug-print-internal node ""))) + (xml-debug-print-internal node indent-string))) + +(defalias 'xml-print 'xml-debug-print) (defun xml-debug-print-internal (xml indent-string) "Outputs the XML tree in the current buffer. @@ -662,24 +855,28 @@ The first line is indented with INDENT-STRING." (insert ?\ (symbol-name (caar attlist)) "=\"" (cdar attlist) ?\") (setq attlist (cdr attlist))) - (insert ?>) - (setq tree (xml-node-children tree)) - ;; output the children - (dolist (node tree) - (cond - ((listp node) - (insert ?\n) - (xml-debug-print-internal node (concat indent-string " "))) - ((stringp node) (insert node)) - (t - (error "Invalid XML tree")))) - - (insert ?\n indent-string - ?< ?/ (symbol-name (xml-node-name xml)) ?>))) + (if (null tree) + (insert ?/ ?>) + (insert ?>) + + ;; output the children + (dolist (node tree) + (cond + ((listp node) + (insert ?\n) + (xml-debug-print-internal node (concat indent-string " "))) + ((stringp node) (insert node)) + (t + (error "Invalid XML tree")))) + + (when (not (and (null (cdr tree)) + (stringp (car tree)))) + (insert ?\n indent-string)) + (insert ?< ?/ (symbol-name (xml-node-name xml)) ?>)))) (provide 'xml) -;;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b +;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b ;;; xml.el ends here