X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/b17f53abc28496125965f36147b76ea5f6a2b4fb..562dd5e9532d75d18843a37a1e42a1f4398d4823:/lisp/xml.el diff --git a/lisp/xml.el b/lisp/xml.el index 62a9546144..52bb0de7ea 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -1,7 +1,6 @@ ;;; xml.el --- XML parser -;; Copyright (C) 2000, 2001, 2002, 2003, 2004, -;; 2005, 2006, 2007 Free Software Foundation, Inc. +;; Copyright (C) 2000-2011 Free Software Foundation, Inc. ;; Author: Emmanuel Briot ;; Maintainer: Mark A. Hershberger @@ -9,10 +8,10 @@ ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 3, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,9 +19,7 @@ ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: @@ -191,7 +188,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (name-chars (concat "-[:digit:]." start-chars)) ;;[3] S ::= (#x20 | #x9 | #xD | #xA)+ (whitespace "[ \t\n\r]")) - ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] + ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] ;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF] ;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] ;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF] @@ -229,7 +226,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (defvar xml-notation-type-re (concat "\\(?:NOTATION" whitespace "(" whitespace "*" xml-name-re "\\(?:" whitespace "*|" whitespace "*" xml-name-re "\\)*" whitespace "*)\\)")) ;;[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] [VC: No Duplicate Tokens] - (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re + (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re "\\(?:" whitespace "*|" whitespace "*" xml-nmtoken-re "\\)*" whitespace ")\\)")) ;;[57] EnumeratedType ::= NotationType | Enumeration @@ -250,7 +247,7 @@ If PARSE-NS is non-nil, then QNAMES are expanded." xml-pe-reference-re "\\|" xml-reference-re "\\)*'\\)"))) ;;[75] ExternalID ::= 'SYSTEM' S SystemLiteral ;; | 'PUBLIC' S PubidLiteral S SystemLiteral -;;[76] NDataDecl ::= S 'NDATA' S +;;[76] NDataDecl ::= S 'NDATA' S ;;[73] EntityDef ::= EntityValue| (ExternalID NDataDecl?) ;;[71] GEDecl ::= '' ;;[74] PEDef ::= EntityValue | ExternalID @@ -323,18 +320,20 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (progn (forward-char -1) (setq result (xml-parse-tag parse-dtd parse-ns)) - (if (and xml result (not xml-sub-parser)) - ;; translation of rule [1] of XML specifications - (error "XML: (Not Well-Formed) Only one root tag allowed") - (cond - ((null result)) - ((and (listp (car result)) - parse-dtd) - (setq dtd (car result)) - (if (cdr result) ; possible leading comment - (add-to-list 'xml (cdr result)))) - (t - (add-to-list 'xml result))))) + (cond + ((null result) + ;; Not looking at an xml start tag. + (forward-char 1)) + ((and xml (not xml-sub-parser)) + ;; Translation of rule [1] of XML specifications + (error "XML: (Not Well-Formed) Only one root tag allowed")) + ((and (listp (car result)) + parse-dtd) + (setq dtd (car result)) + (if (cdr result) ; possible leading comment + (add-to-list 'xml (cdr result)))) + (t + (add-to-list 'xml result)))) (goto-char (point-max)))) (if parse-dtd (cons dtd (nreverse xml)) @@ -434,7 +433,7 @@ Returns one of: (let* ((node-name (match-string-no-properties 1)) ;; Parse the attribute list. (attrs (xml-parse-attlist xml-ns)) - children pos) + children) ;; add the xmlns:* attrs to our cache (when (consp xml-ns) @@ -496,9 +495,7 @@ Returns one of: (defun xml-parse-string () "Parse the next whatever. Could be a string, or an element." (let* ((pos (point)) - (string (progn (if (search-forward "<" nil t) - (forward-char -1) - (goto-char (point-max))) + (string (progn (skip-chars-forward "^<") (buffer-substring-no-properties pos (point))))) ;; Clean up the string. As per XML specifications, the XML ;; processor should always pass the whole string to the @@ -539,8 +536,7 @@ Leave point at the first non-blank character after the tag." ;; Multiple whitespace characters should be replaced with a single one ;; in the attributes - (let ((string (match-string-no-properties 1)) - (pos 0)) + (let ((string (match-string-no-properties 1))) (replace-regexp-in-string "\\s-\\{2,\\}" " " string) (let ((expansion (xml-substitute-special string))) (unless (stringp expansion) @@ -637,7 +633,7 @@ This follows the rule [28] in the XML specifications." ((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution nil) (t - (if xml-validating-parser + (if xml-validating-parser (error "XML: (Validity) Invalid element type in the DTD")))) ;; rule [45]: the element declaration must be unique @@ -669,7 +665,7 @@ This follows the rule [28] in the XML specifications." (goto-char (match-end 0)) (setq xml-entity-alist (append xml-entity-alist - (list (cons name + (list (cons name (with-temp-buffer (insert value) (goto-char (point-min)) @@ -772,7 +768,7 @@ This follows the rule [28] in the XML specifications." (let* ((this-part (match-string-no-properties 1 string)) (prev-part (substring string point (match-beginning 0))) (entity (assoc this-part xml-entity-alist)) - (expansion + (expansion (cond ((string-match "#\\([0-9]+\\)" this-part) (let ((c (decode-char 'ucs @@ -827,6 +823,25 @@ This follows the rule [28] in the XML specifications." "") (substring string point)))))) +(defun xml-substitute-numeric-entities (string) + "Substitute SGML numeric entities by their respective utf characters. +This function replaces numeric entities in the input STRING and +returns the modified string. For example \"*\" gets replaced +by \"*\"." + (if (and string (stringp string)) + (let ((start 0)) + (while (string-match "&#\\([0-9]+\\);" string start) + (condition-case nil + (setq string (replace-match + (string (read (substring string + (match-beginning 1) + (match-end 1)))) + nil nil string)) + (error nil)) + (setq start (1+ (match-beginning 0)))) + string) + nil)) + ;;******************************************************************* ;;** ;;** Printing a tree. @@ -852,10 +867,9 @@ xml-entity-alist." (if (rassoc char xml-entity-alist) (concat "&" (car (rassoc char xml-entity-alist)) ";") char))) - (if (multibyte-string-p string) - (encode-coding-string string 'utf-8) - string) - "")) + ;; This differs from the non-unicode branch. Just + ;; grabbing the string works here. + string "")) (defun xml-debug-print-internal (xml indent-string) "Outputs the XML tree in the current buffer. @@ -895,5 +909,4 @@ The first line is indented with INDENT-STRING." (provide 'xml) -;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b ;;; xml.el ends here