;;; xml.el --- XML parser
-;; Copyright (C) 2000, 2001, 2002, 2003, 2004,
-;; 2005, 2006, 2007 Free Software Foundation, Inc.
+;; Copyright (C) 2000-2011 Free Software Foundation, Inc.
;; Author: Emmanuel Briot <briot@gnat.com>
;; Maintainer: Mark A. Hershberger <mah@everybody.org>
;; This file is part of GNU Emacs.
-;; GNU Emacs is free software; you can redistribute it and/or modify
+;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 3, or (at your option)
-;; any later version.
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
(name-chars (concat "-[:digit:]." start-chars))
;;[3] S ::= (#x20 | #x9 | #xD | #xA)+
(whitespace "[ \t\n\r]"))
- ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
+ ;;[4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6]
;; | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] | [#x37F-#x1FFF]
;; | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF]
;; | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
(defvar xml-notation-type-re (concat "\\(?:NOTATION" whitespace "(" whitespace "*" xml-name-re
"\\(?:" whitespace "*|" whitespace "*" xml-name-re "\\)*" whitespace "*)\\)"))
;;[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] [VC: No Duplicate Tokens]
- (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re
+ (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re
"\\(?:" whitespace "*|" whitespace "*" xml-nmtoken-re "\\)*"
- whitespace ")\\)"))
+ ;; Rule [59] has `S?' before the closing paren, so the trailing
+ ;; whitespace is optional (as in `xml-notation-type-re').
+ whitespace "*)\\)"))
;;[57] EnumeratedType ::= NotationType | Enumeration
xml-pe-reference-re "\\|" xml-reference-re "\\)*'\\)")))
;;[75] ExternalID ::= 'SYSTEM' S SystemLiteral
;; | 'PUBLIC' S PubidLiteral S SystemLiteral
-;;[76] NDataDecl ::= S 'NDATA' S
+;;[76] NDataDecl ::= S 'NDATA' S Name
;;[73] EntityDef ::= EntityValue| (ExternalID NDataDecl?)
;;[71] GEDecl ::= '<!ENTITY' S Name S EntityDef S? '>'
;;[74] PEDef ::= EntityValue | ExternalID
(progn
(forward-char -1)
(setq result (xml-parse-tag parse-dtd parse-ns))
- (if (and xml result (not xml-sub-parser))
- ;; translation of rule [1] of XML specifications
- (error "XML: (Not Well-Formed) Only one root tag allowed")
- (cond
- ((null result))
- ((and (listp (car result))
- parse-dtd)
- (setq dtd (car result))
- (if (cdr result) ; possible leading comment
- (add-to-list 'xml (cdr result))))
- (t
- (add-to-list 'xml result)))))
+ (cond
+ ((null result)
+ ;; Not looking at an xml start tag.
+ (forward-char 1))
+ ((and xml (not xml-sub-parser))
+ ;; Translation of rule [1] of XML specifications
+ (error "XML: (Not Well-Formed) Only one root tag allowed"))
+ ((and (listp (car result))
+ parse-dtd)
+ (setq dtd (car result))
+ (if (cdr result) ; possible leading comment
+ (add-to-list 'xml (cdr result))))
+ (t
+ (add-to-list 'xml result))))
(goto-char (point-max))))
(if parse-dtd
(cons dtd (nreverse xml))
(let* ((node-name (match-string-no-properties 1))
;; Parse the attribute list.
(attrs (xml-parse-attlist xml-ns))
- children pos)
+ children)
;; add the xmlns:* attrs to our cache
(when (consp xml-ns)
(defun xml-parse-string ()
"Parse the next whatever. Could be a string, or an element."
(let* ((pos (point))
- (string (progn (if (search-forward "<" nil t)
- (forward-char -1)
- (goto-char (point-max)))
+ (string (progn (skip-chars-forward "^<")
(buffer-substring-no-properties pos (point)))))
;; Clean up the string. As per XML specifications, the XML
;; processor should always pass the whole string to the
;; Multiple whitespace characters should be replaced with a single one
;; in the attributes
- (let ((string (match-string-no-properties 1))
- (pos 0))
+ (let ((string (match-string-no-properties 1)))
(replace-regexp-in-string "\\s-\\{2,\\}" " " string)
(let ((expansion (xml-substitute-special string)))
(unless (stringp expansion)
((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
nil)
(t
- (if xml-validating-parser
+ (if xml-validating-parser
(error "XML: (Validity) Invalid element type in the DTD"))))
;; rule [45]: the element declaration must be unique
(goto-char (match-end 0))
(setq xml-entity-alist
(append xml-entity-alist
- (list (cons name
+ (list (cons name
(with-temp-buffer
(insert value)
(goto-char (point-min))
(let* ((this-part (match-string-no-properties 1 string))
(prev-part (substring string point (match-beginning 0)))
(entity (assoc this-part xml-entity-alist))
- (expansion
+ (expansion
(cond ((string-match "#\\([0-9]+\\)" this-part)
(let ((c (decode-char
'ucs
"")
(substring string point))))))
+(defun xml-substitute-numeric-entities (string)
+  "Substitute SGML numeric entities by their respective utf characters.
+This function replaces numeric entities in the input STRING and
+returns the modified string.  For example \"&#42;\" gets replaced
+by \"*\".
+
+Entities whose number is not a valid character code are left
+unchanged.  Return nil if STRING is not a string."
+  (when (stringp string)
+    (let ((start 0))
+      (while (string-match "&#\\([0-9]+\\);" string start)
+        (condition-case nil
+            (setq string (replace-match
+                          (string (string-to-number (match-string 1 string)))
+                          ;; FIXEDCASE and LITERAL: insert the character
+                          ;; verbatim, so that a backslash (entity
+                          ;; number 92) is not parsed as a replacement
+                          ;; escape and rejected.
+                          t t string))
+          ;; `string' signals an error for an invalid character code;
+          ;; leave such an entity as it is.
+          (error nil))
+        ;; Resume just after the substituted character (or after the
+        ;; "&" of an entity that was left alone), so that substituted
+        ;; text is never scanned again.
+        (setq start (1+ (match-beginning 0))))
+      string)))
+
;;*******************************************************************
;;**
;;** Printing a tree.
(defalias 'xml-print 'xml-debug-print)
(defun xml-escape-string (string)
+ "Return STRING with its characters replaced by entity references.
+Each character whose one-character string is the value (cdr) of an
+entry in `xml-entity-alist' is replaced by \"&NAME;\", where NAME is
+the key (car) of the first such entry.  All other characters are
+kept unchanged."
(mapconcat (lambda (byte)
(let ((char (char-to-string byte)))
(if (rassoc char xml-entity-alist)
(concat "&" (car (rassoc char xml-entity-alist)) ";")
char)))
- (if (multibyte-string-p string)
- (encode-coding-string string 'utf-8)
- string)
- ""))
+ ;; This differs from the non-unicode branch. Just
+ ;; grabbing the string works here.
+ string ""))
(defun xml-debug-print-internal (xml indent-string)
"Outputs the XML tree in the current buffer.
(provide 'xml)
-;; arch-tag: 5864b283-5a68-4b59-a20d-36a72b353b9b
;;; xml.el ends here