;;; xml.el --- XML parser
;; Copyright (C) 2000, 2001, 2002, 2003, 2004,
-;; 2005, 2006, 2007 Free Software Foundation, Inc.
+;; 2005, 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
;; Author: Emmanuel Briot <briot@gnat.com>
;; Maintainer: Mark A. Hershberger <mah@everybody.org>
;; This file is part of GNU Emacs.
-;; GNU Emacs is free software; you can redistribute it and/or modify
+;; GNU Emacs is free software: you can redistribute it and/or modify
;; it under the terms of the GNU General Public License as published by
-;; the Free Software Foundation; either version 2, or (at your option)
-;; any later version.
+;; the Free Software Foundation, either version 3 of the License, or
+;; (at your option) any later version.
;; GNU Emacs is distributed in the hope that it will be useful,
;; but WITHOUT ANY WARRANTY; without even the implied warranty of
;; GNU General Public License for more details.
;; You should have received a copy of the GNU General Public License
-;; along with GNU Emacs; see the file COPYING. If not, write to the
-;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-;; Boston, MA 02110-1301, USA.
+;; along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>.
;;; Commentary:
;;; Code:
-;; Note that {buffer-substring,match-string}-no-properties were
-;; formerly used in several places, but that removes composition info.
+;; Note that buffer-substring and match-string were formerly used in
+;; several places, because the -no-properties variants remove
+;; composition info. However, after some discussion on emacs-devel,
+;; the consensus was that the speed of the -no-properties variants was
+;; a worthwhile tradeoff especially since we're usually parsing files
+;; instead of hand-crafted XML.
;;*******************************************************************
;;**
(progn
(forward-char -1)
(setq result (xml-parse-tag parse-dtd parse-ns))
- (if (and xml result (not xml-sub-parser))
- ;; translation of rule [1] of XML specifications
- (error "XML: (Not Well-Formed) Only one root tag allowed")
- (cond
- ((null result))
- ((and (listp (car result))
- parse-dtd)
- (setq dtd (car result))
- (if (cdr result) ; possible leading comment
- (add-to-list 'xml (cdr result))))
- (t
- (add-to-list 'xml result)))))
+ (cond
+ ((null result)
+ ;; Not looking at an xml start tag.
+ (forward-char 1))
+ ((and xml (not xml-sub-parser))
+ ;; Translation of rule [1] of XML specifications
+ (error "XML: (Not Well-Formed) Only one root tag allowed"))
+ ((and (listp (car result))
+ parse-dtd)
+ (setq dtd (car result))
+ (if (cdr result) ; possible leading comment
+ (add-to-list 'xml (cdr result))))
+ (t
+ (add-to-list 'xml result))))
(goto-char (point-max))))
(if parse-dtd
(cons dtd (nreverse xml))
(unless (search-forward "]]>" nil t)
(error "XML: (Not Well Formed) CDATA section does not end anywhere in the document"))
(concat
- (buffer-substring pos (match-beginning 0))
+ (buffer-substring-no-properties pos (match-beginning 0))
(xml-parse-string))))
;; DTD for the document
((looking-at "<!DOCTYPE")
(goto-char (match-end 1))
;; Parse this node
- (let* ((node-name (match-string 1))
+ (let* ((node-name (match-string-no-properties 1))
;; Parse the attribute list.
(attrs (xml-parse-attlist xml-ns))
children pos)
(nreverse children)))
;; This was an invalid start tag (Expected ">", but didn't see it.)
(error "XML: (Well-Formed) Couldn't parse tag: %s"
- (buffer-substring (- (point) 10) (+ (point) 1)))))))
+ (buffer-substring-no-properties (- (point) 10) (+ (point) 1)))))))
(t ;; (Not one of PI, CDATA, Comment, End tag, or Start tag)
(unless xml-sub-parser ; Usually, we error out.
(error "XML: (Well-Formed) Invalid character"))
(defun xml-parse-string ()
"Parse the next whatever. Could be a string, or an element."
(let* ((pos (point))
- (string (progn (if (search-forward "<" nil t)
- (forward-char -1)
- (goto-char (point-max)))
- (buffer-substring pos (point)))))
+ (string (progn (skip-chars-forward "^<")
+ (buffer-substring-no-properties pos (point)))))
;; Clean up the string. As per XML specifications, the XML
;; processor should always pass the whole string to the
;; application. But \r's should be replaced:
(while (looking-at (eval-when-compile
(concat "\\(" xml-name-regexp "\\)\\s-*=\\s-*")))
(setq end-pos (match-end 0))
- (setq name (xml-maybe-do-ns (match-string 1) nil xml-ns))
+ (setq name (xml-maybe-do-ns (match-string-no-properties 1) nil xml-ns))
(goto-char end-pos)
;; See also: http://www.w3.org/TR/2000/REC-xml-20001006#AVNormalize
;; Multiple whitespace characters should be replaced with a single one
;; in the attributes
- (let ((string (match-string 1))
+ (let ((string (match-string-no-properties 1))
(pos 0))
(replace-regexp-in-string "\\s-\\{2,\\}" " " string)
(let ((expansion (xml-substitute-special string)))
;; Get the name of the document
(looking-at xml-name-regexp)
- (let ((dtd (list (match-string 0) 'dtd))
+ (let ((dtd (list (match-string-no-properties 0) 'dtd))
type element end-pos)
(goto-char (match-end 0))
"\\='\\([[:space:][:alnum:]-()+,./:=?;!*#@$_%]*\\)'"
nil t))
(error "XML: Missing Public ID"))
- (let ((pubid (match-string 1)))
+ (let ((pubid (match-string-no-properties 1)))
(skip-syntax-forward " ")
(unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
(re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
(error "XML: Missing System ID"))
- (push (list pubid (match-string 1) 'public) dtd)))
+ (push (list pubid (match-string-no-properties 1) 'public) dtd)))
((looking-at "SYSTEM\\s-+")
(goto-char (match-end 0))
(unless (or (re-search-forward "\\='\\([^']*\\)'" nil t)
(re-search-forward "\\=\"\\([^\"]*\\)\"" nil t))
(error "XML: Missing System ID"))
- (push (list (match-string 1) 'system) dtd)))
+ (push (list (match-string-no-properties 1) 'system) dtd)))
(skip-syntax-forward " ")
(if (eq ?> (char-after))
(forward-char)
((looking-at
"<!ELEMENT\\s-+\\([[:alnum:].%;]+\\)\\s-+\\([^>]+\\)>")
- (setq element (match-string 1)
+ (setq element (match-string-no-properties 1)
type (match-string-no-properties 2))
(setq end-pos (match-end 0))
((string-match "^ANY[ \t\n\r]*$" type) ;; any type of contents
(setq type 'any))
((string-match "^(\\(.*\\))[ \t\n\r]*$" type) ;; children ([47])
- (setq type (xml-parse-elem-type (match-string 1 type))))
+ (setq type (xml-parse-elem-type (match-string-no-properties 1 type))))
((string-match "^%[^;]+;[ \t\n\r]*$" type) ;; substitution
nil)
(t
((looking-at (concat "<!ENTITY[ \t\n\r]*\\(" xml-name-re
"\\)[ \t\n\r]*\\(" xml-entity-value-re
"\\)[ \t\n\r]*>"))
- (let ((name (match-string 1))
- (value (substring (match-string 2) 1
- (- (length (match-string 2)) 1))))
+ (let ((name (match-string-no-properties 1))
+ (value (substring (match-string-no-properties 2) 1
+ (- (length (match-string-no-properties 2)) 1))))
(goto-char (match-end 0))
(setq xml-entity-alist
(append xml-entity-alist
"\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'"
"[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)"
"[ \t\n\r]*>")))
- (let ((name (match-string 1))
- (file (substring (match-string 2) 1
- (- (length (match-string 2)) 1))))
+ (let ((name (match-string-no-properties 1))
+ (file (substring (match-string-no-properties 2) 1
+ (- (length (match-string-no-properties 2)) 1))))
(goto-char (match-end 0))
(setq xml-entity-alist
(append xml-entity-alist
(let (elem modifier)
(if (string-match "(\\([^)]+\\))\\([+*?]?\\)" string)
(progn
- (setq elem (match-string 1 string)
- modifier (match-string 2 string))
+ (setq elem (match-string-no-properties 1 string)
+ modifier (match-string-no-properties 2 string))
(if (string-match "|" elem)
(setq elem (cons 'choice
(mapcar 'xml-parse-elem-type
(mapcar 'xml-parse-elem-type
(split-string elem ",")))))))
(if (string-match "[ \t\n\r]*\\([^+*?]+\\)\\([+*?]?\\)" string)
- (setq elem (match-string 1 string)
- modifier (match-string 2 string))))
+ (setq elem (match-string-no-properties 1 string)
+ modifier (match-string-no-properties 2 string))))
(if (and (stringp elem) (string= elem "#PCDATA"))
(setq elem 'pcdata))
children end-point)
(while (string-match "&\\([^;]*\\);" string point)
(setq end-point (match-end 0))
- (let* ((this-part (match-string 1 string))
+ (let* ((this-part (match-string-no-properties 1 string))
(prev-part (substring string point (match-beginning 0)))
(entity (assoc this-part xml-entity-alist))
(expansion
(cond ((string-match "#\\([0-9]+\\)" this-part)
(let ((c (decode-char
'ucs
- (string-to-number (match-string 1 this-part)))))
+ (string-to-number (match-string-no-properties 1 this-part)))))
(if c (string c))))
((string-match "#x\\([[:xdigit:]]+\\)" this-part)
(let ((c (decode-char
'ucs
- (string-to-number (match-string 1 this-part) 16))))
+ (string-to-number (match-string-no-properties 1 this-part) 16))))
(if c (string c))))
(entity
(cdr entity))
"")
(substring string point))))))
+(defun xml-substitute-numeric-entities (string)
+ "Substitute SGML numeric entities by their respective utf characters.
+This function replaces numeric entities in the input STRING and
+returns the modified string. For example \"*\" gets replaced
+by \"*\"."
+ (if (and string (stringp string))
+ (let ((start 0))
+ (while (string-match "&#\\([0-9]+\\);" string start)
+ (condition-case nil
+ (setq string (replace-match
+ (string (read (substring string
+ (match-beginning 1)
+ (match-end 1))))
+ nil nil string))
+ (error nil))
+ (setq start (1+ (match-beginning 0))))
+ string)
+ nil))
+
;;*******************************************************************
;;**
;;** Printing a tree.
(defalias 'xml-print 'xml-debug-print)
+(defun xml-escape-string (string)
+ "Return the string with entity substitutions made from
+xml-entity-alist."
+ (mapconcat (lambda (byte)
+ (let ((char (char-to-string byte)))
+ (if (rassoc char xml-entity-alist)
+ (concat "&" (car (rassoc char xml-entity-alist)) ";")
+ char)))
+ ;; This differs from the non-unicode branch. Just
+ ;; grabbing the string works here.
+ string ""))
+
(defun xml-debug-print-internal (xml indent-string)
"Outputs the XML tree in the current buffer.
The first line is indented with INDENT-STRING."
;; output the attribute list
(setq attlist (xml-node-attributes tree))
(while attlist
- (insert ?\ (symbol-name (caar attlist)) "=\"" (cdar attlist) ?\")
+ (insert ?\ (symbol-name (caar attlist)) "=\""
+ (xml-escape-string (cdar attlist)) ?\")
(setq attlist (cdr attlist)))
(setq tree (xml-node-children tree))
((listp node)
(insert ?\n)
(xml-debug-print-internal node (concat indent-string " ")))
- ((stringp node) (insert node))
+ ((stringp node)
+ (insert (xml-escape-string node)))
(t
(error "Invalid XML tree"))))