;;; xml.el --- XML parser
-;; Copyright (C) 2000-2012 Free Software Foundation, Inc.
+;; Copyright (C) 2000-2015 Free Software Foundation, Inc.
;; Author: Emmanuel Briot <briot@gnat.com>
;; Maintainer: Mark A. Hershberger <mah@everybody.org>
would be represented by
- '(\"\" . \"foo\")."
+ '(\"\" . \"foo\").
+
+If you'd just like a plain symbol instead, use 'symbol-qnames in
+the PARSE-NS argument."
(car node))
;; [68] EntityRef ::= '&' Name ';'
(defconst xml-entity-ref (concat "&" xml-name-re ";"))
-(defconst xml-entity-or-char-ref-re (concat "&\\(?:#\\(x\\)?\\([0-9]+\\)\\|\\("
+(defconst xml-entity-or-char-ref-re (concat "&\\(?:#\\(x\\)?\\([0-9a-fA-F]+\\)\\|\\("
xml-name-re "\\)\\);"))
;; [69] PEReference ::= '%' Name ';'
"Parse the well-formed XML file FILE.
Return the top node with all its children.
If PARSE-DTD is non-nil, the DTD is parsed rather than skipped.
-If PARSE-NS is non-nil, then QNAMES are expanded."
+
+If PARSE-NS is non-nil, then QNAMES are expanded. By default,
+the variable `xml-default-ns' is the mapping from namespaces to
+URIs, and expanded names will be returned as a cons
+
+ (\"namespace:\" . \"foo\").
+
+If PARSE-NS is an alist, it will be used as the mapping from
+namespace to URIs instead.
+
+If it is the symbol 'symbol-qnames, expanded names will be
+returned as a plain symbol 'namespace:foo instead of a cons.
+
+Both features can be combined by providing a cons cell
+
+ (symbol-qnames . ALIST)."
(with-temp-buffer
(insert-file-contents file)
(xml--parse-buffer parse-dtd parse-ns)))
If BUFFER is nil, it defaults to the current buffer.
If PARSE-DTD is non-nil, parse the DTD and return it as the first
element of the list.
-If PARSE-NS is non-nil, expand QNAMES."
+If PARSE-NS is non-nil, then QNAMES are expanded. By default,
+the variable `xml-default-ns' is the mapping from namespaces to
+URIs, and expanded names will be returned as a cons
+
+ (\"namespace:\" . \"foo\").
+
+If PARSE-NS is an alist, it will be used as the mapping from
+namespace to URIs instead.
+
+If it is the symbol 'symbol-qnames, expanded names will be
+returned as a plain symbol 'namespace:foo instead of a cons.
+
+Both features can be combined by providing a cons cell
+
+ (symbol-qnames . ALIST)."
;; Use fixed syntax table to ensure regexp char classes and syntax
;; specs DTRT.
(unless buffer
During namespace-aware parsing, any name without a namespace is
put into the namespace identified by DEFAULT. nil is used to
-specify that the name shouldn't be given a namespace."
+specify that the name shouldn't be given a namespace.
+Expanded names will by default be returned as a cons. If you
+would like to get plain symbols instead, provide a cons cell
+
+ (symbol-qnames . ALIST)
+
+in the XML-NS argument."
(if (consp xml-ns)
- (let* ((nsp (string-match ":" name))
+ (let* ((symbol-qnames (eq (car-safe xml-ns) 'symbol-qnames))
+ (nsp (string-match ":" name))
(lname (if nsp (substring name (match-end 0)) name))
(prefix (if nsp (substring name 0 (match-beginning 0)) default))
(special (and (string-equal lname "xmlns") (not prefix)))
;; Setting default to nil will insure that there is not
;; matching cons in xml-ns. In which case we
(ns (or (cdr (assoc (if special "xmlns" prefix)
- xml-ns))
+ (if symbol-qnames (cdr xml-ns) xml-ns)))
"")))
- (cons ns (if special "" lname)))
+ (if (and symbol-qnames
+ (not (string= prefix "xmlns")))
+ (intern (concat ns lname))
+ (cons ns (if special "" lname))))
(intern name)))
(defun xml-parse-tag (&optional parse-dtd parse-ns)
"Parse the tag at point.
If PARSE-DTD is non-nil, the DTD of the document, if any, is parsed and
returned as the first element in the list.
-If PARSE-NS is non-nil, expand QNAMES; if the value of PARSE-NS
-is a list, use it as an alist mapping namespaces to URIs.
+If PARSE-NS is non-nil, expand QNAMES; for further details, see
+`xml-parse-region'.
Return one of:
- a list : the matching node
(defun xml-parse-tag-1 (&optional parse-dtd parse-ns)
"Like `xml-parse-tag', but possibly modify the buffer while working."
- (let ((xml-validating-parser (or parse-dtd xml-validating-parser))
- (xml-ns (cond ((consp parse-ns) parse-ns)
- (parse-ns xml-default-ns))))
+ (let* ((xml-validating-parser (or parse-dtd xml-validating-parser))
+ (xml-ns
+ (cond ((eq parse-ns 'symbol-qnames)
+ (cons 'symbol-qnames xml-default-ns))
+ ((or (consp (car-safe parse-ns))
+ (and (eq (car-safe parse-ns) 'symbol-qnames)
+ (listp (cdr parse-ns))))
+ parse-ns)
+ (parse-ns
+ xml-default-ns))))
(cond
;; Processing instructions, like <?xml version="1.0"?>.
- ((looking-at "<\\?")
+ ((looking-at-p "<\\?")
(search-forward "?>")
(skip-syntax-forward " ")
(xml-parse-tag-1 parse-dtd xml-ns))
(buffer-substring-no-properties pos (match-beginning 0))
(xml-parse-string))))
;; DTD for the document
- ((looking-at "<!DOCTYPE[ \t\n\r]")
+ ((looking-at-p "<!DOCTYPE[ \t\n\r]")
(let ((dtd (xml-parse-dtd parse-ns)))
(skip-syntax-forward " ")
(if xml-validating-parser
(cons dtd (xml-parse-tag-1 nil xml-ns))
(xml-parse-tag-1 nil xml-ns))))
;; skip comments
- ((looking-at "<!--")
+ ((looking-at-p "<!--")
(search-forward "-->")
;; FIXME: This loses the skipped-over spaces.
(skip-syntax-forward " ")
(let ((xml-sub-parser t))
(xml-parse-tag-1 parse-dtd xml-ns))))
;; end tag
- ((looking-at "</")
+ ((looking-at-p "</")
'())
;; opening tag
((looking-at (eval-when-compile (concat "<\\(" xml-name-re "\\)")))
(equal "http://www.w3.org/2000/xmlns/"
(caar attr)))
(push (cons (cdar attr) (cdr attr))
- xml-ns))))
+ (if (symbolp (car xml-ns))
+ (cdr xml-ns)
+ xml-ns)))))
(setq children (list attrs (xml-maybe-do-ns node-name "" xml-ns)))
(cond
;; is this an empty element ?
- ((looking-at "/>")
+ ((looking-at-p "/>")
(forward-char 2)
(nreverse children))
;; is this a valid start tag ?
((eobp)
(error "XML: (Not Well-Formed) End of document while reading element `%s'"
node-name))
- ((looking-at "</")
+ ((looking-at-p "</")
(forward-char 2)
(error "XML: (Not Well-Formed) Invalid end tag `%s' (expecting `%s')"
(let ((pos (point)))
(old-remaining-size (- (buffer-size) (point)))
ref val)
(while (and (not (eobp))
- (not (looking-at "<")))
+ (not (looking-at-p "<")))
;; Find the next < or & character.
(skip-chars-forward "^<&")
(when (eq (char-after) ?&)
xml-validating-parser
(error "XML: (Validity) Invalid character reference `%s'"
(match-string 0)))
- (replace-match (or (string val) xml-undefined-entity) t t))
+ (replace-match (if val (string val) xml-undefined-entity) t t))
;; For an entity reference, search again from the start of
;; the replaced text, since the replacement can contain
;; entity or character references, or markup.
(and (null val)
xml-validating-parser
(error "XML: (Validity) Undefined entity `%s'" ref))
- (replace-match (cdr val) t t)
+ (replace-match (or (cdr val) xml-undefined-entity) t t)
(goto-char (match-beginning 0)))
;; Check for XML bombs.
(and xml-entity-expansion-limit
(let ((xml-validating-parser nil))
(xml-parse-dtd)))
-(defun xml-parse-dtd (&optional parse-ns)
+(defun xml-parse-dtd (&optional _parse-ns)
"Parse the DTD at point."
(forward-char (eval-when-compile (length "<!DOCTYPE")))
(skip-syntax-forward " ")
- (if (and (looking-at ">")
+ (if (and (looking-at-p ">")
xml-validating-parser)
(error "XML: (Validity) Invalid DTD (expecting name of the document)"))
;; Parse the rest of the DTD
;; Fixme: Deal with NOTATION, PIs.
- (while (not (looking-at "\\s-*\\]"))
+ (while (not (looking-at-p "\\s-*\\]"))
(skip-syntax-forward " ")
(cond
((eobp)
(end-pos (match-end 0)))
;; Translation of rule [46] of XML specifications
(cond
- ((string-match "\\`EMPTY\\s-*\\'" type) ; empty declaration
+ ((string-match-p "\\`EMPTY\\s-*\\'" type) ; empty declaration
(setq type 'empty))
- ((string-match "\\`ANY\\s-*$" type) ; any type of contents
+ ((string-match-p "\\`ANY\\s-*$" type) ; any type of contents
(setq type 'any))
((string-match "\\`(\\(.*\\))\\s-*\\'" type) ; children ([47])
(setq type (xml-parse-elem-type
(match-string-no-properties 1 type))))
- ((string-match "^%[^;]+;[ \t\n\r]*\\'" type) ; substitution
+ ((string-match-p "^%[^;]+;[ \t\n\r]*\\'" type) ; substitution
nil)
(xml-validating-parser
(error "XML: (Validity) Invalid element type in the DTD")))
(goto-char (match-end 0)))
;; Comments (skip to end, ignoring parameter entity):
- ((looking-at "<!--")
+ ((looking-at-p "<!--")
(search-forward "-->")
(and next-parameter-entity
(> (point) next-parameter-entity)
(unless (looking-at xml-pe-reference-re)
(error "XML: Internal error"))
(let* ((entity (match-string 1))
- (beg (point-marker))
(elt (assoc entity xml-parameter-entity-alist)))
(if elt
(progn
(progn
(setq elem (match-string-no-properties 1 string)
modifier (match-string-no-properties 2 string))
- (if (string-match "|" elem)
+ (if (string-match-p "|" elem)
(setq elem (cons 'choice
(mapcar 'xml-parse-elem-type
(split-string elem "|"))))
- (if (string-match "," elem)
+ (if (string-match-p "," elem)
(setq elem (cons 'seq
(mapcar 'xml-parse-elem-type
(split-string elem ",")))))))
(if (and string (stringp string))
(let ((start 0))
(while (string-match "&#\\([0-9]+\\);" string start)
- (condition-case nil
- (setq string (replace-match
- (string (read (substring string
- (match-beginning 1)
- (match-end 1))))
- nil nil string))
- (error nil))
+ (ignore-errors
+ (setq string (replace-match
+ (string (read (substring string
+ (match-beginning 1)
+ (match-end 1))))
+ nil nil string)))
(setq start (1+ (match-beginning 0))))
string)
nil))
(defalias 'xml-print 'xml-debug-print)
(defun xml-escape-string (string)
- "Return STRING with entity substitutions made from `xml-entity-alist'."
- (mapconcat (lambda (byte)
- (let ((char (char-to-string byte)))
- (if (rassoc char xml-entity-alist)
- (concat "&" (car (rassoc char xml-entity-alist)) ";")
- char)))
- string ""))
+ "Convert STRING into a string containing valid XML character data.
+Replace occurrences of &<>'\" in STRING with their default XML
+entity references (e.g. replace each & with &).
+
+XML character data must not contain & or < characters, nor the >
+character under some circumstances. The XML spec does not impose
+restriction on \" or ', but we just substitute for these too
+\(as is permitted by the spec)."
+ (with-temp-buffer
+ (insert string)
+ (dolist (substitution '(("&" . "&")
+ ("<" . "<")
+ (">" . ">")
+ ("'" . "'")
+ ("\"" . """)))
+ (goto-char (point-min))
+ (while (search-forward (car substitution) nil t)
+ (replace-match (cdr substitution) t t nil)))
+ (buffer-string)))
(defun xml-debug-print-internal (xml indent-string)
"Outputs the XML tree in the current buffer.