X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/6d12a4dfbcb5680fafac89769e1a2f111fdcc587..b0c9a334c2f0eb881eff47f590997e746cc3bdb3:/lisp/xml.el diff --git a/lisp/xml.el b/lisp/xml.el index 993ef59b27..5fdb6f33b1 100644 --- a/lisp/xml.el +++ b/lisp/xml.el @@ -1,6 +1,7 @@ ;;; xml.el --- XML parser -;; Copyright (C) 2000, 01, 03, 2004 Free Software Foundation, Inc. +;; Copyright (C) 2000, 2001, 2002, 2003, 2004, +;; 2005 Free Software Foundation, Inc. ;; Author: Emmanuel Briot ;; Maintainer: Mark A. Hershberger @@ -20,8 +21,8 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Commentary: @@ -84,6 +85,9 @@ ;;** ;;******************************************************************* +(defconst xml-undefined-entity "?" + "What to substitute for undefined entities") + (defvar xml-entity-alist '(("lt" . "<") ("gt" . ">") @@ -179,7 +183,10 @@ If PARSE-NS is non-nil, then QNAMES are expanded." xml))) -(let* ((start-chars (concat ":[:alpha:]_")) +(defvar xml-name-re) +(defvar xml-entity-value-re) +(defvar xml-att-def-re) +(let* ((start-chars (concat "[:alpha:]:_")) (name-chars (concat "-[:digit:]." start-chars)) ;;[3] S ::= (#x20 | #x9 | #xD | #xA)+ (whitespace "[ \t\n\r]")) @@ -206,6 +213,35 @@ If PARSE-NS is non-nil, then QNAMES are expanded." (defvar xml-pe-reference-re (concat "%" xml-name-re ";")) ;;[67] Reference ::= EntityRef | CharRef (defvar xml-reference-re (concat "\\(?:" xml-entity-ref "\\|" xml-char-ref-re "\\)")) +;;[10] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" + (defvar xml-att-value-re (concat "\\(?:\"\\(?:[^&\"]\\|" xml-reference-re "\\)*\"\\|" + "'\\(?:[^&']\\|" xml-reference-re "\\)*'\\)")) +;;[56] TokenizedType ::= 'ID' [VC: ID] [VC: One ID per Element Type] [VC: ID Attribute Default] +;; | 'IDREF' [VC: IDREF] +;; | 'IDREFS' [VC: IDREF] +;; | 'ENTITY' [VC: Entity Name] +;; | 'ENTITIES' [VC: Entity Name] +;; | 'NMTOKEN' [VC: Name Token] +;; | 'NMTOKENS' [VC: Name Token] + (defvar xml-tokenized-type-re "\\(?:ID\\|IDREF\\|IDREFS\\|ENTITY\\|ENTITIES\\|NMTOKEN\\|NMTOKENS\\)") +;;[58] NotationType ::= 'NOTATION' S '(' S? Name (S? '|' S? Name)* S? ')' + (defvar xml-notation-type-re (concat "\\(?:NOTATION" whitespace "(" whitespace "*" xml-name-re + "\\(?:" whitespace "*|" whitespace "*" xml-name-re "\\)*" whitespace "*)\\)")) +;;[59] Enumeration ::= '(' S? Nmtoken (S? '|' S? Nmtoken)* S? ')' [VC: Enumeration] [VC: No Duplicate Tokens] + (defvar xml-enumeration-re (concat "\\(?:(" whitespace "*" xml-nmtoken-re + "\\(?:" whitespace "*|" whitespace "*" xml-nmtoken-re "\\)*" + whitespace ")\\)")) +;;[57] EnumeratedType ::= NotationType | Enumeration + (defvar xml-enumerated-type-re (concat "\\(?:" xml-notation-type-re "\\|" xml-enumeration-re "\\)")) +;;[54] AttType ::= StringType | TokenizedType | EnumeratedType +;;[55] StringType ::= 'CDATA' + (defvar xml-att-type-re (concat "\\(?:CDATA\\|" xml-tokenized-type-re "\\|" xml-notation-type-re"\\|" xml-enumerated-type-re "\\)")) +;;[60] DefaultDecl ::= '#REQUIRED' | '#IMPLIED' | (('#FIXED' S)? AttValue) + (defvar xml-default-decl-re (concat "\\(?:#REQUIRED\\|#IMPLIED\\|\\(?:#FIXED" whitespace "\\)*" xml-att-value-re "\\)")) +;;[53] AttDef ::= S Name S AttType S DefaultDecl + (defvar xml-att-def-re (concat "\\(?:" whitespace "*" xml-name-re + whitespace "*" xml-att-type-re + whitespace "*" xml-default-decl-re "\\)")) ;;[9] EntityValue ::= '"' ([^%&"] | PEReference | Reference)* '"' ;; | "'" ([^%&'] | PEReference | Reference)* "'" (defvar xml-entity-value-re (concat "\\(?:\"\\(?:[^%&\"]\\|" xml-pe-reference-re @@ -371,7 +407,9 @@ Returns one of: (let ((pos (match-end 0))) (unless (search-forward "]]>" nil t) (error "XML: (Not Well Formed) CDATA section does not end anywhere in the document")) - (buffer-substring pos (match-beginning 0)))) + (concat + (buffer-substring pos (match-beginning 0)) + (xml-parse-string)))) ;; DTD for the document ((looking-at "")) + + ;; We don't do anything with ATTLIST currently + (goto-char (match-end 0))) + ((looking-at "")) ((looking-at (concat "")) - (let ((name (buffer-substring (nth 2 (match-data)) - (nth 3 (match-data)))) - (value (buffer-substring (+ (nth 4 (match-data)) 1) - (- (nth 5 (match-data)) 1)))) - (goto-char (nth 1 (match-data))) + (let ((name (match-string 1)) + (value (substring (match-string 2) 1 + (- (length (match-string 2)) 1)))) + (goto-char (match-end 0)) (setq xml-entity-alist (append xml-entity-alist (list (cons name @@ -637,11 +683,10 @@ This follows the rule [28] in the XML specifications." "\\|'[- \r\na-zA-Z0-9()+,./:=?;!*#@$_%]*'" "[ \t\n\r]+\\(\"[^\"]*\"\\|'[^']*'\\)" "[ \t\n\r]*>"))) - (let ((name (buffer-substring (nth 2 (match-data)) - (nth 3 (match-data)))) - (file (buffer-substring (+ (nth 4 (match-data)) 1) - (- (nth 5 (match-data)) 1)))) - (goto-char (nth 1 (match-data))) + (let ((name (match-string 1)) + (file (substring (match-string 2) 1 + (- (length (match-string 2)) 1)))) + (goto-char (match-end 0)) (setq xml-entity-alist (append xml-entity-alist (list (cons name (with-temp-buffer @@ -650,10 +695,27 @@ This follows the rule [28] in the XML specifications." (xml-parse-fragment xml-validating-parser parse-ns)))))))) + ;; skip parameter entity declarations + ((or (looking-at (concat "")) + (looking-at (concat ""))) + (goto-char (match-end 0))) + ;; skip parameter entities + ((looking-at (concat "%" xml-name-re ";")) + (goto-char (match-end 0))) (t - (error "XML: (Validity) Invalid DTD item"))))) + (when xml-validating-parser + (error "XML: (Validity) Invalid DTD item")))))) (if (looking-at "\\s-*]>") - (goto-char (nth 1 (match-data))))) + (goto-char (match-end 0)))) (nreverse dtd))) (defun xml-parse-elem-type (string) @@ -703,7 +765,7 @@ This follows the rule [28] in the XML specifications." (let ((point 0) children end-point) - (while (string-match "&\\([^;]+\\);" string point) + (while (string-match "&\\([^;]*\\);" string point) (setq end-point (match-end 0)) (let* ((this-part (match-string 1 string)) (prev-part (substring string point (match-beginning 0))) @@ -721,20 +783,18 @@ This follows the rule [28] in the XML specifications." (if c (string c)))) (entity (cdr entity)) + ((eq (length this-part) 0) + (error "XML: (Not Well-Formed) No entity given")) (t (if xml-validating-parser (error "XML: (Validity) Undefined entity `%s'" - (match-string 1 this-part))))))) + this-part) + xml-undefined-entity))))) (cond ((null children) - (if (stringp expansion) - (setq children (concat prev-part expansion)) - (if (stringp (car (last expansion))) - (progn - (setq children - (list (concat prev-part (car expansion)) - (cdr expansion)))) - (setq children (append expansion prev-part))))) + ;; FIXME: If we have an entity that expands into XML, this won't work. + (setq children + (concat prev-part expansion))) ((stringp children) (if (stringp expansion) (setq children (concat children prev-part expansion)) @@ -756,11 +816,15 @@ This follows the rule [28] in the XML specifications." (cond ((stringp children) (concat children (substring string point))) ((stringp (car (last children))) - (concat (car children) (substring string point))) + (concat (car (last children)) (substring string point))) ((null children) string) (t - (nreverse children))))) + (concat (mapconcat 'identity + (nreverse children) + "") + (substring string point)))))) + ;;******************************************************************* ;;** ;;** Printing a tree.