Update copyright year to 2016

[gnu-emacs] / lisp / emacs-lisp / rx.el
diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el

index c246d0235f6ca2d389221e6376e706eff4b8a6c0..66d295e221f77deab3b08a47b7fcbe564fa122bf 100644 (file)
--- a/lisp/emacs-lisp/rx.el
+++ b/lisp/emacs-lisp/rx.el
@@ -1,9 +1,9 @@
  ;;; rx.el --- sexp notation for regular expressions
  
-;; Copyright (C) 2001-2012 Free Software Foundation, Inc.
+;; Copyright (C) 2001-2016 Free Software Foundation, Inc.
  
  ;; Author: Gerd Moellmann <gerd@gnu.org>
-;; Maintainer: FSF
+;; Maintainer: emacs-devel@gnu.org
  ;; Keywords: strings, regexps, extensions
  
  ;; This file is part of GNU Emacs.
@@ -35,9 +35,8 @@
  ;; that the `repeat' form can't have multiple regexp args.
  
  ;; Now alternative forms are provided for a degree of compatibility
-;; with Shivers' attempted definitive SRE notation
-;; <URL:http://www.ai.mit.edu/~/shivers/sre.txt>.  SRE forms not
-;; catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>,
+;; with Olin Shivers' attempted definitive SRE notation.  SRE forms
+;; not catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>,
  ;; ,<exp>, (word ...), word+, posix-string, and character class forms.
  ;; Some forms are inconsistent with SRE, either for historical reasons
  ;; or because of the implementation -- simple translation into Emacs
@@ -108,7 +107,9 @@
  
  ;;; Code:
  
-(defconst rx-constituents
+;; FIXME: support macros.
+
+(defvar rx-constituents              ;Not `const' because some modes extend it.
    '((and               . (rx-and 1 nil))
      (seq               . and)          ; SRE
      (:                 . and)          ; SRE
@@ -257,7 +258,8 @@ regular expressions.")
      (not-at-end-of-line                . ?<)
      (not-at-beginning-of-line  . ?>)
      (alpha-numeric-two-byte    . ?A)
-    (chinse-two-byte           . ?C)
+    (chinese-two-byte          . ?C)
+    (chinse-two-byte           . ?C) ;; A typo in Emacs 21.1-24.3.
      (greek-two-byte            . ?G)
      (japanese-hiragana-two-byte . ?H)
      (indian-two-byte           . ?I)
@@ -766,8 +768,8 @@ of all atomic regexps."
       ((= l 3) (string-match "\\`\\(?:\\\\[cCsS_]\\|\\[[^^]\\]\\)" r))
       ((null lax)
        (cond
-       ((string-match "\\`\\[^?\]?\\(?:\\[:[a-z]+:]\\|[^\]]\\)*\\]\\'" r))
-       ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^\)]\\)*\\\\)\\'" r)))))))
+       ((string-match "\\`\\[^?\]?\\(?:\\[:[a-z]+:]\\|[^]]\\)*\\]\\'" r))
+       ((string-match "\\`\\\\(\\(?:[^\\]\\|\\\\[^)]\\)*\\\\)\\'" r)))))))
  
  
  (defun rx-syntax (form)
@@ -813,9 +815,9 @@ of all atomic regexps."
  
  (defun rx-greedy (form)
    "Parse and produce code from FORM.
-If FORM is '(minimal-match FORM1)', non-greedy versions of `*',
+If FORM is `(minimal-match FORM1)', non-greedy versions of `*',
  `+', and `?' operators will be used in FORM1.  If FORM is
-'(maximal-match FORM1)', greedy operators will be used."
+`(maximal-match FORM1)', greedy operators will be used."
    (rx-check form)
    (let ((rx-greedy-flag (eq (car form) 'maximal-match)))
      (rx-form (cadr form) rx-parent)))
@@ -832,27 +834,28 @@ If FORM is '(minimal-match FORM1)', non-greedy versions of `*',
  FORM is a regular expression in sexp form.
  RX-PARENT shows which type of expression calls and controls putting of
  shy groups around the result and some more in other functions."
-  (if (stringp form)
-      (rx-group-if (regexp-quote form)
-                  (if (and (eq rx-parent '*) (< 1 (length form)))
-                      rx-parent))
-    (cond ((integerp form)
-          (regexp-quote (char-to-string form)))
-         ((symbolp form)
-          (let ((info (rx-info form nil)))
-            (cond ((stringp info)
-                   info)
-                  ((null info)
-                   (error "Unknown rx form `%s'" form))
-                  (t
-                   (funcall (nth 0 info) form)))))
-         ((consp form)
-          (let ((info (rx-info (car form) 'head)))
-            (unless (consp info)
-              (error "Unknown rx form `%s'" (car form)))
-            (funcall (nth 0 info) form)))
-         (t
-          (error "rx syntax error at `%s'" form)))))
+  (cond
+   ((stringp form)
+    (rx-group-if (regexp-quote form)
+                 (if (and (eq rx-parent '*) (< 1 (length form)))
+                     rx-parent)))
+   ((integerp form)
+    (regexp-quote (char-to-string form)))
+   ((symbolp form)
+    (let ((info (rx-info form nil)))
+      (cond ((stringp info)
+             info)
+            ((null info)
+             (error "Unknown rx form `%s'" form))
+            (t
+             (funcall (nth 0 info) form)))))
+   ((consp form)
+    (let ((info (rx-info (car form) 'head)))
+      (unless (consp info)
+        (error "Unknown rx form `%s'" (car form)))
+      (funcall (nth 0 info) form)))
+   (t
+    (error "rx syntax error at `%s'" form))))
  
  
  ;;;###autoload
@@ -869,7 +872,7 @@ NO-GROUP non-nil means don't put shy groups around the result."
  REGEXPS is a non-empty sequence of forms of the sort listed below.
  
  Note that `rx' is a Lisp macro; when used in a Lisp program being
- compiled, the translation is performed by the compiler.
+compiled, the translation is performed by the compiler.
  See `rx-to-string' for how to do such a translation at run-time.
  
  The following are valid subforms of regular expressions in sexp
@@ -962,20 +965,20 @@ CHAR
       matches space and tab only.
  
  `graphic', `graph'
-     matches graphic characters--everything except ASCII control chars,
-     space, and DEL.
+     matches graphic characters--everything except whitespace, ASCII
+     and non-ASCII control characters, surrogates, and codepoints
+     unassigned by Unicode.
  
  `printing', `print'
-     matches printing characters--everything except ASCII control chars
-     and DEL.
+     matches whitespace and graphic characters.
  
  `alphanumeric', `alnum'
-     matches letters and digits.  (But at present, for multibyte characters,
-     it matches anything that has word syntax.)
+     matches alphabetic characters and digits.  (For multibyte characters,
+     it matches according to Unicode character properties.)
  
  `letter', `alphabetic', `alpha'
-     matches letters.  (But at present, for multibyte characters,
-     it matches anything that has word syntax.)
+     matches alphabetic characters.  (For multibyte characters,
+     it matches according to Unicode character properties.)
  
  `ascii'
       matches ASCII (unibyte) characters.
@@ -1043,7 +1046,7 @@ CHAR
       `not-at-end-of-line'              (\\c<)
       `not-at-beginning-of-line'                (\\c>)
       `alpha-numeric-two-byte'          (\\cA)
-     `chinse-two-byte'                 (\\cC)
+     `chinese-two-byte'                        (\\cC)
       `greek-two-byte'                  (\\cG)
       `japanese-hiragana-two-byte'      (\\cH)
      `indian-two-byte'                 (\\cI)