X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/8678d9e413593b0abab296551a20589745c459da..ba3189039adc8ec5eba5ed3e21d42019a4616b7c:/lisp/emacs-lisp/rx.el diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index 7122de4789..7adf46ebff 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el @@ -1,6 +1,6 @@ ;;; rx.el --- sexp notation for regular expressions -;; Copyright (C) 2001-2011 Free Software Foundation, Inc. +;; Copyright (C) 2001-2014 Free Software Foundation, Inc. ;; Author: Gerd Moellmann ;; Maintainer: FSF @@ -35,9 +35,8 @@ ;; that the `repeat' form can't have multiple regexp args. ;; Now alternative forms are provided for a degree of compatibility -;; with Shivers' attempted definitive SRE notation -;; . SRE forms not -;; catered for include: dsm, uncase, w/case, w/nocase, ,@, +;; with Olin Shivers' attempted definitive SRE notation. SRE forms +;; not catered for include: dsm, uncase, w/case, w/nocase, ,@, ;; ,, (word ...), word+, posix-string, and character class forms. ;; Some forms are inconsistent with SRE, either for historical reasons ;; or because of the implementation -- simple translation into Emacs @@ -108,7 +107,9 @@ ;;; Code: -(defconst rx-constituents +;; FIXME: support macros. + +(defvar rx-constituents ;Not `const' because some modes extend it. '((and . (rx-and 1 nil)) (seq . and) ; SRE (: . and) ; SRE @@ -130,6 +131,8 @@ (** . (rx-** 2 nil)) ; SRE (submatch . (rx-submatch 1 nil)) ; SRE (group . submatch) ; sregex + (submatch-n . (rx-submatch-n 2 nil)) + (group-n . submatch-n) (zero-or-more . (rx-kleene 1 nil)) (one-or-more . (rx-kleene 1 nil)) (zero-or-one . (rx-kleene 1 nil)) @@ -391,7 +394,7 @@ FORM is of the form `(and FORM1 ...)'." (defun rx-anything (form) "Match any character." (if (consp form) - (error "rx `anythng' syntax error: %s" form)) + (error "rx `anything' syntax error: %s" form)) (rx-or (list 'or 'not-newline ?\n))) @@ -690,6 +693,16 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'." (mapconcat (lambda (re) (rx-form re ':)) (cdr form) nil)) "\\)")) +(defun rx-submatch-n (form) + "Parse and produce code from FORM, which is `(submatch-n N ...)'." + (let ((n (nth 1 form))) + (concat "\\(?" (number-to-string n) ":" + (if (= 3 (length form)) + ;; Only one sub-form. + (rx-form (nth 2 form)) + ;; Several sub-forms implicitly concatenated. + (mapconcat (lambda (re) (rx-form re ':)) (cddr form) nil)) + "\\)"))) (defun rx-backref (form) "Parse and produce code from FORM, which is `(backref N)'." @@ -820,27 +833,28 @@ If FORM is '(minimal-match FORM1)', non-greedy versions of `*', FORM is a regular expression in sexp form. RX-PARENT shows which type of expression calls and controls putting of shy groups around the result and some more in other functions." - (if (stringp form) - (rx-group-if (regexp-quote form) - (if (and (eq rx-parent '*) (< 1 (length form))) - rx-parent)) - (cond ((integerp form) - (regexp-quote (char-to-string form))) - ((symbolp form) - (let ((info (rx-info form nil))) - (cond ((stringp info) - info) - ((null info) - (error "Unknown rx form `%s'" form)) - (t - (funcall (nth 0 info) form))))) - ((consp form) - (let ((info (rx-info (car form) 'head))) - (unless (consp info) - (error "Unknown rx form `%s'" (car form))) - (funcall (nth 0 info) form))) - (t - (error "rx syntax error at `%s'" form))))) + (cond + ((stringp form) + (rx-group-if (regexp-quote form) + (if (and (eq rx-parent '*) (< 1 (length form))) + rx-parent))) + ((integerp form) + (regexp-quote (char-to-string form))) + ((symbolp form) + (let ((info (rx-info form nil))) + (cond ((stringp info) + info) + ((null info) + (error "Unknown rx form `%s'" form)) + (t + (funcall (nth 0 info) form))))) + ((consp form) + (let ((info (rx-info (car form) 'head))) + (unless (consp info) + (error "Unknown rx form `%s'" (car form))) + (funcall (nth 0 info) form))) + (t + (error "rx syntax error at `%s'" form)))) ;;;###autoload @@ -857,7 +871,7 @@ NO-GROUP non-nil means don't put shy groups around the result." REGEXPS is a non-empty sequence of forms of the sort listed below. Note that `rx' is a Lisp macro; when used in a Lisp program being - compiled, the translation is performed by the compiler. +compiled, the translation is performed by the compiler. See `rx-to-string' for how to do such a translation at run-time. The following are valid subforms of regular expressions in sexp @@ -1072,6 +1086,11 @@ CHAR like `and', but makes the match accessible with `match-end', `match-beginning', and `match-string'. +`(submatch-n N SEXP1 SEXP2 ...)' +`(group-n N SEXP1 SEXP2 ...)' + like `group', but make it an explicitly-numbered group with + group number N. + `(or SEXP1 SEXP2 ...)' `(| SEXP1 SEXP2 ...)' matches anything that matches SEXP1 or SEXP2, etc. If all