;;; rx.el --- sexp notation for regular expressions
-;; Copyright (C) 2001-2011 Free Software Foundation, Inc.
+;; Copyright (C) 2001-2012 Free Software Foundation, Inc.
;; Author: Gerd Moellmann <gerd@gnu.org>
;; Maintainer: FSF
;; that the `repeat' form can't have multiple regexp args.
;; Now alternative forms are provided for a degree of compatibility
-;; with Shivers' attempted definitive SRE notation
-;; <URL:http://www.ai.mit.edu/~/shivers/sre.txt>. SRE forms not
-;; catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>,
+;; with Olin Shivers' attempted definitive SRE notation. SRE forms
+;; not catered for include: dsm, uncase, w/case, w/nocase, ,@<exp>,
;; ,<exp>, (word ...), word+, posix-string, and character class forms.
;; Some forms are inconsistent with SRE, either for historical reasons
;; or because of the implementation -- simple translation into Emacs
;;; Code:
-(defconst rx-constituents
+;; FIXME: support macros.
+
+(defvar rx-constituents ;Not `const' because some modes extend it.
'((and . (rx-and 1 nil))
(seq . and) ; SRE
(: . and) ; SRE
(** . (rx-** 2 nil)) ; SRE
(submatch . (rx-submatch 1 nil)) ; SRE
(group . submatch) ; sregex
+ (submatch-n . (rx-submatch-n 2 nil))
+ (group-n . submatch-n)
(zero-or-more . (rx-kleene 1 nil))
(one-or-more . (rx-kleene 1 nil))
(zero-or-one . (rx-kleene 1 nil))
(defun rx-anything (form)
"Match any character."
(if (consp form)
- (error "rx `anythng' syntax error: %s" form))
+ (error "rx `anything' syntax error: %s" form))
(rx-or (list 'or 'not-newline ?\n)))
(mapconcat (lambda (re) (rx-form re ':)) (cdr form) nil))
"\\)"))
+(defun rx-submatch-n (form)
+  "Parse and produce code from FORM, which is `(submatch-n N ...)'.
+N is the explicit group number and must be a positive integer; an
+error is signaled otherwise.  The elements of FORM after N are rx
+sub-forms: a single sub-form is translated on its own, several are
+implicitly concatenated.  Return a regexp string of the form
+`\\(?N:REGEXP\\)'."
+  (let ((n (nth 1 form)))
+    ;; Validate N up front.  Without this, a non-number makes
+    ;; `number-to-string' signal an unrelated type error, and a zero,
+    ;; negative or fractional N is spliced verbatim into the regexp.
+    (unless (and (integerp n) (> n 0))
+      (error "rx `%s' requires a positive integer group number: %s"
+             (car form) n))
+    (concat "\\(?" (number-to-string n) ":"
+            (if (= 3 (length form))
+                ;; Only one sub-form.
+                (rx-form (nth 2 form))
+              ;; Several sub-forms implicitly concatenated.
+              (mapconcat (lambda (re) (rx-form re ':)) (cddr form) nil))
+            "\\)")))
(defun rx-backref (form)
"Parse and produce code from FORM, which is `(backref N)'."
FORM is a regular expression in sexp form.
RX-PARENT shows which type of expression calls and controls putting of
shy groups around the result and some more in other functions."
- (if (stringp form)
- (rx-group-if (regexp-quote form)
- (if (and (eq rx-parent '*) (< 1 (length form)))
- rx-parent))
- (cond ((integerp form)
- (regexp-quote (char-to-string form)))
- ((symbolp form)
- (let ((info (rx-info form nil)))
- (cond ((stringp info)
- info)
- ((null info)
- (error "Unknown rx form `%s'" form))
- (t
- (funcall (nth 0 info) form)))))
- ((consp form)
- (let ((info (rx-info (car form) 'head)))
- (unless (consp info)
- (error "Unknown rx form `%s'" (car form)))
- (funcall (nth 0 info) form)))
- (t
- (error "rx syntax error at `%s'" form)))))
+ (cond
+ ((stringp form)
+ (rx-group-if (regexp-quote form)
+ (if (and (eq rx-parent '*) (< 1 (length form)))
+ rx-parent)))
+ ((integerp form)
+ (regexp-quote (char-to-string form)))
+ ((symbolp form)
+ (let ((info (rx-info form nil)))
+ (cond ((stringp info)
+ info)
+ ((null info)
+ (error "Unknown rx form `%s'" form))
+ (t
+ (funcall (nth 0 info) form)))))
+ ((consp form)
+ (let ((info (rx-info (car form) 'head)))
+ (unless (consp info)
+ (error "Unknown rx form `%s'" (car form)))
+ (funcall (nth 0 info) form)))
+ (t
+ (error "rx syntax error at `%s'" form))))
;;;###autoload
like `and', but makes the match accessible with `match-end',
`match-beginning', and `match-string'.
+`(submatch-n N SEXP1 SEXP2 ...)'
+`(group-n N SEXP1 SEXP2 ...)'
+ like `group', but makes it an explicitly-numbered group with
+ group number N.
+
`(or SEXP1 SEXP2 ...)'
`(| SEXP1 SEXP2 ...)'
matches anything that matches SEXP1 or SEXP2, etc. If all