X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/114f9c96795aff3b51b9060d7c9c1b77debcc99a..0d8de0fd0a5a63cc9558b5c99f9c7f1ddcaf338a:/lisp/emacs-lisp/rx.el diff --git a/lisp/emacs-lisp/rx.el b/lisp/emacs-lisp/rx.el index 5d04494ecb..56efd14219 100644 --- a/lisp/emacs-lisp/rx.el +++ b/lisp/emacs-lisp/rx.el @@ -1,7 +1,6 @@ ;;; rx.el --- sexp notation for regular expressions -;; Copyright (C) 2001, 2002, 2003, 2004, 2005, -;; 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc. +;; Copyright (C) 2001-2011 Free Software Foundation, Inc. ;; Author: Gerd Moellmann ;; Maintainer: FSF @@ -120,19 +119,19 @@ (nonl . not-newline) ; SRE (anything . (rx-anything 0 nil)) (any . (rx-any 1 nil rx-check-any)) ; inconsistent with SRE + (any . ".") ; sregex (in . any) (char . any) ; sregex (not-char . (rx-not-char 1 nil rx-check-any)) ; sregex (not . (rx-not 1 1 rx-check-not)) - ;; Partially consistent with sregex, whose `repeat' is like our - ;; `**'. (`repeat' with optional max arg and multiple sexp forms - ;; is ambiguous.) - (repeat . (rx-repeat 2 3)) + (repeat . (rx-repeat 2 nil)) (= . (rx-= 2 nil)) ; SRE (>= . (rx->= 2 nil)) ; SRE (** . (rx-** 2 nil)) ; SRE (submatch . (rx-submatch 1 nil)) ; SRE - (group . submatch) + (group . submatch) ; sregex + (submatch-n . (rx-submatch-n 2 nil)) + (group-n . submatch-n) (zero-or-more . (rx-kleene 1 nil)) (one-or-more . (rx-kleene 1 nil)) (zero-or-one . (rx-kleene 1 nil)) @@ -175,6 +174,7 @@ (category . (rx-category 1 1 rx-check-category)) (eval . (rx-eval 1 1)) (regexp . (rx-regexp 1 1 stringp)) + (regex . regexp) ; sregex (digit . "[[:digit:]]") (numeric . digit) ; SRE (num . digit) ; SRE @@ -295,15 +295,27 @@ regular expression strings.") `zero-or-more', and `one-or-more'. Dynamically bound.") -(defun rx-info (op) +(defun rx-info (op head) "Return parsing/code generation info for OP. If OP is the space character ASCII 32, return info for the symbol `?'. If OP is the character `?', return info for the symbol `??'. -See also `rx-constituents'." +See also `rx-constituents'. +If HEAD is non-nil, then OP is the head of a sexp, otherwise it's +a standalone symbol." (cond ((eq op ? ) (setq op '\?)) ((eq op ??) (setq op '\??))) - (while (and (not (null op)) (symbolp op)) - (setq op (cdr (assq op rx-constituents)))) + (let (old-op) + (while (and (not (null op)) (symbolp op)) + (setq old-op op) + (setq op (cdr (assq op rx-constituents))) + (when (if head (stringp op) (consp op)) + ;; We found something but of the wrong kind. Let's look for an + ;; alternate definition for the other case. + (let ((new-op + (cdr (assq old-op (cdr (memq (assq old-op rx-constituents) + rx-constituents)))))) + (if (and new-op (not (if head (stringp new-op) (consp new-op)))) + (setq op new-op)))))) op) @@ -311,7 +323,7 @@ See also `rx-constituents'." "Check FORM according to its car's parsing info." (unless (listp form) (error "rx `%s' needs argument(s)" form)) - (let* ((rx (rx-info (car form))) + (let* ((rx (rx-info (car form) 'head)) (nargs (1- (length form))) (min-args (nth 1 rx)) (max-args (nth 2 rx)) @@ -401,7 +413,7 @@ Only both edges of each range is checked." (setcdr m (1- char))))) ranges)) - + (defun rx-any-condense-range (args) "Condense by side effect ARGS as range for Rx `any'." (let (str @@ -427,7 +439,7 @@ Only both edges of each range is checked." (mapcar (lambda (e) (cond ((= (car e) (cdr e)) (list (car e))) - ;; ((= (1+ (car e)) (cdr e)) (list (car e) (cdr e))) + ((= (1+ (car e)) (cdr e)) (list (car e) (cdr e))) ((list e)))) l)) (delete-dups str)))) @@ -545,7 +557,10 @@ ARG is optional." ((numberp e) (string e)) ((consp e) (if (and (= (1+ (car e)) (cdr e)) - (null (memq (car e) '(?\] ?-)))) + ;; rx-any-condense-range should + ;; prevent this case from happening. + (null (memq (car e) '(?\] ?-))) + (null (memq (cdr e) '(?\] ?-)))) (string (car e) (cdr e)) (string (car e) ?- (cdr e)))) (e))) @@ -561,7 +576,7 @@ ARG is optional." (condition-case nil (rx-form arg) (error "")))) - (eq arg 'word-boundary) + (eq arg 'word-boundary) (and (consp arg) (memq (car arg) '(not any in syntax category)))) (error "rx `not' syntax error: %s" arg)) @@ -640,14 +655,17 @@ If SKIP is non-nil, allow that number of items after the head, i.e. (defun rx-** (form) "Parse and produce code from FORM `(** N M ...)'." (rx-check form) - (setq form (cons 'repeat (cdr (rx-trans-forms form 2)))) - (rx-form form '*)) + (rx-form (cons 'repeat (cdr (rx-trans-forms form 2))) '*)) (defun rx-repeat (form) "Parse and produce code from FORM. -FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'." +FORM is either `(repeat N FORM1)' or `(repeat N M FORMS...)'." (rx-check form) + (if (> (length form) 4) + (setq form (rx-trans-forms form 2))) + (if (null (nth 2 form)) + (setq form (cons (nth 0 form) (cons (nth 1 form) (nthcdr 3 form))))) (cond ((= (length form) 3) (unless (and (integerp (nth 1 form)) (> (nth 1 form) 0)) @@ -674,6 +692,16 @@ FORM is either `(repeat N FORM1)' or `(repeat N M FORM1)'." (mapconcat (lambda (re) (rx-form re ':)) (cdr form) nil)) "\\)")) +(defun rx-submatch-n (form) + "Parse and produce code from FORM, which is `(submatch-n N ...)'." + (let ((n (nth 1 form))) + (concat "\\(?" (number-to-string n) ":" + (if (= 3 (length form)) + ;; Only one sub-form. + (rx-form (nth 2 form)) + ;; Several sub-forms implicitly concatenated. + (mapconcat (lambda (re) (rx-form re ':)) (cddr form) nil)) + "\\)"))) (defun rx-backref (form) "Parse and produce code from FORM, which is `(backref N)'." @@ -746,15 +774,18 @@ of all atomic regexps." "Parse and produce code from FORM, which is `(syntax SYMBOL)'." (rx-check form) (let* ((sym (cadr form)) - (syntax (assq sym rx-syntax))) + (syntax (cdr (assq sym rx-syntax)))) (unless syntax ;; Try sregex compatibility. - (let ((name (symbol-name sym))) - (if (= 1 (length name)) - (setq syntax (rassq (aref name 0) rx-syntax)))) + (cond + ((characterp sym) (setq syntax sym)) + ((symbolp sym) + (let ((name (symbol-name sym))) + (if (= 1 (length name)) + (setq syntax (aref name 0)))))) (unless syntax - (error "Unknown rx syntax `%s'" (cadr form)))) - (format "\\s%c" (cdr syntax)))) + (error "Unknown rx syntax `%s'" sym))) + (format "\\s%c" syntax))) (defun rx-check-category (form) @@ -808,7 +839,7 @@ shy groups around the result and some more in other functions." (cond ((integerp form) (regexp-quote (char-to-string form))) ((symbolp form) - (let ((info (rx-info form))) + (let ((info (rx-info form nil))) (cond ((stringp info) info) ((null info) @@ -816,7 +847,7 @@ shy groups around the result and some more in other functions." (t (funcall (nth 0 info) form))))) ((consp form) - (let ((info (rx-info (car form)))) + (let ((info (rx-info (car form) 'head))) (unless (consp info) (error "Unknown rx form `%s'" (car form))) (funcall (nth 0 info) form))) @@ -1053,8 +1084,10 @@ CHAR like `and', but makes the match accessible with `match-end', `match-beginning', and `match-string'. -`(group SEXP1 SEXP2 ...)' - another name for `submatch'. +`(submatch-n N SEXP1 SEXP2 ...)' +`(group-n N SEXP1 SEXP2 ...)' + like `group', but make it an explicitly-numbered group with + group number N. `(or SEXP1 SEXP2 ...)' `(| SEXP1 SEXP2 ...)' @@ -1144,5 +1177,4 @@ enclosed in `(and ...)'. (provide 'rx) -;; arch-tag: 12d01a63-0008-42bb-ab8c-1c7d63be370b ;;; rx.el ends here