X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/91fc05b1ae036442400f51ba8a492cb91001e3e2..5dd1c041c7fdb876b52bf33f41e8aeb119282cef:/lisp/emacs-lisp/regexp-opt.el diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index b746c8a22f..52cbc956bc 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -1,6 +1,7 @@ ;;; regexp-opt.el --- generate efficient regexps to match strings -;; Copyright (C) 1994,95,96,97,98,99,2000 Free Software Foundation, Inc. +;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2002, +;; 2003, 2004, 2005, 2006 Free Software Foundation, Inc. ;; Author: Simon Marshall ;; Maintainer: FSF @@ -20,8 +21,8 @@ ;; You should have received a copy of the GNU General Public License ;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 59 Temple Place - Suite 330, -;; Boston, MA 02111-1307, USA. +;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, +;; Boston, MA 02110-1301, USA. ;;; Commentary: @@ -106,25 +107,29 @@ by \\=\\< and \\>." (completion-regexp-list nil) (words (eq paren 'words)) (open (cond ((stringp paren) paren) (paren "\\("))) - (sorted-strings (sort (copy-sequence strings) 'string-lessp)) + (sorted-strings (delete-dups + (sort (copy-sequence strings) 'string-lessp))) (re (regexp-opt-group sorted-strings open))) (if words (concat "\\<" re "\\>") re)))) ;;;###autoload (defun regexp-opt-depth (regexp) "Return the depth of REGEXP. -This means the number of regexp grouping constructs (parenthesised expressions) -in REGEXP." +This means the number of non-shy regexp grouping constructs +\(parenthesized expressions) in REGEXP." (save-match-data ;; Hack to signal an error if REGEXP does not have balanced parentheses. (string-match regexp "") ;; Count the number of open parentheses in REGEXP. - (let ((count 0) start) - (while (string-match "\\(\\`\\|[^\\]\\)\\\\\\(\\\\\\\\\\)*([^?]" - regexp start) - (setq count (1+ count) - ;; Go back 2 chars (one for [^?] and one for [^\\]). - start (- (match-end 0) 2))) + (let ((count 0) start last) + (while (string-match "\\\\(\\(\\?:\\)?" regexp start) + (setq start (match-end 0)) ; Start of next search. + (when (and (not (match-beginning 1)) + (subregexp-context-p regexp (match-beginning 0) last)) + ;; It's not a shy group and it's not inside brackets or after + ;; a backslash: it's really a group-open marker. + (setq last start) ; Speed up next regexp-opt-re-context-p. + (setq count (1+ count)))) count))) ;;; Workhorse functions. @@ -133,18 +138,18 @@ in REGEXP." (require 'cl)) (defun regexp-opt-group (strings &optional paren lax) - "Return a regexp to match a string in STRINGS. -If PAREN non-nil, output regexp parentheses around returned regexp. -If LAX non-nil, don't output parentheses if it doesn't require them. -Merges keywords to avoid backtracking in Emacs' regexp matcher. + ;; Return a regexp to match a string in the sorted list STRINGS. + ;; If PAREN non-nil, output regexp parentheses around returned regexp. + ;; If LAX non-nil, don't output parentheses if it doesn't require them. + ;; Merges keywords to avoid backtracking in Emacs' regexp matcher. -The basic idea is to find the shortest common prefix or suffix, remove it -and recurse. If there is no prefix, we divide the list into two so that -\(at least) one half will have at least a one-character common prefix. + ;; The basic idea is to find the shortest common prefix or suffix, remove it + ;; and recurse. If there is no prefix, we divide the list into two so that + ;; \(at least) one half will have at least a one-character common prefix. -Also we delay the addition of grouping parenthesis as long as possible -until we're sure we need them, and try to remove one-character sequences -so we can use character sets rather than grouping parenthesis." + ;; Also we delay the addition of grouping parenthesis as long as possible + ;; until we're sure we need them, and try to remove one-character sequences + ;; so we can use character sets rather than grouping parenthesis. (let* ((open-group (cond ((stringp paren) paren) (paren "\\(?:") (t ""))) (close-group (if paren "\\)" "")) (open-charset (if lax "" open-group)) @@ -218,7 +223,7 @@ so we can use character sets rather than grouping parenthesis." (regexp-quote (concat (nreverse (string-to-list xiffus)))) close-group)) - + ;; Otherwise, divide the list into those that start with a ;; particular letter and those that do not, and recurse on them. (let* ((char (char-to-string (string-to-char (car strings)))) @@ -280,4 +285,5 @@ so we can use character sets rather than grouping parenthesis." (provide 'regexp-opt) +;; arch-tag: 6c5a66f4-29af-4fd6-8c3b-4b554d5b4370 ;;; regexp-opt.el ends here