X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/7c82f3e23e37cc848a38b1f8be7149fd672a6393..f15c8583198c3d6c26ca0c0a5b6fb019f98d6c3c:/lisp/emacs-lisp/regexp-opt.el diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index e2ae390d19..b0fb23dbcc 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -1,10 +1,9 @@ ;;; regexp-opt.el --- generate efficient regexps to match strings -;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -;; 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 Free Software Foundation, Inc. +;; Copyright (C) 1994-2015 Free Software Foundation, Inc. ;; Author: Simon Marshall -;; Maintainer: FSF +;; Maintainer: emacs-devel@gnu.org ;; Keywords: strings, regexps, extensions ;; This file is part of GNU Emacs. @@ -137,14 +136,11 @@ This means the number of non-shy regexp grouping constructs ;;; Workhorse functions. -(eval-when-compile - (require 'cl)) - (defun regexp-opt-group (strings &optional paren lax) "Return a regexp to match a string in the sorted list STRINGS. If PAREN non-nil, output regexp parentheses around returned regexp. If LAX non-nil, don't output parentheses if it doesn't require them. -Merges keywords to avoid backtracking in Emacs' regexp matcher." +Merges keywords to avoid backtracking in Emacs's regexp matcher." ;; The basic idea is to find the shortest common prefix or suffix, remove it ;; and recurse. If there is no prefix, we divide the list into two so that ;; \(at least) one half will have at least a one-character common prefix. @@ -209,9 +205,7 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." (regexp-opt-group suffixes t t) close-group)) - (let* ((sgnirts (mapcar (lambda (s) - (concat (nreverse (string-to-list s)))) - strings)) + (let* ((sgnirts (mapcar #'reverse strings)) (xiffus (try-completion "" sgnirts))) (if (> (length xiffus) 0) ;; common suffix: take it and recurse on the prefixes. @@ -222,8 +216,7 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." 'string-lessp))) (concat open-group (regexp-opt-group prefixes t t) - (regexp-quote - (concat (nreverse (string-to-list xiffus)))) + (regexp-quote (nreverse xiffus)) close-group)) ;; Otherwise, divide the list into those that start with a @@ -238,7 +231,8 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." (defun regexp-opt-charset (chars) - "Return a regexp to match a character in CHARS." + "Return a regexp to match a character in CHARS. +CHARS should be a list of characters." ;; The basic idea is to find character ranges. Also we take care in the ;; position of character set meta characters in the character set regexp. ;; @@ -249,15 +243,15 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." ;; ;; Make a character map but extract character set meta characters. (dolist (char chars) - (case char - (?\] - (setq bracket "]")) - (?^ - (setq caret "^")) - (?- - (setq dash "-")) - (otherwise - (aset charmap char t)))) + (cond + ((eq char ?\]) + (setq bracket "]")) + ((eq char ?^) + (setq caret "^")) + ((eq char ?-) + (setq dash "-")) + (t + (aset charmap char t)))) ;; ;; Make a character set from the map using ranges where applicable. (map-char-table @@ -269,14 +263,14 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." (setq charset (format "%s%c-%c" charset start end)) (while (>= end start) (setq charset (format "%s%c" charset start)) - (incf start))) + (setq start (1+ start)))) (setq start (car c) end (cdr c))) (if (= (1- c) end) (setq end c) (if (> end (+ start 2)) (setq charset (format "%s%c-%c" charset start end)) (while (>= end start) (setq charset (format "%s%c" charset start)) - (incf start))) + (setq start (1+ start)))) (setq start c end c))))) charmap) (when (>= end start) @@ -284,11 +278,13 @@ Merges keywords to avoid backtracking in Emacs' regexp matcher." (setq charset (format "%s%c-%c" charset start end)) (while (>= end start) (setq charset (format "%s%c" charset start)) - (incf start)))) + (setq start (1+ start))))) ;; ;; Make sure a caret is not first and a dash is first or last. (if (and (string-equal charset "") (string-equal bracket "")) - (concat "[" dash caret "]") + (if (string-equal dash "") + "\\^" ; [^] is not a valid regexp + (concat "[" dash caret "]")) (concat "[" bracket charset caret dash "]")))) (provide 'regexp-opt)