X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/59ce725a3b68cbc324f01bc8dc5f9e07286431d1..fee1a3f195e16a6042eea45d012516c900e6f94e:/lisp/emacs-lisp/regexp-opt.el diff --git a/lisp/emacs-lisp/regexp-opt.el b/lisp/emacs-lisp/regexp-opt.el index 00f5bf5227..b538a7a294 100644 --- a/lisp/emacs-lisp/regexp-opt.el +++ b/lisp/emacs-lisp/regexp-opt.el @@ -1,7 +1,6 @@ ;;; regexp-opt.el --- generate efficient regexps to match strings -;; Copyright (C) 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, -;; 2003, 2004, 2005, 2006, 2007, 2008 Free Software Foundation, Inc. +;; Copyright (C) 1994-2011 Free Software Foundation, Inc. ;; Author: Simon Marshall ;; Maintainer: FSF @@ -9,10 +8,10 @@ ;; This file is part of GNU Emacs. -;; GNU Emacs is free software; you can redistribute it and/or modify +;; GNU Emacs is free software: you can redistribute it and/or modify ;; it under the terms of the GNU General Public License as published by -;; the Free Software Foundation; either version 3, or (at your option) -;; any later version. +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. ;; GNU Emacs is distributed in the hope that it will be useful, ;; but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,9 +19,7 @@ ;; GNU General Public License for more details. ;; You should have received a copy of the GNU General Public License -;; along with GNU Emacs; see the file COPYING. If not, write to the -;; Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -;; Boston, MA 02110-1301, USA. +;; along with GNU Emacs. If not, see . ;;; Commentary: @@ -98,19 +95,24 @@ The returned regexp is typically more efficient than the equivalent regexp: (concat open (mapconcat 'regexp-quote STRINGS \"\\\\|\") close)) If PAREN is `words', then the resulting regexp is additionally surrounded -by \\=\\< and \\>." +by \\=\\< and \\>. +If PAREN is `symbols', then the resulting regexp is additionally surrounded +by \\=\\_< and \\_>." (save-match-data ;; Recurse on the sorted list. - (let* ((max-lisp-eval-depth (* 1024 1024)) - (max-specpdl-size (* 1024 1024)) + (let* ((max-lisp-eval-depth 10000) + (max-specpdl-size 10000) (completion-ignore-case nil) (completion-regexp-list nil) - (words (eq paren 'words)) (open (cond ((stringp paren) paren) (paren "\\("))) (sorted-strings (delete-dups (sort (copy-sequence strings) 'string-lessp))) - (re (regexp-opt-group sorted-strings open))) - (if words (concat "\\<" re "\\>") re)))) + (re (regexp-opt-group sorted-strings (or open t) (not open)))) + (cond ((eq paren 'words) + (concat "\\<" re "\\>")) + ((eq paren 'symbols) + (concat "\\_<" re "\\_>")) + (t re))))) ;;;###autoload (defun regexp-opt-depth (regexp) @@ -122,7 +124,7 @@ This means the number of non-shy regexp grouping constructs (string-match regexp "") ;; Count the number of open parentheses in REGEXP. (let ((count 0) start last) - (while (string-match "\\\\(\\(\\?:\\)?" regexp start) + (while (string-match "\\\\(\\(\\?[0-9]*:\\)?" regexp start) (setq start (match-end 0)) ; Start of next search. (when (and (not (match-beginning 1)) (subregexp-context-p regexp (match-beginning 0) last)) @@ -138,11 +140,10 @@ This means the number of non-shy regexp grouping constructs (require 'cl)) (defun regexp-opt-group (strings &optional paren lax) - ;; Return a regexp to match a string in the sorted list STRINGS. - ;; If PAREN non-nil, output regexp parentheses around returned regexp. - ;; If LAX non-nil, don't output parentheses if it doesn't require them. - ;; Merges keywords to avoid backtracking in Emacs' regexp matcher. - + "Return a regexp to match a string in the sorted list STRINGS. +If PAREN non-nil, output regexp parentheses around returned regexp. +If LAX non-nil, don't output parentheses if it doesn't require them. +Merges keywords to avoid backtracking in Emacs' regexp matcher." ;; The basic idea is to find the shortest common prefix or suffix, remove it ;; and recurse. If there is no prefix, we divide the list into two so that ;; \(at least) one half will have at least a one-character common prefix. @@ -226,7 +227,7 @@ This means the number of non-shy regexp grouping constructs ;; Otherwise, divide the list into those that start with a ;; particular letter and those that do not, and recurse on them. - (let* ((char (char-to-string (string-to-char (car strings)))) + (let* ((char (substring-no-properties (car strings) 0 1)) (half1 (all-completions char strings)) (half2 (nthcdr (length half1) strings))) (concat open-group @@ -236,9 +237,7 @@ This means the number of non-shy regexp grouping constructs (defun regexp-opt-charset (chars) - ;; - ;; Return a regexp to match a character in CHARS. - ;; + "Return a regexp to match a character in CHARS." ;; The basic idea is to find character ranges. Also we take care in the ;; position of character set meta characters in the character set regexp. ;; @@ -263,13 +262,21 @@ This means the number of non-shy regexp grouping constructs (map-char-table (lambda (c v) (when v - (if (= (1- c) end) (setq end c) - (if (> end (+ start 2)) + (if (consp c) + (if (= (1- (car c)) end) (setq end (cdr c)) + (if (> end (+ start 2)) + (setq charset (format "%s%c-%c" charset start end)) + (while (>= end start) + (setq charset (format "%s%c" charset start)) + (incf start))) + (setq start (car c) end (cdr c))) + (if (= (1- c) end) (setq end c) + (if (> end (+ start 2)) (setq charset (format "%s%c-%c" charset start end)) (while (>= end start) (setq charset (format "%s%c" charset start)) (incf start))) - (setq start c end c)))) + (setq start c end c))))) charmap) (when (>= end start) (if (> end (+ start 2)) @@ -285,5 +292,4 @@ This means the number of non-shy regexp grouping constructs (provide 'regexp-opt) -;; arch-tag: 6c5a66f4-29af-4fd6-8c3b-4b554d5b4370 ;;; regexp-opt.el ends here