]> code.delx.au - gnu-emacs/blobdiff - lisp/character-fold.el
Honor prefix arg in doc-view-next-line-or-next-page
[gnu-emacs] / lisp / character-fold.el
index 49d75bd24eef387c626d127b672fc06ef819c1fa..2d3a8c67fa566aca264a4d2e61a4e2d2f154d455 100644 (file)
@@ -1,6 +1,6 @@
 ;;; character-fold.el --- match unicode to similar ASCII -*- lexical-binding: t; -*-
 
-;; Copyright (C) 2015 Free Software Foundation, Inc.
+;; Copyright (C) 2015-2016 Free Software Foundation, Inc.
 
 ;; Maintainer: emacs-devel@gnu.org
 ;; Keywords: matching
@@ -150,12 +150,13 @@ Any character in STRING that has an entry in
 `character-fold-table' is replaced with that entry (which is a
 regexp) and other characters are `regexp-quote'd.
 
+If the resulting regexp would be too long for Emacs to handle,
+just return the result of calling `regexp-quote' on STRING.
+
 FROM is for internal use.  It specifies an index in the STRING
 from which to start."
   (let* ((spaces 0)
          (multi-char-table (char-table-extra-slot character-fold-table 0))
-         (lower-case-table (current-case-table))
-         (upper-case-table (char-table-extra-slot lower-case-table 0))
          (i (or from 0))
          (end (length string))
          (out nil))
@@ -175,50 +176,48 @@ from which to start."
              (setq spaces 0))
            (let ((regexp (or (aref character-fold-table c)
                              (regexp-quote (string c))))
-                 (alist nil))
-             ;; Long string.  The regexp would probably be too long.
-             (unless (> end 50)
-               (setq alist (aref multi-char-table c))
-               (when case-fold-search
-                 (let ((other-c (aref lower-case-table c)))
-                   (when (or (not other-c)
-                             (eq other-c c))
-                     (setq other-c (aref upper-case-table c)))
-                   (when other-c
-                     (setq alist (append alist (aref multi-char-table other-c)))
-                     (setq regexp (concat "\\(?:" regexp "\\|"
-                                          (or (aref character-fold-table other-c)
-                                              (regexp-quote (string other-c)))
-                                          "\\)"))))))
-             (push (let ((alist-out '("\\)")))
-                     (pcase-dolist (`(,suffix . ,out-regexp) alist)
-                       (let ((len-suf (length suffix)))
+                 ;; Long string.  The regexp would probably be too long.
+                 (alist (unless (> end 50)
+                          (aref multi-char-table c))))
+             (push (let ((matched-entries nil)
+                         (max-length 0))
+                     (dolist (entry alist)
+                       (let* ((suffix (car entry))
+                              (len-suf (length suffix)))
                          (when (eq (compare-strings suffix 0 nil
                                                     string (1+ i) (+ i 1 len-suf)
                                                     nil)
                                    t)
-                           ;; FIXME: If N suffixes match, we "branch"
-                           ;; out into N+1 executions for the rest of
-                           ;; the string.  This involves redundant
-                           ;; work and makes a huge regexp.
-                           (push (concat "\\|" out-regexp
-                                         (character-fold-to-regexp
-                                          string nil (+ i 1 len-suf)))
-                                 alist-out))))
+                           (push (cons len-suf (cdr entry)) matched-entries)
+                           (setq max-length (max max-length len-suf)))))
                      ;; If no suffixes matched, just go on.
-                     (if (not (cdr alist-out))
+                     (if (not matched-entries)
                          regexp
-                       ;; Otherwise, add a branch for the
-                       ;; no-suffix case, and stop the loop here.
-                       (prog1 (apply #'concat "\\(?:" regexp
-                                     (character-fold-to-regexp string nil (1+ i))
-                                     alist-out)
-                         (setq i end))))
+;;; If N suffixes match, we "branch" out into N+1 executions for the
+;;; length of the longest match.  This means "fix" will match "fix" but
+;;; not "fⅸ", but it's necessary to keep the regexp size from scaling
+;;; exponentially.  See https://lists.gnu.org/archive/html/emacs-devel/2015-11/msg02562.html
+                       (let ((subs (substring string (1+ i) (+ i 1 max-length))))
+                         ;; `i' is still going to inc by 1 below.
+                         (setq i (+ i max-length))
+                         (concat
+                          "\\(?:"
+                          (mapconcat (lambda (entry)
+                                       (let ((length (car entry))
+                                             (suffix-regexp (cdr entry)))
+                                         (concat suffix-regexp
+                                                 (character-fold-to-regexp subs nil length))))
+                                     `((0 . ,regexp) . ,matched-entries) "\\|")
+                          "\\)"))))
                    out))))
       (setq i (1+ i)))
     (when (> spaces 0)
       (push (character-fold--make-space-string spaces) out))
-    (apply #'concat (nreverse out))))
+    (let ((regexp (apply #'concat (nreverse out))))
+      ;; Limited by `MAX_BUF_SIZE' in `regex.c'.
+      (if (> (length regexp) 5000)
+          (regexp-quote string)
+        regexp))))
 
 \f
 ;;; Commands provided for completeness.