/* String search routines for GNU Emacs.
-Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2012
- Free Software Foundation, Inc.
+Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software
+Foundation, Inc.
This file is part of GNU Emacs.
non-nil, we can use boyer-moore search only if TRT can be
represented by the byte array of 256 elements. For that,
all non-ASCII case-equivalents of all case-sensitive
- characters in STRING must belong to the same charset and
- row. */
+ characters in STRING must belong to the same character
+ group (two characters belong to the same group iff their
+ multibyte forms are the same except for the last byte;
+ i.e. every 64 characters form a group; U+0000..U+003F,
+ U+0040..U+007F, U+0080..U+00BF, ...). */
while (--len >= 0)
{
char_base = 0;
while (--len >= 0)
{
- int c, translated;
+ int c, translated, inverse;
/* If we got here and the RE flag is set, it's because we're
dealing with a regexp known to be trivial, so the backslash
c = *base_pat++;
TRANSLATE (translated, trt, c);
*pat++ = translated;
+ /* Check that none of C's equivalents violates the
+ assumptions of boyer_moore. */
+ TRANSLATE (inverse, inverse_trt, c);
+ while (1)
+ {
+ if (inverse >= 0200)
+ {
+ boyer_moore_ok = 0;
+ break;
+ }
+ if (c == inverse)
+ break;
+ TRANSLATE (inverse, inverse_trt, inverse);
+ }
}
}