code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2
   3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2015 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23
  24 #include "lisp.h"
  25 #include "category.h"
  26 #include "character.h"
  27 #include "buffer.h"
  28 #include "syntax.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* True means regexp was compiled to do full POSIX backtracking.  */
  53   bool posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 static struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 static void set_search_regs (ptrdiff_t, ptrdiff_t);
  88 static void save_search_regs (void);
  89 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
  90                                 ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
  91                                 ptrdiff_t, ptrdiff_t);
  92 static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
  93                               Lisp_Object, Lisp_Object, ptrdiff_t,
  94                               ptrdiff_t, int);
  95 static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
  96                                 ptrdiff_t, ptrdiff_t, EMACS_INT, int,
  97                                 Lisp_Object, Lisp_Object, bool);
  98
  99 static _Noreturn void
 100 matcher_overflow (void)
 101 {
 102   error ("Stack overflow in regexp matcher");
 103 }
 104
 105 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 106    PATTERN is the pattern to compile.
 107    CP is the place to put the result.
 108    TRANSLATE is a translation table for ignoring case, or nil for none.
 109    POSIX is true if we want full backtracking (POSIX style) for this pattern.
 110    False means backtrack only enough to get a valid match.
 111
 112    The behavior also depends on Vsearch_spaces_regexp.  */
 113
 114 static void
 115 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern,
 116                    Lisp_Object translate, bool posix)
 117 {
 118   char *val;
 119   reg_syntax_t old;
 120
 121   cp->regexp = Qnil;
 122   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 123   cp->posix = posix;
 124   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 125   cp->buf.charset_unibyte = charset_unibyte;
 126   if (STRINGP (Vsearch_spaces_regexp))
 127     cp->whitespace_regexp = Vsearch_spaces_regexp;
 128   else
 129     cp->whitespace_regexp = Qnil;
 130
 131   /* rms: I think BLOCK_INPUT is not needed here any more,
 132      because regex.c defines malloc to call xmalloc.
 133      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 134      So let's turn it off.  */
 135   /*  BLOCK_INPUT;  */
 136   old = re_set_syntax (RE_SYNTAX_EMACS
 137                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 138
 139   if (STRINGP (Vsearch_spaces_regexp))
 140     re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
 141   else
 142     re_set_whitespace_regexp (NULL);
 143
 144   val = (char *) re_compile_pattern (SSDATA (pattern),
 145                                      SBYTES (pattern), &cp->buf);
 146
 147   /* If the compiled pattern hard codes some of the contents of the
 148      syntax-table, it can only be reused with *this* syntax table.  */
 149   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
 150
 151   re_set_whitespace_regexp (NULL);
 152
 153   re_set_syntax (old);
 154   /* unblock_input ();  */
 155   if (val)
 156     xsignal1 (Qinvalid_regexp, build_string (val));
 157
 158   cp->regexp = Fcopy_sequence (pattern);
 159 }
 160
 161 /* Shrink each compiled regexp buffer in the cache
 162    to the size actually used right now.
 163    This is called from garbage collection.  */
 164
 165 void
 166 shrink_regexp_cache (void)
 167 {
 168   struct regexp_cache *cp;
 169
 170   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 171     {
 172       cp->buf.allocated = cp->buf.used;
 173       cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
 174     }
 175 }
 176
 177 /* Clear the regexp cache w.r.t. a particular syntax table,
 178    because it was changed.
 179    There is no danger of memory leak here because re_compile_pattern
 180    automagically manages the memory in each re_pattern_buffer struct,
 181    based on its `allocated' and `buffer' values.  */
 182 void
 183 clear_regexp_cache (void)
 184 {
 185   int i;
 186
 187   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 188     /* It's tempting to compare with the syntax-table we've actually changed,
 189        but it's not sufficient because char-table inheritance means that
 190        modifying one syntax-table can change others at the same time.  */
 191     if (!EQ (searchbufs[i].syntax_table, Qt))
 192       searchbufs[i].regexp = Qnil;
 193 }
 194
 195 /* Compile a regexp if necessary, but first check to see if there's one in
 196    the cache.
 197    PATTERN is the pattern to compile.
 198    TRANSLATE is a translation table for ignoring case, or nil for none.
 199    REGP is the structure that says where to store the "register"
 200    values that will result from matching this pattern.
 201    If it is 0, we should compile the pattern not to record any
 202    subexpression bounds.
 203    POSIX is true if we want full backtracking (POSIX style) for this pattern.
 204    False means backtrack only enough to get a valid match.  */
 205
 206 struct re_pattern_buffer *
 207 compile_pattern (Lisp_Object pattern, struct re_registers *regp,
 208                  Lisp_Object translate, bool posix, bool multibyte)
 209 {
 210   struct regexp_cache *cp, **cpp;
 211
 212   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 213     {
 214       cp = *cpp;
 215       /* Entries are initialized to nil, and may be set to nil by
 216          compile_pattern_1 if the pattern isn't valid.  Don't apply
 217          string accessors in those cases.  However, compile_pattern_1
 218          is only applied to the cache entry we pick here to reuse.  So
 219          nil should never appear before a non-nil entry.  */
 220       if (NILP (cp->regexp))
 221         goto compile_it;
 222       if (SCHARS (cp->regexp) == SCHARS (pattern)
 223           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 224           && !NILP (Fstring_equal (cp->regexp, pattern))
 225           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 226           && cp->posix == posix
 227           && (EQ (cp->syntax_table, Qt)
 228               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
 229           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 230           && cp->buf.charset_unibyte == charset_unibyte)
 231         break;
 232
 233       /* If we're at the end of the cache, compile into the nil cell
 234          we found, or the last (least recently used) cell with a
 235          string value.  */
 236       if (cp->next == 0)
 237         {
 238         compile_it:
 239           compile_pattern_1 (cp, pattern, translate, posix);
 240           break;
 241         }
 242     }
 243
 244   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 245      either because we found it in the cache or because we just compiled it.
 246      Move it to the front of the queue to mark it as most recently used.  */
 247   *cpp = cp->next;
 248   cp->next = searchbuf_head;
 249   searchbuf_head = cp;
 250
 251   /* Advise the searching functions about the space we have allocated
 252      for register data.  */
 253   if (regp)
 254     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 255
 256   /* The compiled pattern can be used both for multibyte and unibyte
 257      target.  But, we have to tell which the pattern is used for. */
 258   cp->buf.target_multibyte = multibyte;
 259
 260   return &cp->buf;
 261 }
 262
 263 \f
 264 static Lisp_Object
 265 looking_at_1 (Lisp_Object string, bool posix)
 266 {
 267   Lisp_Object val;
 268   unsigned char *p1, *p2;
 269   ptrdiff_t s1, s2;
 270   register ptrdiff_t i;
 271   struct re_pattern_buffer *bufp;
 272
 273   if (running_asynch_code)
 274     save_search_regs ();
 275
 276   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 277   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 278                          BVAR (current_buffer, case_eqv_table));
 279
 280   CHECK_STRING (string);
 281   bufp = compile_pattern (string,
 282                           (NILP (Vinhibit_changing_match_data)
 283                            ? &search_regs : NULL),
 284                           (!NILP (BVAR (current_buffer, case_fold_search))
 285                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 286                           posix,
 287                           !NILP (BVAR (current_buffer, enable_multibyte_characters)));
 288
 289   immediate_quit = 1;
 290   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 291
 292   /* Get pointers and sizes of the two strings
 293      that make up the visible portion of the buffer. */
 294
 295   p1 = BEGV_ADDR;
 296   s1 = GPT_BYTE - BEGV_BYTE;
 297   p2 = GAP_END_ADDR;
 298   s2 = ZV_BYTE - GPT_BYTE;
 299   if (s1 < 0)
 300     {
 301       p2 = p1;
 302       s2 = ZV_BYTE - BEGV_BYTE;
 303       s1 = 0;
 304     }
 305   if (s2 < 0)
 306     {
 307       s1 = ZV_BYTE - BEGV_BYTE;
 308       s2 = 0;
 309     }
 310
 311   re_match_object = Qnil;
 312
 313   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 314                   PT_BYTE - BEGV_BYTE,
 315                   (NILP (Vinhibit_changing_match_data)
 316                    ? &search_regs : NULL),
 317                   ZV_BYTE - BEGV_BYTE);
 318   immediate_quit = 0;
 319
 320   if (i == -2)
 321     matcher_overflow ();
 322
 323   val = (i >= 0 ? Qt : Qnil);
 324   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 325   {
 326     for (i = 0; i < search_regs.num_regs; i++)
 327       if (search_regs.start[i] >= 0)
 328         {
 329           search_regs.start[i]
 330             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 331          search_regs.end[i]
 332            = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 333        }
 334     /* Set last_thing_searched only when match data is changed.  */
 335     XSETBUFFER (last_thing_searched, current_buffer);
 336   }
 337
 338   return val;
 339 }
 340
 341 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 342        doc: /* Return t if text after point matches regular expression REGEXP.
 343 This function modifies the match data that `match-beginning',
 344 `match-end' and `match-data' access; save and restore the match
 345 data if you want to preserve them.  */)
 346   (Lisp_Object regexp)
 347 {
 348   return looking_at_1 (regexp, 0);
 349 }
 350
 351 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 352        doc: /* Return t if text after point matches regular expression REGEXP.
 353 Find the longest match, in accord with Posix regular expression rules.
 354 This function modifies the match data that `match-beginning',
 355 `match-end' and `match-data' access; save and restore the match
 356 data if you want to preserve them.  */)
 357   (Lisp_Object regexp)
 358 {
 359   return looking_at_1 (regexp, 1);
 360 }
 361 \f
 362 static Lisp_Object
 363 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start,
 364                 bool posix)
 365 {
 366   ptrdiff_t val;
 367   struct re_pattern_buffer *bufp;
 368   EMACS_INT pos;
 369   ptrdiff_t pos_byte, i;
 370
 371   if (running_asynch_code)
 372     save_search_regs ();
 373
 374   CHECK_STRING (regexp);
 375   CHECK_STRING (string);
 376
 377   if (NILP (start))
 378     pos = 0, pos_byte = 0;
 379   else
 380     {
 381       ptrdiff_t len = SCHARS (string);
 382
 383       CHECK_NUMBER (start);
 384       pos = XINT (start);
 385       if (pos < 0 && -pos <= len)
 386         pos = len + pos;
 387       else if (0 > pos || pos > len)
 388         args_out_of_range (string, start);
 389       pos_byte = string_char_to_byte (string, pos);
 390     }
 391
 392   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 393   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 394                          BVAR (current_buffer, case_eqv_table));
 395
 396   bufp = compile_pattern (regexp,
 397                           (NILP (Vinhibit_changing_match_data)
 398                            ? &search_regs : NULL),
 399                           (!NILP (BVAR (current_buffer, case_fold_search))
 400                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 401                           posix,
 402                           STRING_MULTIBYTE (string));
 403   immediate_quit = 1;
 404   re_match_object = string;
 405
 406   val = re_search (bufp, SSDATA (string),
 407                    SBYTES (string), pos_byte,
 408                    SBYTES (string) - pos_byte,
 409                    (NILP (Vinhibit_changing_match_data)
 410                     ? &search_regs : NULL));
 411   immediate_quit = 0;
 412
 413   /* Set last_thing_searched only when match data is changed.  */
 414   if (NILP (Vinhibit_changing_match_data))
 415     last_thing_searched = Qt;
 416
 417   if (val == -2)
 418     matcher_overflow ();
 419   if (val < 0) return Qnil;
 420
 421   if (NILP (Vinhibit_changing_match_data))
 422     for (i = 0; i < search_regs.num_regs; i++)
 423       if (search_regs.start[i] >= 0)
 424         {
 425           search_regs.start[i]
 426             = string_byte_to_char (string, search_regs.start[i]);
 427           search_regs.end[i]
 428             = string_byte_to_char (string, search_regs.end[i]);
 429         }
 430
 431   return make_number (string_byte_to_char (string, val));
 432 }
 433
 434 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 435        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 436 Matching ignores case if `case-fold-search' is non-nil.
 437 If third arg START is non-nil, start search at that index in STRING.
 438 For index of first char beyond the match, do (match-end 0).
 439 `match-end' and `match-beginning' also give indices of substrings
 440 matched by parenthesis constructs in the pattern.
 441
 442 You can use the function `match-string' to extract the substrings
 443 matched by the parenthesis constructions in REGEXP. */)
 444   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 445 {
 446   return string_match_1 (regexp, string, start, 0);
 447 }
 448
 449 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 450        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 451 Find the longest match, in accord with Posix regular expression rules.
 452 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 453 If third arg START is non-nil, start search at that index in STRING.
 454 For index of first char beyond the match, do (match-end 0).
 455 `match-end' and `match-beginning' also give indices of substrings
 456 matched by parenthesis constructs in the pattern.  */)
 457   (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
 458 {
 459   return string_match_1 (regexp, string, start, 1);
 460 }
 461
 462 /* Match REGEXP against STRING, searching all of STRING,
 463    and return the index of the match, or negative on failure.
 464    This does not clobber the match data.  */
 465
 466 ptrdiff_t
 467 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 468 {
 469   ptrdiff_t val;
 470   struct re_pattern_buffer *bufp;
 471
 472   bufp = compile_pattern (regexp, 0, Qnil,
 473                           0, STRING_MULTIBYTE (string));
 474   immediate_quit = 1;
 475   re_match_object = string;
 476
 477   val = re_search (bufp, SSDATA (string),
 478                    SBYTES (string), 0,
 479                    SBYTES (string), 0);
 480   immediate_quit = 0;
 481   return val;
 482 }
 483
 484 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 485    and return the index of the match, or negative on failure.
 486    This does not clobber the match data.
 487    We assume that STRING contains single-byte characters.  */
 488
 489 ptrdiff_t
 490 fast_c_string_match_ignore_case (Lisp_Object regexp,
 491                                  const char *string, ptrdiff_t len)
 492 {
 493   ptrdiff_t val;
 494   struct re_pattern_buffer *bufp;
 495
 496   regexp = string_make_unibyte (regexp);
 497   re_match_object = Qt;
 498   bufp = compile_pattern (regexp, 0,
 499                           Vascii_canon_table, 0,
 500                           0);
 501   immediate_quit = 1;
 502   val = re_search (bufp, string, len, 0, len, 0);
 503   immediate_quit = 0;
 504   return val;
 505 }
 506
 507 /* Like fast_string_match but ignore case.  */
 508
 509 ptrdiff_t
 510 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 511 {
 512   ptrdiff_t val;
 513   struct re_pattern_buffer *bufp;
 514
 515   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 516                           0, STRING_MULTIBYTE (string));
 517   immediate_quit = 1;
 518   re_match_object = string;
 519
 520   val = re_search (bufp, SSDATA (string),
 521                    SBYTES (string), 0,
 522                    SBYTES (string), 0);
 523   immediate_quit = 0;
 524   return val;
 525 }
 526 \f
 527 /* Match REGEXP against the characters after POS to LIMIT, and return
 528    the number of matched characters.  If STRING is non-nil, match
 529    against the characters in it.  In that case, POS and LIMIT are
 530    indices into the string.  This function doesn't modify the match
 531    data.  */
 532
 533 ptrdiff_t
 534 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte,
 535                  ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
 536 {
 537   bool multibyte;
 538   struct re_pattern_buffer *buf;
 539   unsigned char *p1, *p2;
 540   ptrdiff_t s1, s2;
 541   ptrdiff_t len;
 542
 543   if (STRINGP (string))
 544     {
 545       if (pos_byte < 0)
 546         pos_byte = string_char_to_byte (string, pos);
 547       if (limit_byte < 0)
 548         limit_byte = string_char_to_byte (string, limit);
 549       p1 = NULL;
 550       s1 = 0;
 551       p2 = SDATA (string);
 552       s2 = SBYTES (string);
 553       re_match_object = string;
 554       multibyte = STRING_MULTIBYTE (string);
 555     }
 556   else
 557     {
 558       if (pos_byte < 0)
 559         pos_byte = CHAR_TO_BYTE (pos);
 560       if (limit_byte < 0)
 561         limit_byte = CHAR_TO_BYTE (limit);
 562       pos_byte -= BEGV_BYTE;
 563       limit_byte -= BEGV_BYTE;
 564       p1 = BEGV_ADDR;
 565       s1 = GPT_BYTE - BEGV_BYTE;
 566       p2 = GAP_END_ADDR;
 567       s2 = ZV_BYTE - GPT_BYTE;
 568       if (s1 < 0)
 569         {
 570           p2 = p1;
 571           s2 = ZV_BYTE - BEGV_BYTE;
 572           s1 = 0;
 573         }
 574       if (s2 < 0)
 575         {
 576           s1 = ZV_BYTE - BEGV_BYTE;
 577           s2 = 0;
 578         }
 579       re_match_object = Qnil;
 580       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
 581     }
 582
 583   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 584   immediate_quit = 1;
 585   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 586                     pos_byte, NULL, limit_byte);
 587   immediate_quit = 0;
 588
 589   return len;
 590 }
 591
 592 \f
 593 /* The newline cache: remembering which sections of text have no newlines.  */
 594
 595 /* If the user has requested the long scans caching, make sure it's on.
 596    Otherwise, make sure it's off.
 597    This is our cheezy way of associating an action with the change of
 598    state of a buffer-local variable.  */
 599 static struct region_cache *
 600 newline_cache_on_off (struct buffer *buf)
 601 {
 602   struct buffer *base_buf = buf;
 603   bool indirect_p = false;
 604
 605   if (buf->base_buffer)
 606     {
 607       base_buf = buf->base_buffer;
 608       indirect_p = true;
 609     }
 610
 611   /* Don't turn on or off the cache in the base buffer, if the value
 612      of cache-long-scans of the base buffer is inconsistent with that.
 613      This is because doing so will just make the cache pure overhead,
 614      since if we turn it on via indirect buffer, it will be
 615      immediately turned off by its base buffer.  */
 616   if (NILP (BVAR (buf, cache_long_scans)))
 617     {
 618       if (!indirect_p
 619           || NILP (BVAR (base_buf, cache_long_scans)))
 620         {
 621           /* It should be off.  */
 622           if (base_buf->newline_cache)
 623             {
 624               free_region_cache (base_buf->newline_cache);
 625               base_buf->newline_cache = 0;
 626             }
 627         }
 628       return NULL;
 629     }
 630   else
 631     {
 632       if (!indirect_p
 633           || !NILP (BVAR (base_buf, cache_long_scans)))
 634         {
 635           /* It should be on.  */
 636           if (base_buf->newline_cache == 0)
 637             base_buf->newline_cache = new_region_cache ();
 638         }
 639       return base_buf->newline_cache;
 640     }
 641 }
 642
 643 \f
 644 /* Search for COUNT newlines between START/START_BYTE and END/END_BYTE.
 645
 646    If COUNT is positive, search forwards; END must be >= START.
 647    If COUNT is negative, search backwards for the -COUNTth instance;
 648       END must be <= START.
 649    If COUNT is zero, do anything you please; run rogue, for all I care.
 650
 651    If END is zero, use BEGV or ZV instead, as appropriate for the
 652    direction indicated by COUNT.
 653
 654    If we find COUNT instances, set *SHORTAGE to zero, and return the
 655    position past the COUNTth match.  Note that for reverse motion
 656    this is not the same as the usual convention for Emacs motion commands.
 657
 658    If we don't find COUNT instances before reaching END, set *SHORTAGE
 659    to the number of newlines left unfound, and return END.
 660
 661    If BYTEPOS is not NULL, set *BYTEPOS to the byte position corresponding
 662    to the returned character position.
 663
 664    If ALLOW_QUIT, set immediate_quit.  That's good to do
 665    except when inside redisplay.  */
 666
 667 ptrdiff_t
 668 find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 669               ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
 670               ptrdiff_t *bytepos, bool allow_quit)
 671 {
 672   struct region_cache *newline_cache;
 673   int direction;
 674   struct buffer *cache_buffer;
 675
 676   if (count > 0)
 677     {
 678       direction = 1;
 679       if (!end)
 680         end = ZV, end_byte = ZV_BYTE;
 681     }
 682   else
 683     {
 684       direction = -1;
 685       if (!end)
 686         end = BEGV, end_byte = BEGV_BYTE;
 687     }
 688   if (end_byte == -1)
 689     end_byte = CHAR_TO_BYTE (end);
 690
 691   newline_cache = newline_cache_on_off (current_buffer);
 692   if (current_buffer->base_buffer)
 693     cache_buffer = current_buffer->base_buffer;
 694   else
 695     cache_buffer = current_buffer;
 696
 697   if (shortage != 0)
 698     *shortage = 0;
 699
 700   immediate_quit = allow_quit;
 701
 702   if (count > 0)
 703     while (start != end)
 704       {
 705         /* Our innermost scanning loop is very simple; it doesn't know
 706            about gaps, buffer ends, or the newline cache.  ceiling is
 707            the position of the last character before the next such
 708            obstacle --- the last character the dumb search loop should
 709            examine.  */
 710         ptrdiff_t tem, ceiling_byte = end_byte - 1;
 711
 712         /* If we're using the newline cache, consult it to see whether
 713            we can avoid some scanning.  */
 714         if (newline_cache)
 715           {
 716             ptrdiff_t next_change;
 717             int result = 1;
 718
 719             immediate_quit = 0;
 720             while (start < end && result)
 721               {
 722                 ptrdiff_t lim1;
 723
 724                 result = region_cache_forward (cache_buffer, newline_cache,
 725                                                start, &next_change);
 726                 if (result)
 727                   {
 728                     start = next_change;
 729                     lim1 = next_change = end;
 730                   }
 731                 else
 732                   lim1 = min (next_change, end);
 733
 734                 /* The cache returned zero for this region; see if
 735                    this is because the region is known and includes
 736                    only newlines.  While at that, count any newlines
 737                    we bump into, and exit if we found enough off them.  */
 738                 start_byte = CHAR_TO_BYTE (start);
 739                 while (start < lim1
 740                        && FETCH_BYTE (start_byte) == '\n')
 741                   {
 742                     start_byte++;
 743                     start++;
 744                     if (--count == 0)
 745                       {
 746                         if (bytepos)
 747                           *bytepos = start_byte;
 748                         return start;
 749                       }
 750                   }
 751                 /* If we found a non-newline character before hitting
 752                    position where the cache will again return non-zero
 753                    (i.e. no newlines beyond that position), it means
 754                    this region is not yet known to the cache, and we
 755                    must resort to the "dumb loop" method.  */
 756                 if (start < next_change && !result)
 757                   break;
 758                 result = 1;
 759               }
 760             if (start >= end)
 761               {
 762                 start = end;
 763                 start_byte = end_byte;
 764                 break;
 765               }
 766             immediate_quit = allow_quit;
 767
 768             /* START should never be after END.  */
 769             if (start_byte > ceiling_byte)
 770               start_byte = ceiling_byte;
 771
 772             /* Now the text after start is an unknown region, and
 773                next_change is the position of the next known region. */
 774             ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
 775           }
 776         else if (start_byte == -1)
 777           start_byte = CHAR_TO_BYTE (start);
 778
 779         /* The dumb loop can only scan text stored in contiguous
 780            bytes. BUFFER_CEILING_OF returns the last character
 781            position that is contiguous, so the ceiling is the
 782            position after that.  */
 783         tem = BUFFER_CEILING_OF (start_byte);
 784         ceiling_byte = min (tem, ceiling_byte);
 785
 786         {
 787           /* The termination address of the dumb loop.  */
 788           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
 789           ptrdiff_t lim_byte = ceiling_byte + 1;
 790
 791           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
 792              of the base, the cursor, and the next line.  */
 793           ptrdiff_t base = start_byte - lim_byte;
 794           ptrdiff_t cursor, next;
 795
 796           for (cursor = base; cursor < 0; cursor = next)
 797             {
 798               /* The dumb loop.  */
 799               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
 800               next = nl ? nl - lim_addr : 0;
 801
 802               /* If we're using the newline cache, cache the fact that
 803                  the region we just traversed is free of newlines. */
 804               if (newline_cache && cursor != next)
 805                 {
 806                   know_region_cache (cache_buffer, newline_cache,
 807                                      BYTE_TO_CHAR (lim_byte + cursor),
 808                                      BYTE_TO_CHAR (lim_byte + next));
 809                   /* know_region_cache can relocate buffer text.  */
 810                   lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
 811                 }
 812
 813               if (! nl)
 814                 break;
 815               next++;
 816
 817               if (--count == 0)
 818                 {
 819                   immediate_quit = 0;
 820                   if (bytepos)
 821                     *bytepos = lim_byte + next;
 822                   return BYTE_TO_CHAR (lim_byte + next);
 823                 }
 824             }
 825
 826           start_byte = lim_byte;
 827           start = BYTE_TO_CHAR (start_byte);
 828         }
 829       }
 830   else
 831     while (start > end)
 832       {
 833         /* The last character to check before the next obstacle.  */
 834         ptrdiff_t tem, ceiling_byte = end_byte;
 835
 836         /* Consult the newline cache, if appropriate.  */
 837         if (newline_cache)
 838           {
 839             ptrdiff_t next_change;
 840             int result = 1;
 841
 842             immediate_quit = 0;
 843             while (start > end && result)
 844               {
 845                 ptrdiff_t lim1;
 846
 847                 result = region_cache_backward (cache_buffer, newline_cache,
 848                                                 start, &next_change);
 849                 if (result)
 850                   {
 851                     start = next_change;
 852                     lim1 = next_change = end;
 853                   }
 854                 else
 855                   lim1 = max (next_change, end);
 856                 start_byte = CHAR_TO_BYTE (start);
 857                 while (start > lim1
 858                        && FETCH_BYTE (start_byte - 1) == '\n')
 859                   {
 860                     if (++count == 0)
 861                       {
 862                         if (bytepos)
 863                           *bytepos = start_byte;
 864                         return start;
 865                       }
 866                     start_byte--;
 867                     start--;
 868                   }
 869                 if (start > next_change && !result)
 870                   break;
 871                 result = 1;
 872               }
 873             if (start <= end)
 874               {
 875                 start = end;
 876                 start_byte = end_byte;
 877                 break;
 878               }
 879             immediate_quit = allow_quit;
 880
 881             /* Start should never be at or before end.  */
 882             if (start_byte <= ceiling_byte)
 883               start_byte = ceiling_byte + 1;
 884
 885             /* Now the text before start is an unknown region, and
 886                next_change is the position of the next known region. */
 887             ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
 888           }
 889         else if (start_byte == -1)
 890           start_byte = CHAR_TO_BYTE (start);
 891
 892         /* Stop scanning before the gap.  */
 893         tem = BUFFER_FLOOR_OF (start_byte - 1);
 894         ceiling_byte = max (tem, ceiling_byte);
 895
 896         {
 897           /* The termination address of the dumb loop.  */
 898           unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 899
 900           /* Offsets (relative to CEILING_ADDR and CEILING_BYTE) of
 901              the base, the cursor, and the previous line.  These
 902              offsets are at least -1.  */
 903           ptrdiff_t base = start_byte - ceiling_byte;
 904           ptrdiff_t cursor, prev;
 905
 906           for (cursor = base; 0 < cursor; cursor = prev)
 907             {
 908               unsigned char *nl = memrchr (ceiling_addr, '\n', cursor);
 909               prev = nl ? nl - ceiling_addr : -1;
 910
 911               /* If we're looking for newlines, cache the fact that
 912                  this line's region is free of them. */
 913               if (newline_cache && cursor != prev + 1)
 914                 {
 915                   know_region_cache (cache_buffer, newline_cache,
 916                                      BYTE_TO_CHAR (ceiling_byte + prev + 1),
 917                                      BYTE_TO_CHAR (ceiling_byte + cursor));
 918                   /* know_region_cache can relocate buffer text.  */
 919                   ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 920                 }
 921
 922               if (! nl)
 923                 break;
 924
 925               if (++count >= 0)
 926                 {
 927                   immediate_quit = 0;
 928                   if (bytepos)
 929                     *bytepos = ceiling_byte + prev + 1;
 930                   return BYTE_TO_CHAR (ceiling_byte + prev + 1);
 931                 }
 932             }
 933
 934           start_byte = ceiling_byte;
 935           start = BYTE_TO_CHAR (start_byte);
 936         }
 937       }
 938
 939   immediate_quit = 0;
 940   if (shortage)
 941     *shortage = count * direction;
 942   if (bytepos)
 943     {
 944       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
 945       eassert (*bytepos == CHAR_TO_BYTE (start));
 946     }
 947   return start;
 948 }
 949 \f
 950 /* Search for COUNT instances of a line boundary.
 951    Start at START.  If COUNT is negative, search backwards.
 952
 953    We report the resulting position by calling TEMP_SET_PT_BOTH.
 954
 955    If we find COUNT instances. we position after (always after,
 956    even if scanning backwards) the COUNTth match, and return 0.
 957
 958    If we don't find COUNT instances before reaching the end of the
 959    buffer (or the beginning, if scanning backwards), we return
 960    the number of line boundaries left unfound, and position at
 961    the limit we bumped up against.
 962
 963    If ALLOW_QUIT, set immediate_quit.  That's good to do
 964    except in special cases.  */
 965
 966 ptrdiff_t
 967 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
 968               ptrdiff_t limit, ptrdiff_t limit_byte,
 969               ptrdiff_t count, bool allow_quit)
 970 {
 971   ptrdiff_t charpos, bytepos, shortage;
 972
 973   charpos = find_newline (start, start_byte, limit, limit_byte,
 974                           count, &shortage, &bytepos, allow_quit);
 975   if (shortage)
 976     TEMP_SET_PT_BOTH (limit, limit_byte);
 977   else
 978     TEMP_SET_PT_BOTH (charpos, bytepos);
 979   return shortage;
 980 }
 981
 982 /* Like above, but always scan from point and report the
 983    resulting position in *CHARPOS and *BYTEPOS.  */
 984
 985 ptrdiff_t
 986 scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
 987                          ptrdiff_t *bytepos)
 988 {
 989   ptrdiff_t shortage;
 990
 991   if (count <= 0)
 992     *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
 993                              &shortage, bytepos, 1);
 994   else
 995     *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
 996                              &shortage, bytepos, 1);
 997   return shortage;
 998 }
 999
1000 /* Like find_newline, but doesn't allow QUITting and doesn't return
1001    SHORTAGE.  */
1002 ptrdiff_t
1003 find_newline_no_quit (ptrdiff_t from, ptrdiff_t frombyte,
1004                       ptrdiff_t cnt, ptrdiff_t *bytepos)
1005 {
1006   return find_newline (from, frombyte, 0, -1, cnt, NULL, bytepos, 0);
1007 }
1008
1009 /* Like find_newline, but returns position before the newline, not
1010    after, and only search up to TO.
1011    This isn't just find_newline_no_quit (...)-1, because you might hit TO.  */
1012
1013 ptrdiff_t
1014 find_before_next_newline (ptrdiff_t from, ptrdiff_t to,
1015                           ptrdiff_t cnt, ptrdiff_t *bytepos)
1016 {
1017   ptrdiff_t shortage;
1018   ptrdiff_t pos = find_newline (from, -1, to, -1, cnt, &shortage, bytepos, 1);
1019
1020   if (shortage == 0)
1021     {
1022       if (bytepos)
1023         DEC_BOTH (pos, *bytepos);
1024       else
1025         pos--;
1026     }
1027   return pos;
1028 }
1029 \f
1030 /* Subroutines of Lisp buffer search functions. */
1031
1032 static Lisp_Object
1033 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
1034                 Lisp_Object count, int direction, int RE, bool posix)
1035 {
1036   EMACS_INT np;
1037   EMACS_INT lim;
1038   ptrdiff_t lim_byte;
1039   EMACS_INT n = direction;
1040
1041   if (!NILP (count))
1042     {
1043       CHECK_NUMBER (count);
1044       n *= XINT (count);
1045     }
1046
1047   CHECK_STRING (string);
1048   if (NILP (bound))
1049     {
1050       if (n > 0)
1051         lim = ZV, lim_byte = ZV_BYTE;
1052       else
1053         lim = BEGV, lim_byte = BEGV_BYTE;
1054     }
1055   else
1056     {
1057       CHECK_NUMBER_COERCE_MARKER (bound);
1058       lim = XINT (bound);
1059       if (n > 0 ? lim < PT : lim > PT)
1060         error ("Invalid search bound (wrong side of point)");
1061       if (lim > ZV)
1062         lim = ZV, lim_byte = ZV_BYTE;
1063       else if (lim < BEGV)
1064         lim = BEGV, lim_byte = BEGV_BYTE;
1065       else
1066         lim_byte = CHAR_TO_BYTE (lim);
1067     }
1068
1069   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
1070   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
1071                          BVAR (current_buffer, case_eqv_table));
1072
1073   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1074                       (!NILP (BVAR (current_buffer, case_fold_search))
1075                        ? BVAR (current_buffer, case_canon_table)
1076                        : Qnil),
1077                       (!NILP (BVAR (current_buffer, case_fold_search))
1078                        ? BVAR (current_buffer, case_eqv_table)
1079                        : Qnil),
1080                       posix);
1081   if (np <= 0)
1082     {
1083       if (NILP (noerror))
1084         xsignal1 (Qsearch_failed, string);
1085
1086       if (!EQ (noerror, Qt))
1087         {
1088           eassert (BEGV <= lim && lim <= ZV);
1089           SET_PT_BOTH (lim, lim_byte);
1090           return Qnil;
1091 #if 0 /* This would be clean, but maybe programs depend on
1092          a value of nil here.  */
1093           np = lim;
1094 #endif
1095         }
1096       else
1097         return Qnil;
1098     }
1099
1100   eassert (BEGV <= np && np <= ZV);
1101   SET_PT (np);
1102
1103   return make_number (np);
1104 }
1105 \f
1106 /* Return true if REGEXP it matches just one constant string.  */
1107
1108 static bool
1109 trivial_regexp_p (Lisp_Object regexp)
1110 {
1111   ptrdiff_t len = SBYTES (regexp);
1112   unsigned char *s = SDATA (regexp);
1113   while (--len >= 0)
1114     {
1115       switch (*s++)
1116         {
1117         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1118           return 0;
1119         case '\\':
1120           if (--len < 0)
1121             return 0;
1122           switch (*s++)
1123             {
1124             case '|': case '(': case ')': case '`': case '\'': case 'b':
1125             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1126             case 'S': case '=': case '{': case '}': case '_':
1127             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1128             case '1': case '2': case '3': case '4': case '5':
1129             case '6': case '7': case '8': case '9':
1130               return 0;
1131             }
1132         }
1133     }
1134   return 1;
1135 }
1136
1137 /* Search for the n'th occurrence of STRING in the current buffer,
1138    starting at position POS and stopping at position LIM,
1139    treating STRING as a literal string if RE is false or as
1140    a regular expression if RE is true.
1141
1142    If N is positive, searching is forward and LIM must be greater than POS.
1143    If N is negative, searching is backward and LIM must be less than POS.
1144
1145    Returns -x if x occurrences remain to be found (x > 0),
1146    or else the position at the beginning of the Nth occurrence
1147    (if searching backward) or the end (if searching forward).
1148
1149    POSIX is nonzero if we want full backtracking (POSIX style)
1150    for this pattern.  0 means backtrack only enough to get a valid match.  */
1151
1152 #define TRANSLATE(out, trt, d)                  \
1153 do                                              \
1154   {                                             \
1155     if (! NILP (trt))                           \
1156       {                                         \
1157         Lisp_Object temp;                       \
1158         temp = Faref (trt, make_number (d));    \
1159         if (INTEGERP (temp))                    \
1160           out = XINT (temp);                    \
1161         else                                    \
1162           out = d;                              \
1163       }                                         \
1164     else                                        \
1165       out = d;                                  \
1166   }                                             \
1167 while (0)
1168
1169 /* Only used in search_buffer, to record the end position of the match
1170    when searching regexps and SEARCH_REGS should not be changed
1171    (i.e. Vinhibit_changing_match_data is non-nil).  */
1172 static struct re_registers search_regs_1;
1173
1174 static EMACS_INT
1175 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1176                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1177                int RE, Lisp_Object trt, Lisp_Object inverse_trt, bool posix)
1178 {
1179   ptrdiff_t len = SCHARS (string);
1180   ptrdiff_t len_byte = SBYTES (string);
1181   register ptrdiff_t i;
1182
1183   if (running_asynch_code)
1184     save_search_regs ();
1185
1186   /* Searching 0 times means don't move.  */
1187   /* Null string is found at starting position.  */
1188   if (len == 0 || n == 0)
1189     {
1190       set_search_regs (pos_byte, 0);
1191       return pos;
1192     }
1193
1194   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1195     {
1196       unsigned char *p1, *p2;
1197       ptrdiff_t s1, s2;
1198       struct re_pattern_buffer *bufp;
1199
1200       bufp = compile_pattern (string,
1201                               (NILP (Vinhibit_changing_match_data)
1202                                ? &search_regs : &search_regs_1),
1203                               trt, posix,
1204                               !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1205
1206       immediate_quit = 1;       /* Quit immediately if user types ^G,
1207                                    because letting this function finish
1208                                    can take too long. */
1209       QUIT;                     /* Do a pending quit right away,
1210                                    to avoid paradoxical behavior */
1211       /* Get pointers and sizes of the two strings
1212          that make up the visible portion of the buffer. */
1213
1214       p1 = BEGV_ADDR;
1215       s1 = GPT_BYTE - BEGV_BYTE;
1216       p2 = GAP_END_ADDR;
1217       s2 = ZV_BYTE - GPT_BYTE;
1218       if (s1 < 0)
1219         {
1220           p2 = p1;
1221           s2 = ZV_BYTE - BEGV_BYTE;
1222           s1 = 0;
1223         }
1224       if (s2 < 0)
1225         {
1226           s1 = ZV_BYTE - BEGV_BYTE;
1227           s2 = 0;
1228         }
1229       re_match_object = Qnil;
1230
1231       while (n < 0)
1232         {
1233           ptrdiff_t val;
1234
1235           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1236                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1237                              (NILP (Vinhibit_changing_match_data)
1238                               ? &search_regs : &search_regs_1),
1239                              /* Don't allow match past current point */
1240                              pos_byte - BEGV_BYTE);
1241           if (val == -2)
1242             {
1243               matcher_overflow ();
1244             }
1245           if (val >= 0)
1246             {
1247               if (NILP (Vinhibit_changing_match_data))
1248                 {
1249                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1250                   for (i = 0; i < search_regs.num_regs; i++)
1251                     if (search_regs.start[i] >= 0)
1252                       {
1253                         search_regs.start[i]
1254                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1255                         search_regs.end[i]
1256                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1257                       }
1258                   XSETBUFFER (last_thing_searched, current_buffer);
1259                   /* Set pos to the new position. */
1260                   pos = search_regs.start[0];
1261                 }
1262               else
1263                 {
1264                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1265                   /* Set pos to the new position.  */
1266                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1267                 }
1268             }
1269           else
1270             {
1271               immediate_quit = 0;
1272               return (n);
1273             }
1274           n++;
1275         }
1276       while (n > 0)
1277         {
1278           ptrdiff_t val;
1279
1280           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1281                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1282                              (NILP (Vinhibit_changing_match_data)
1283                               ? &search_regs : &search_regs_1),
1284                              lim_byte - BEGV_BYTE);
1285           if (val == -2)
1286             {
1287               matcher_overflow ();
1288             }
1289           if (val >= 0)
1290             {
1291               if (NILP (Vinhibit_changing_match_data))
1292                 {
1293                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1294                   for (i = 0; i < search_regs.num_regs; i++)
1295                     if (search_regs.start[i] >= 0)
1296                       {
1297                         search_regs.start[i]
1298                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1299                         search_regs.end[i]
1300                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1301                       }
1302                   XSETBUFFER (last_thing_searched, current_buffer);
1303                   pos = search_regs.end[0];
1304                 }
1305               else
1306                 {
1307                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1308                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1309                 }
1310             }
1311           else
1312             {
1313               immediate_quit = 0;
1314               return (0 - n);
1315             }
1316           n--;
1317         }
1318       immediate_quit = 0;
1319       return (pos);
1320     }
1321   else                          /* non-RE case */
1322     {
1323       unsigned char *raw_pattern, *pat;
1324       ptrdiff_t raw_pattern_size;
1325       ptrdiff_t raw_pattern_size_byte;
1326       unsigned char *patbuf;
1327       bool multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1328       unsigned char *base_pat;
1329       /* Set to positive if we find a non-ASCII char that need
1330          translation.  Otherwise set to zero later.  */
1331       int char_base = -1;
1332       bool boyer_moore_ok = 1;
1333       USE_SAFE_ALLOCA;
1334
1335       /* MULTIBYTE says whether the text to be searched is multibyte.
1336          We must convert PATTERN to match that, or we will not really
1337          find things right.  */
1338
1339       if (multibyte == STRING_MULTIBYTE (string))
1340         {
1341           raw_pattern = SDATA (string);
1342           raw_pattern_size = SCHARS (string);
1343           raw_pattern_size_byte = SBYTES (string);
1344         }
1345       else if (multibyte)
1346         {
1347           raw_pattern_size = SCHARS (string);
1348           raw_pattern_size_byte
1349             = count_size_as_multibyte (SDATA (string),
1350                                        raw_pattern_size);
1351           raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
1352           copy_text (SDATA (string), raw_pattern,
1353                      SCHARS (string), 0, 1);
1354         }
1355       else
1356         {
1357           /* Converting multibyte to single-byte.
1358
1359              ??? Perhaps this conversion should be done in a special way
1360              by subtracting nonascii-insert-offset from each non-ASCII char,
1361              so that only the multibyte chars which really correspond to
1362              the chosen single-byte character set can possibly match.  */
1363           raw_pattern_size = SCHARS (string);
1364           raw_pattern_size_byte = SCHARS (string);
1365           raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
1366           copy_text (SDATA (string), raw_pattern,
1367                      SBYTES (string), 1, 0);
1368         }
1369
1370       /* Copy and optionally translate the pattern.  */
1371       len = raw_pattern_size;
1372       len_byte = raw_pattern_size_byte;
1373       SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
1374       pat = patbuf;
1375       base_pat = raw_pattern;
1376       if (multibyte)
1377         {
1378           /* Fill patbuf by translated characters in STRING while
1379              checking if we can use boyer-moore search.  If TRT is
1380              non-nil, we can use boyer-moore search only if TRT can be
1381              represented by the byte array of 256 elements.  For that,
1382              all non-ASCII case-equivalents of all case-sensitive
1383              characters in STRING must belong to the same character
1384              group (two characters belong to the same group iff their
1385              multibyte forms are the same except for the last byte;
1386              i.e. every 64 characters form a group; U+0000..U+003F,
1387              U+0040..U+007F, U+0080..U+00BF, ...).  */
1388
1389           while (--len >= 0)
1390             {
1391               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1392               int c, translated, inverse;
1393               int in_charlen, charlen;
1394
1395               /* If we got here and the RE flag is set, it's because we're
1396                  dealing with a regexp known to be trivial, so the backslash
1397                  just quotes the next character.  */
1398               if (RE && *base_pat == '\\')
1399                 {
1400                   len--;
1401                   raw_pattern_size--;
1402                   len_byte--;
1403                   base_pat++;
1404                 }
1405
1406               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1407
1408               if (NILP (trt))
1409                 {
1410                   str = base_pat;
1411                   charlen = in_charlen;
1412                 }
1413               else
1414                 {
1415                   /* Translate the character.  */
1416                   TRANSLATE (translated, trt, c);
1417                   charlen = CHAR_STRING (translated, str_base);
1418                   str = str_base;
1419
1420                   /* Check if C has any other case-equivalents.  */
1421                   TRANSLATE (inverse, inverse_trt, c);
1422                   /* If so, check if we can use boyer-moore.  */
1423                   if (c != inverse && boyer_moore_ok)
1424                     {
1425                       /* Check if all equivalents belong to the same
1426                          group of characters.  Note that the check of C
1427                          itself is done by the last iteration.  */
1428                       int this_char_base = -1;
1429
1430                       while (boyer_moore_ok)
1431                         {
1432                           if (ASCII_CHAR_P (inverse))
1433                             {
1434                               if (this_char_base > 0)
1435                                 boyer_moore_ok = 0;
1436                               else
1437                                 this_char_base = 0;
1438                             }
1439                           else if (CHAR_BYTE8_P (inverse))
1440                             /* Boyer-moore search can't handle a
1441                                translation of an eight-bit
1442                                character.  */
1443                             boyer_moore_ok = 0;
1444                           else if (this_char_base < 0)
1445                             {
1446                               this_char_base = inverse & ~0x3F;
1447                               if (char_base < 0)
1448                                 char_base = this_char_base;
1449                               else if (this_char_base != char_base)
1450                                 boyer_moore_ok = 0;
1451                             }
1452                           else if ((inverse & ~0x3F) != this_char_base)
1453                             boyer_moore_ok = 0;
1454                           if (c == inverse)
1455                             break;
1456                           TRANSLATE (inverse, inverse_trt, inverse);
1457                         }
1458                     }
1459                 }
1460
1461               /* Store this character into the translated pattern.  */
1462               memcpy (pat, str, charlen);
1463               pat += charlen;
1464               base_pat += in_charlen;
1465               len_byte -= in_charlen;
1466             }
1467
1468           /* If char_base is still negative we didn't find any translated
1469              non-ASCII characters.  */
1470           if (char_base < 0)
1471             char_base = 0;
1472         }
1473       else
1474         {
1475           /* Unibyte buffer.  */
1476           char_base = 0;
1477           while (--len >= 0)
1478             {
1479               int c, translated, inverse;
1480
1481               /* If we got here and the RE flag is set, it's because we're
1482                  dealing with a regexp known to be trivial, so the backslash
1483                  just quotes the next character.  */
1484               if (RE && *base_pat == '\\')
1485                 {
1486                   len--;
1487                   raw_pattern_size--;
1488                   base_pat++;
1489                 }
1490               c = *base_pat++;
1491               TRANSLATE (translated, trt, c);
1492               *pat++ = translated;
1493               /* Check that none of C's equivalents violates the
1494                  assumptions of boyer_moore.  */
1495               TRANSLATE (inverse, inverse_trt, c);
1496               while (1)
1497                 {
1498                   if (inverse >= 0200)
1499                     {
1500                       boyer_moore_ok = 0;
1501                       break;
1502                     }
1503                   if (c == inverse)
1504                     break;
1505                   TRANSLATE (inverse, inverse_trt, inverse);
1506                 }
1507             }
1508         }
1509
1510       len_byte = pat - patbuf;
1511       pat = base_pat = patbuf;
1512
1513       EMACS_INT result
1514         = (boyer_moore_ok
1515            ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
1516                           pos_byte, lim_byte,
1517                           char_base)
1518            : simple_search (n, pat, raw_pattern_size, len_byte, trt,
1519                             pos, pos_byte, lim, lim_byte));
1520       SAFE_FREE ();
1521       return result;
1522     }
1523 }
1524 \f
1525 /* Do a simple string search N times for the string PAT,
1526    whose length is LEN/LEN_BYTE,
1527    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1528    TRT is the translation table.
1529
1530    Return the character position where the match is found.
1531    Otherwise, if M matches remained to be found, return -M.
1532
1533    This kind of search works regardless of what is in PAT and
1534    regardless of what is in TRT.  It is used in cases where
1535    boyer_moore cannot work.  */
1536
1537 static EMACS_INT
1538 simple_search (EMACS_INT n, unsigned char *pat,
1539                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1540                ptrdiff_t pos, ptrdiff_t pos_byte,
1541                ptrdiff_t lim, ptrdiff_t lim_byte)
1542 {
1543   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1544   bool forward = n > 0;
1545   /* Number of buffer bytes matched.  Note that this may be different
1546      from len_byte in a multibyte buffer.  */
1547   ptrdiff_t match_byte = PTRDIFF_MIN;
1548
1549   if (lim > pos && multibyte)
1550     while (n > 0)
1551       {
1552         while (1)
1553           {
1554             /* Try matching at position POS.  */
1555             ptrdiff_t this_pos = pos;
1556             ptrdiff_t this_pos_byte = pos_byte;
1557             ptrdiff_t this_len = len;
1558             unsigned char *p = pat;
1559             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1560               goto stop;
1561
1562             while (this_len > 0)
1563               {
1564                 int charlen, buf_charlen;
1565                 int pat_ch, buf_ch;
1566
1567                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1568                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1569                                                  buf_charlen);
1570                 TRANSLATE (buf_ch, trt, buf_ch);
1571
1572                 if (buf_ch != pat_ch)
1573                   break;
1574
1575                 this_len--;
1576                 p += charlen;
1577
1578                 this_pos_byte += buf_charlen;
1579                 this_pos++;
1580               }
1581
1582             if (this_len == 0)
1583               {
1584                 match_byte = this_pos_byte - pos_byte;
1585                 pos += len;
1586                 pos_byte += match_byte;
1587                 break;
1588               }
1589
1590             INC_BOTH (pos, pos_byte);
1591           }
1592
1593         n--;
1594       }
1595   else if (lim > pos)
1596     while (n > 0)
1597       {
1598         while (1)
1599           {
1600             /* Try matching at position POS.  */
1601             ptrdiff_t this_pos = pos;
1602             ptrdiff_t this_len = len;
1603             unsigned char *p = pat;
1604
1605             if (pos + len > lim)
1606               goto stop;
1607
1608             while (this_len > 0)
1609               {
1610                 int pat_ch = *p++;
1611                 int buf_ch = FETCH_BYTE (this_pos);
1612                 TRANSLATE (buf_ch, trt, buf_ch);
1613
1614                 if (buf_ch != pat_ch)
1615                   break;
1616
1617                 this_len--;
1618                 this_pos++;
1619               }
1620
1621             if (this_len == 0)
1622               {
1623                 match_byte = len;
1624                 pos += len;
1625                 break;
1626               }
1627
1628             pos++;
1629           }
1630
1631         n--;
1632       }
1633   /* Backwards search.  */
1634   else if (lim < pos && multibyte)
1635     while (n < 0)
1636       {
1637         while (1)
1638           {
1639             /* Try matching at position POS.  */
1640             ptrdiff_t this_pos = pos;
1641             ptrdiff_t this_pos_byte = pos_byte;
1642             ptrdiff_t this_len = len;
1643             const unsigned char *p = pat + len_byte;
1644
1645             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1646               goto stop;
1647
1648             while (this_len > 0)
1649               {
1650                 int pat_ch, buf_ch;
1651
1652                 DEC_BOTH (this_pos, this_pos_byte);
1653                 PREV_CHAR_BOUNDARY (p, pat);
1654                 pat_ch = STRING_CHAR (p);
1655                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1656                 TRANSLATE (buf_ch, trt, buf_ch);
1657
1658                 if (buf_ch != pat_ch)
1659                   break;
1660
1661                 this_len--;
1662               }
1663
1664             if (this_len == 0)
1665               {
1666                 match_byte = pos_byte - this_pos_byte;
1667                 pos = this_pos;
1668                 pos_byte = this_pos_byte;
1669                 break;
1670               }
1671
1672             DEC_BOTH (pos, pos_byte);
1673           }
1674
1675         n++;
1676       }
1677   else if (lim < pos)
1678     while (n < 0)
1679       {
1680         while (1)
1681           {
1682             /* Try matching at position POS.  */
1683             ptrdiff_t this_pos = pos - len;
1684             ptrdiff_t this_len = len;
1685             unsigned char *p = pat;
1686
1687             if (this_pos < lim)
1688               goto stop;
1689
1690             while (this_len > 0)
1691               {
1692                 int pat_ch = *p++;
1693                 int buf_ch = FETCH_BYTE (this_pos);
1694                 TRANSLATE (buf_ch, trt, buf_ch);
1695
1696                 if (buf_ch != pat_ch)
1697                   break;
1698                 this_len--;
1699                 this_pos++;
1700               }
1701
1702             if (this_len == 0)
1703               {
1704                 match_byte = len;
1705                 pos -= len;
1706                 break;
1707               }
1708
1709             pos--;
1710           }
1711
1712         n++;
1713       }
1714
1715  stop:
1716   if (n == 0)
1717     {
1718       eassert (match_byte != PTRDIFF_MIN);
1719       if (forward)
1720         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1721       else
1722         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1723
1724       return pos;
1725     }
1726   else if (n > 0)
1727     return -n;
1728   else
1729     return n;
1730 }
1731 \f
1732 /* Do Boyer-Moore search N times for the string BASE_PAT,
1733    whose length is LEN_BYTE,
1734    from buffer position POS_BYTE until LIM_BYTE.
1735    DIRECTION says which direction we search in.
1736    TRT and INVERSE_TRT are translation tables.
1737    Characters in PAT are already translated by TRT.
1738
1739    This kind of search works if all the characters in BASE_PAT that
1740    have nontrivial translation are the same aside from the last byte.
1741    This makes it possible to translate just the last byte of a
1742    character, and do so after just a simple test of the context.
1743    CHAR_BASE is nonzero if there is such a non-ASCII character.
1744
1745    If that criterion is not satisfied, do not call this function.  */
1746
1747 static EMACS_INT
1748 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1749              ptrdiff_t len_byte,
1750              Lisp_Object trt, Lisp_Object inverse_trt,
1751              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1752              int char_base)
1753 {
1754   int direction = ((n > 0) ? 1 : -1);
1755   register ptrdiff_t dirlen;
1756   ptrdiff_t limit;
1757   int stride_for_teases = 0;
1758   int BM_tab[0400];
1759   register unsigned char *cursor, *p_limit;
1760   register ptrdiff_t i;
1761   register int j;
1762   unsigned char *pat, *pat_end;
1763   bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1764
1765   unsigned char simple_translate[0400];
1766   /* These are set to the preceding bytes of a byte to be translated
1767      if char_base is nonzero.  As the maximum byte length of a
1768      multibyte character is 5, we have to check at most four previous
1769      bytes.  */
1770   int translate_prev_byte1 = 0;
1771   int translate_prev_byte2 = 0;
1772   int translate_prev_byte3 = 0;
1773
1774   /* The general approach is that we are going to maintain that we know
1775      the first (closest to the present position, in whatever direction
1776      we're searching) character that could possibly be the last
1777      (furthest from present position) character of a valid match.  We
1778      advance the state of our knowledge by looking at that character
1779      and seeing whether it indeed matches the last character of the
1780      pattern.  If it does, we take a closer look.  If it does not, we
1781      move our pointer (to putative last characters) as far as is
1782      logically possible.  This amount of movement, which I call a
1783      stride, will be the length of the pattern if the actual character
1784      appears nowhere in the pattern, otherwise it will be the distance
1785      from the last occurrence of that character to the end of the
1786      pattern.  If the amount is zero we have a possible match.  */
1787
1788   /* Here we make a "mickey mouse" BM table.  The stride of the search
1789      is determined only by the last character of the putative match.
1790      If that character does not match, we will stride the proper
1791      distance to propose a match that superimposes it on the last
1792      instance of a character that matches it (per trt), or misses
1793      it entirely if there is none. */
1794
1795   dirlen = len_byte * direction;
1796
1797   /* Record position after the end of the pattern.  */
1798   pat_end = base_pat + len_byte;
1799   /* BASE_PAT points to a character that we start scanning from.
1800      It is the first character in a forward search,
1801      the last character in a backward search.  */
1802   if (direction < 0)
1803     base_pat = pat_end - 1;
1804
1805   /* A character that does not appear in the pattern induces a
1806      stride equal to the pattern length.  */
1807   for (i = 0; i < 0400; i++)
1808     BM_tab[i] = dirlen;
1809
1810   /* We use this for translation, instead of TRT itself.
1811      We fill this in to handle the characters that actually
1812      occur in the pattern.  Others don't matter anyway!  */
1813   for (i = 0; i < 0400; i++)
1814     simple_translate[i] = i;
1815
1816   if (char_base)
1817     {
1818       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1819          byte following them are the target of translation.  */
1820       unsigned char str[MAX_MULTIBYTE_LENGTH];
1821       int cblen = CHAR_STRING (char_base, str);
1822
1823       translate_prev_byte1 = str[cblen - 2];
1824       if (cblen > 2)
1825         {
1826           translate_prev_byte2 = str[cblen - 3];
1827           if (cblen > 3)
1828             translate_prev_byte3 = str[cblen - 4];
1829         }
1830     }
1831
1832   i = 0;
1833   while (i != dirlen)
1834     {
1835       unsigned char *ptr = base_pat + i;
1836       i += direction;
1837       if (! NILP (trt))
1838         {
1839           /* If the byte currently looking at is the last of a
1840              character to check case-equivalents, set CH to that
1841              character.  An ASCII character and a non-ASCII character
1842              matching with CHAR_BASE are to be checked.  */
1843           int ch = -1;
1844
1845           if (ASCII_CHAR_P (*ptr) || ! multibyte)
1846             ch = *ptr;
1847           else if (char_base
1848                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1849             {
1850               unsigned char *charstart = ptr - 1;
1851
1852               while (! (CHAR_HEAD_P (*charstart)))
1853                 charstart--;
1854               ch = STRING_CHAR (charstart);
1855               if (char_base != (ch & ~0x3F))
1856                 ch = -1;
1857             }
1858
1859           if (ch >= 0200 && multibyte)
1860             j = (ch & 0x3F) | 0200;
1861           else
1862             j = *ptr;
1863
1864           if (i == dirlen)
1865             stride_for_teases = BM_tab[j];
1866
1867           BM_tab[j] = dirlen - i;
1868           /* A translation table is accompanied by its inverse -- see
1869              comment following downcase_table for details.  */
1870           if (ch >= 0)
1871             {
1872               int starting_ch = ch;
1873               int starting_j = j;
1874
1875               while (1)
1876                 {
1877                   TRANSLATE (ch, inverse_trt, ch);
1878                   if (ch >= 0200 && multibyte)
1879                     j = (ch & 0x3F) | 0200;
1880                   else
1881                     j = ch;
1882
1883                   /* For all the characters that map into CH,
1884                      set up simple_translate to map the last byte
1885                      into STARTING_J.  */
1886                   simple_translate[j] = starting_j;
1887                   if (ch == starting_ch)
1888                     break;
1889                   BM_tab[j] = dirlen - i;
1890                 }
1891             }
1892         }
1893       else
1894         {
1895           j = *ptr;
1896
1897           if (i == dirlen)
1898             stride_for_teases = BM_tab[j];
1899           BM_tab[j] = dirlen - i;
1900         }
1901       /* stride_for_teases tells how much to stride if we get a
1902          match on the far character but are subsequently
1903          disappointed, by recording what the stride would have been
1904          for that character if the last character had been
1905          different.  */
1906     }
1907   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1908   /* loop invariant - POS_BYTE points at where last char (first
1909      char if reverse) of pattern would align in a possible match.  */
1910   while (n != 0)
1911     {
1912       ptrdiff_t tail_end;
1913       unsigned char *tail_end_ptr;
1914
1915       /* It's been reported that some (broken) compiler thinks that
1916          Boolean expressions in an arithmetic context are unsigned.
1917          Using an explicit ?1:0 prevents this.  */
1918       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1919           < 0)
1920         return (n * (0 - direction));
1921       /* First we do the part we can by pointers (maybe nothing) */
1922       QUIT;
1923       pat = base_pat;
1924       limit = pos_byte - dirlen + direction;
1925       if (direction > 0)
1926         {
1927           limit = BUFFER_CEILING_OF (limit);
1928           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1929              can take on without hitting edge of buffer or the gap.  */
1930           limit = min (limit, pos_byte + 20000);
1931           limit = min (limit, lim_byte - 1);
1932         }
1933       else
1934         {
1935           limit = BUFFER_FLOOR_OF (limit);
1936           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1937              can take on without hitting edge of buffer or the gap.  */
1938           limit = max (limit, pos_byte - 20000);
1939           limit = max (limit, lim_byte);
1940         }
1941       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1942       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1943
1944       if ((limit - pos_byte) * direction > 20)
1945         {
1946           unsigned char *p2;
1947
1948           p_limit = BYTE_POS_ADDR (limit);
1949           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1950           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1951           while (1)             /* use one cursor setting as long as i can */
1952             {
1953               if (direction > 0) /* worth duplicating */
1954                 {
1955                   while (cursor <= p_limit)
1956                     {
1957                       if (BM_tab[*cursor] == 0)
1958                         goto hit;
1959                       cursor += BM_tab[*cursor];
1960                     }
1961                 }
1962               else
1963                 {
1964                   while (cursor >= p_limit)
1965                     {
1966                       if (BM_tab[*cursor] == 0)
1967                         goto hit;
1968                       cursor += BM_tab[*cursor];
1969                     }
1970                 }
1971               /* If you are here, cursor is beyond the end of the
1972                  searched region.  You fail to match within the
1973                  permitted region and would otherwise try a character
1974                  beyond that region.  */
1975               break;
1976
1977             hit:
1978               i = dirlen - direction;
1979               if (! NILP (trt))
1980                 {
1981                   while ((i -= direction) + direction != 0)
1982                     {
1983                       int ch;
1984                       cursor -= direction;
1985                       /* Translate only the last byte of a character.  */
1986                       if (! multibyte
1987                           || ((cursor == tail_end_ptr
1988                                || CHAR_HEAD_P (cursor[1]))
1989                               && (CHAR_HEAD_P (cursor[0])
1990                                   /* Check if this is the last byte of
1991                                      a translatable character.  */
1992                                   || (translate_prev_byte1 == cursor[-1]
1993                                       && (CHAR_HEAD_P (translate_prev_byte1)
1994                                           || (translate_prev_byte2 == cursor[-2]
1995                                               && (CHAR_HEAD_P (translate_prev_byte2)
1996                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1997                         ch = simple_translate[*cursor];
1998                       else
1999                         ch = *cursor;
2000                       if (pat[i] != ch)
2001                         break;
2002                     }
2003                 }
2004               else
2005                 {
2006                   while ((i -= direction) + direction != 0)
2007                     {
2008                       cursor -= direction;
2009                       if (pat[i] != *cursor)
2010                         break;
2011                     }
2012                 }
2013               cursor += dirlen - i - direction; /* fix cursor */
2014               if (i + direction == 0)
2015                 {
2016                   ptrdiff_t position, start, end;
2017
2018                   cursor -= direction;
2019
2020                   position = pos_byte + cursor - p2 + ((direction > 0)
2021                                                        ? 1 - len_byte : 0);
2022                   set_search_regs (position, len_byte);
2023
2024                   if (NILP (Vinhibit_changing_match_data))
2025                     {
2026                       start = search_regs.start[0];
2027                       end = search_regs.end[0];
2028                     }
2029                   else
2030                     /* If Vinhibit_changing_match_data is non-nil,
2031                        search_regs will not be changed.  So let's
2032                        compute start and end here.  */
2033                     {
2034                       start = BYTE_TO_CHAR (position);
2035                       end = BYTE_TO_CHAR (position + len_byte);
2036                     }
2037
2038                   if ((n -= direction) != 0)
2039                     cursor += dirlen; /* to resume search */
2040                   else
2041                     return direction > 0 ? end : start;
2042                 }
2043               else
2044                 cursor += stride_for_teases; /* <sigh> we lose -  */
2045             }
2046           pos_byte += cursor - p2;
2047         }
2048       else
2049         /* Now we'll pick up a clump that has to be done the hard
2050            way because it covers a discontinuity.  */
2051         {
2052           limit = ((direction > 0)
2053                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2054                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2055           limit = ((direction > 0)
2056                    ? min (limit + len_byte, lim_byte - 1)
2057                    : max (limit - len_byte, lim_byte));
2058           /* LIMIT is now the last value POS_BYTE can have
2059              and still be valid for a possible match.  */
2060           while (1)
2061             {
2062               /* This loop can be coded for space rather than
2063                  speed because it will usually run only once.
2064                  (the reach is at most len + 21, and typically
2065                  does not exceed len).  */
2066               while ((limit - pos_byte) * direction >= 0)
2067                 {
2068                   int ch = FETCH_BYTE (pos_byte);
2069                   if (BM_tab[ch] == 0)
2070                     goto hit2;
2071                   pos_byte += BM_tab[ch];
2072                 }
2073               break;    /* ran off the end */
2074
2075             hit2:
2076               /* Found what might be a match.  */
2077               i = dirlen - direction;
2078               while ((i -= direction) + direction != 0)
2079                 {
2080                   int ch;
2081                   unsigned char *ptr;
2082                   pos_byte -= direction;
2083                   ptr = BYTE_POS_ADDR (pos_byte);
2084                   /* Translate only the last byte of a character.  */
2085                   if (! multibyte
2086                       || ((ptr == tail_end_ptr
2087                            || CHAR_HEAD_P (ptr[1]))
2088                           && (CHAR_HEAD_P (ptr[0])
2089                               /* Check if this is the last byte of a
2090                                  translatable character.  */
2091                               || (translate_prev_byte1 == ptr[-1]
2092                                   && (CHAR_HEAD_P (translate_prev_byte1)
2093                                       || (translate_prev_byte2 == ptr[-2]
2094                                           && (CHAR_HEAD_P (translate_prev_byte2)
2095                                               || translate_prev_byte3 == ptr[-3])))))))
2096                     ch = simple_translate[*ptr];
2097                   else
2098                     ch = *ptr;
2099                   if (pat[i] != ch)
2100                     break;
2101                 }
2102               /* Above loop has moved POS_BYTE part or all the way
2103                  back to the first pos (last pos if reverse).
2104                  Set it once again at the last (first if reverse) char.  */
2105               pos_byte += dirlen - i - direction;
2106               if (i + direction == 0)
2107                 {
2108                   ptrdiff_t position, start, end;
2109                   pos_byte -= direction;
2110
2111                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2112                   set_search_regs (position, len_byte);
2113
2114                   if (NILP (Vinhibit_changing_match_data))
2115                     {
2116                       start = search_regs.start[0];
2117                       end = search_regs.end[0];
2118                     }
2119                   else
2120                     /* If Vinhibit_changing_match_data is non-nil,
2121                        search_regs will not be changed.  So let's
2122                        compute start and end here.  */
2123                     {
2124                       start = BYTE_TO_CHAR (position);
2125                       end = BYTE_TO_CHAR (position + len_byte);
2126                     }
2127
2128                   if ((n -= direction) != 0)
2129                     pos_byte += dirlen; /* to resume search */
2130                   else
2131                     return direction > 0 ? end : start;
2132                 }
2133               else
2134                 pos_byte += stride_for_teases;
2135             }
2136           }
2137       /* We have done one clump.  Can we continue? */
2138       if ((lim_byte - pos_byte) * direction < 0)
2139         return ((0 - n) * direction);
2140     }
2141   return BYTE_TO_CHAR (pos_byte);
2142 }
2143
2144 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2145    for the overall match just found in the current buffer.
2146    Also clear out the match data for registers 1 and up.  */
2147
2148 static void
2149 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2150 {
2151   ptrdiff_t i;
2152
2153   if (!NILP (Vinhibit_changing_match_data))
2154     return;
2155
2156   /* Make sure we have registers in which to store
2157      the match position.  */
2158   if (search_regs.num_regs == 0)
2159     {
2160       search_regs.start = xmalloc (2 * sizeof (regoff_t));
2161       search_regs.end = xmalloc (2 * sizeof (regoff_t));
2162       search_regs.num_regs = 2;
2163     }
2164
2165   /* Clear out the other registers.  */
2166   for (i = 1; i < search_regs.num_regs; i++)
2167     {
2168       search_regs.start[i] = -1;
2169       search_regs.end[i] = -1;
2170     }
2171
2172   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2173   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2174   XSETBUFFER (last_thing_searched, current_buffer);
2175 }
2176 \f
2177 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2178        "MSearch backward: ",
2179        doc: /* Search backward from point for STRING.
2180 Set point to the beginning of the occurrence found, and return point.
2181 An optional second argument bounds the search; it is a buffer position.
2182 The match found must not extend before that position.
2183 Optional third argument, if t, means if fail just return nil (no error).
2184  If not nil and not t, position at limit of search and return nil.
2185 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2186  successive occurrences.  If COUNT is negative, search forward,
2187  instead of backward, for -COUNT occurrences.
2188
2189 Search case-sensitivity is determined by the value of the variable
2190 `case-fold-search', which see.
2191
2192 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2193   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2194 {
2195   return search_command (string, bound, noerror, count, -1, 0, 0);
2196 }
2197
2198 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2199        doc: /* Search forward from point for STRING.
2200 Set point to the end of the occurrence found, and return point.
2201 An optional second argument bounds the search; it is a buffer position.
2202 The match found must not extend after that position.  A value of nil is
2203   equivalent to (point-max).
2204 Optional third argument, if t, means if fail just return nil (no error).
2205   If not nil and not t, move to limit of search and return nil.
2206 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2207  successive occurrences.  If COUNT is negative, search backward,
2208  instead of forward, for -COUNT occurrences.
2209
2210 Search case-sensitivity is determined by the value of the variable
2211 `case-fold-search', which see.
2212
2213 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2214   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2215 {
2216   return search_command (string, bound, noerror, count, 1, 0, 0);
2217 }
2218
2219 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2220        "sRE search backward: ",
2221        doc: /* Search backward from point for match for regular expression REGEXP.
2222 Set point to the beginning of the match, and return point.
2223 The match found is the one starting last in the buffer
2224 and yet ending before the origin of the search.
2225 An optional second argument bounds the search; it is a buffer position.
2226 The match found must start at or after that position.
2227 Optional third argument, if t, means if fail just return nil (no error).
2228   If not nil and not t, move to limit of search and return nil.
2229 Optional fourth argument is repeat count--search for successive occurrences.
2230
2231 Search case-sensitivity is determined by the value of the variable
2232 `case-fold-search', which see.
2233
2234 See also the functions `match-beginning', `match-end', `match-string',
2235 and `replace-match'.  */)
2236   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2237 {
2238   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2239 }
2240
2241 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2242        "sRE search: ",
2243        doc: /* Search forward from point for regular expression REGEXP.
2244 Set point to the end of the occurrence found, and return point.
2245 An optional second argument bounds the search; it is a buffer position.
2246 The match found must not extend after that position.
2247 Optional third argument, if t, means if fail just return nil (no error).
2248   If not nil and not t, move to limit of search and return nil.
2249 Optional fourth argument is repeat count--search for successive occurrences.
2250
2251 Search case-sensitivity is determined by the value of the variable
2252 `case-fold-search', which see.
2253
2254 See also the functions `match-beginning', `match-end', `match-string',
2255 and `replace-match'.  */)
2256   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2257 {
2258   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2259 }
2260
2261 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2262        "sPosix search backward: ",
2263        doc: /* Search backward from point for match for regular expression REGEXP.
2264 Find the longest match in accord with Posix regular expression rules.
2265 Set point to the beginning of the match, and return point.
2266 The match found is the one starting last in the buffer
2267 and yet ending before the origin of the search.
2268 An optional second argument bounds the search; it is a buffer position.
2269 The match found must start at or after that position.
2270 Optional third argument, if t, means if fail just return nil (no error).
2271   If not nil and not t, move to limit of search and return nil.
2272 Optional fourth argument is repeat count--search for successive occurrences.
2273
2274 Search case-sensitivity is determined by the value of the variable
2275 `case-fold-search', which see.
2276
2277 See also the functions `match-beginning', `match-end', `match-string',
2278 and `replace-match'.  */)
2279   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2280 {
2281   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2282 }
2283
2284 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2285        "sPosix search: ",
2286        doc: /* Search forward from point for regular expression REGEXP.
2287 Find the longest match in accord with Posix regular expression rules.
2288 Set point to the end of the occurrence found, and return point.
2289 An optional second argument bounds the search; it is a buffer position.
2290 The match found must not extend after that position.
2291 Optional third argument, if t, means if fail just return nil (no error).
2292   If not nil and not t, move to limit of search and return nil.
2293 Optional fourth argument is repeat count--search for successive occurrences.
2294
2295 Search case-sensitivity is determined by the value of the variable
2296 `case-fold-search', which see.
2297
2298 See also the functions `match-beginning', `match-end', `match-string',
2299 and `replace-match'.  */)
2300   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2301 {
2302   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2303 }
2304 \f
2305 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2306        doc: /* Replace text matched by last search with NEWTEXT.
2307 Leave point at the end of the replacement text.
2308
2309 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2310 the replacement text.  Otherwise, maybe capitalize the whole text, or
2311 maybe just word initials, based on the replaced text.  If the replaced
2312 text has only capital letters and has at least one multiletter word,
2313 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
2314 in the replaced text, capitalize each word in NEWTEXT.
2315
2316 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2317 Otherwise treat `\\' as special:
2318   `\\&' in NEWTEXT means substitute original matched text.
2319   `\\N' means substitute what matched the Nth `\\(...\\)'.
2320        If Nth parens didn't match, substitute nothing.
2321   `\\\\' means insert one `\\'.
2322   `\\?' is treated literally
2323        (for compatibility with `query-replace-regexp').
2324   Any other character following `\\' signals an error.
2325 Case conversion does not apply to these substitutions.
2326
2327 If optional fourth argument STRING is non-nil, it should be a string
2328 to act on; this should be the string on which the previous match was
2329 done via `string-match'.  In this case, `replace-match' creates and
2330 returns a new string, made by copying STRING and replacing the part of
2331 STRING that was matched (the original STRING itself is not altered).
2332
2333 The optional fifth argument SUBEXP specifies a subexpression;
2334 it says to replace just that subexpression with NEWTEXT,
2335 rather than replacing the entire matched text.
2336 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2337 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2338 NEWTEXT in place of subexp N.
2339 This is useful only after a regular expression search or match,
2340 since only regular expressions have distinguished subexpressions.  */)
2341   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2342 {
2343   enum { nochange, all_caps, cap_initial } case_action;
2344   ptrdiff_t pos, pos_byte;
2345   bool some_multiletter_word;
2346   bool some_lowercase;
2347   bool some_uppercase;
2348   bool some_nonuppercase_initial;
2349   int c, prevc;
2350   ptrdiff_t sub;
2351   ptrdiff_t opoint, newpoint;
2352
2353   CHECK_STRING (newtext);
2354
2355   if (! NILP (string))
2356     CHECK_STRING (string);
2357
2358   case_action = nochange;       /* We tried an initialization */
2359                                 /* but some C compilers blew it */
2360
2361   if (search_regs.num_regs <= 0)
2362     error ("`replace-match' called before any match found");
2363
2364   if (NILP (subexp))
2365     sub = 0;
2366   else
2367     {
2368       CHECK_NUMBER (subexp);
2369       if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2370         args_out_of_range (subexp, make_number (search_regs.num_regs));
2371       sub = XINT (subexp);
2372     }
2373
2374   if (NILP (string))
2375     {
2376       if (search_regs.start[sub] < BEGV
2377           || search_regs.start[sub] > search_regs.end[sub]
2378           || search_regs.end[sub] > ZV)
2379         args_out_of_range (make_number (search_regs.start[sub]),
2380                            make_number (search_regs.end[sub]));
2381     }
2382   else
2383     {
2384       if (search_regs.start[sub] < 0
2385           || search_regs.start[sub] > search_regs.end[sub]
2386           || search_regs.end[sub] > SCHARS (string))
2387         args_out_of_range (make_number (search_regs.start[sub]),
2388                            make_number (search_regs.end[sub]));
2389     }
2390
2391   if (NILP (fixedcase))
2392     {
2393       /* Decide how to casify by examining the matched text. */
2394       ptrdiff_t last;
2395
2396       pos = search_regs.start[sub];
2397       last = search_regs.end[sub];
2398
2399       if (NILP (string))
2400         pos_byte = CHAR_TO_BYTE (pos);
2401       else
2402         pos_byte = string_char_to_byte (string, pos);
2403
2404       prevc = '\n';
2405       case_action = all_caps;
2406
2407       /* some_multiletter_word is set nonzero if any original word
2408          is more than one letter long. */
2409       some_multiletter_word = 0;
2410       some_lowercase = 0;
2411       some_nonuppercase_initial = 0;
2412       some_uppercase = 0;
2413
2414       while (pos < last)
2415         {
2416           if (NILP (string))
2417             {
2418               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2419               INC_BOTH (pos, pos_byte);
2420             }
2421           else
2422             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2423
2424           if (lowercasep (c))
2425             {
2426               /* Cannot be all caps if any original char is lower case */
2427
2428               some_lowercase = 1;
2429               if (SYNTAX (prevc) != Sword)
2430                 some_nonuppercase_initial = 1;
2431               else
2432                 some_multiletter_word = 1;
2433             }
2434           else if (uppercasep (c))
2435             {
2436               some_uppercase = 1;
2437               if (SYNTAX (prevc) != Sword)
2438                 ;
2439               else
2440                 some_multiletter_word = 1;
2441             }
2442           else
2443             {
2444               /* If the initial is a caseless word constituent,
2445                  treat that like a lowercase initial.  */
2446               if (SYNTAX (prevc) != Sword)
2447                 some_nonuppercase_initial = 1;
2448             }
2449
2450           prevc = c;
2451         }
2452
2453       /* Convert to all caps if the old text is all caps
2454          and has at least one multiletter word.  */
2455       if (! some_lowercase && some_multiletter_word)
2456         case_action = all_caps;
2457       /* Capitalize each word, if the old text has all capitalized words.  */
2458       else if (!some_nonuppercase_initial && some_multiletter_word)
2459         case_action = cap_initial;
2460       else if (!some_nonuppercase_initial && some_uppercase)
2461         /* Should x -> yz, operating on X, give Yz or YZ?
2462            We'll assume the latter.  */
2463         case_action = all_caps;
2464       else
2465         case_action = nochange;
2466     }
2467
2468   /* Do replacement in a string.  */
2469   if (!NILP (string))
2470     {
2471       Lisp_Object before, after;
2472
2473       before = Fsubstring (string, make_number (0),
2474                            make_number (search_regs.start[sub]));
2475       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2476
2477       /* Substitute parts of the match into NEWTEXT
2478          if desired.  */
2479       if (NILP (literal))
2480         {
2481           ptrdiff_t lastpos = 0;
2482           ptrdiff_t lastpos_byte = 0;
2483           /* We build up the substituted string in ACCUM.  */
2484           Lisp_Object accum;
2485           Lisp_Object middle;
2486           ptrdiff_t length = SBYTES (newtext);
2487
2488           accum = Qnil;
2489
2490           for (pos_byte = 0, pos = 0; pos_byte < length;)
2491             {
2492               ptrdiff_t substart = -1;
2493               ptrdiff_t subend = 0;
2494               bool delbackslash = 0;
2495
2496               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2497
2498               if (c == '\\')
2499                 {
2500                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2501
2502                   if (c == '&')
2503                     {
2504                       substart = search_regs.start[sub];
2505                       subend = search_regs.end[sub];
2506                     }
2507                   else if (c >= '1' && c <= '9')
2508                     {
2509                       if (c - '0' < search_regs.num_regs
2510                           && search_regs.start[c - '0'] >= 0)
2511                         {
2512                           substart = search_regs.start[c - '0'];
2513                           subend = search_regs.end[c - '0'];
2514                         }
2515                       else
2516                         {
2517                           /* If that subexp did not match,
2518                              replace \\N with nothing.  */
2519                           substart = 0;
2520                           subend = 0;
2521                         }
2522                     }
2523                   else if (c == '\\')
2524                     delbackslash = 1;
2525                   else if (c != '?')
2526                     error ("Invalid use of `\\' in replacement text");
2527                 }
2528               if (substart >= 0)
2529                 {
2530                   if (pos - 2 != lastpos)
2531                     middle = substring_both (newtext, lastpos,
2532                                              lastpos_byte,
2533                                              pos - 2, pos_byte - 2);
2534                   else
2535                     middle = Qnil;
2536                   accum = concat3 (accum, middle,
2537                                    Fsubstring (string,
2538                                                make_number (substart),
2539                                                make_number (subend)));
2540                   lastpos = pos;
2541                   lastpos_byte = pos_byte;
2542                 }
2543               else if (delbackslash)
2544                 {
2545                   middle = substring_both (newtext, lastpos,
2546                                            lastpos_byte,
2547                                            pos - 1, pos_byte - 1);
2548
2549                   accum = concat2 (accum, middle);
2550                   lastpos = pos;
2551                   lastpos_byte = pos_byte;
2552                 }
2553             }
2554
2555           if (pos != lastpos)
2556             middle = substring_both (newtext, lastpos,
2557                                      lastpos_byte,
2558                                      pos, pos_byte);
2559           else
2560             middle = Qnil;
2561
2562           newtext = concat2 (accum, middle);
2563         }
2564
2565       /* Do case substitution in NEWTEXT if desired.  */
2566       if (case_action == all_caps)
2567         newtext = Fupcase (newtext);
2568       else if (case_action == cap_initial)
2569         newtext = Fupcase_initials (newtext);
2570
2571       return concat3 (before, newtext, after);
2572     }
2573
2574   /* Record point, then move (quietly) to the start of the match.  */
2575   if (PT >= search_regs.end[sub])
2576     opoint = PT - ZV;
2577   else if (PT > search_regs.start[sub])
2578     opoint = search_regs.end[sub] - ZV;
2579   else
2580     opoint = PT;
2581
2582   /* If we want non-literal replacement,
2583      perform substitution on the replacement string.  */
2584   if (NILP (literal))
2585     {
2586       ptrdiff_t length = SBYTES (newtext);
2587       unsigned char *substed;
2588       ptrdiff_t substed_alloc_size, substed_len;
2589       bool buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2590       bool str_multibyte = STRING_MULTIBYTE (newtext);
2591       bool really_changed = 0;
2592
2593       substed_alloc_size = (length <= (STRING_BYTES_BOUND - 100) / 2
2594                             ? length * 2 + 100
2595                             : STRING_BYTES_BOUND);
2596       substed = xmalloc (substed_alloc_size);
2597       substed_len = 0;
2598
2599       /* Go thru NEWTEXT, producing the actual text to insert in
2600          SUBSTED while adjusting multibyteness to that of the current
2601          buffer.  */
2602
2603       for (pos_byte = 0, pos = 0; pos_byte < length;)
2604         {
2605           unsigned char str[MAX_MULTIBYTE_LENGTH];
2606           const unsigned char *add_stuff = NULL;
2607           ptrdiff_t add_len = 0;
2608           ptrdiff_t idx = -1;
2609
2610           if (str_multibyte)
2611             {
2612               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2613               if (!buf_multibyte)
2614                 c = CHAR_TO_BYTE8 (c);
2615             }
2616           else
2617             {
2618               /* Note that we don't have to increment POS.  */
2619               c = SREF (newtext, pos_byte++);
2620               if (buf_multibyte)
2621                 MAKE_CHAR_MULTIBYTE (c);
2622             }
2623
2624           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2625              or set IDX to a match index, which means put that part
2626              of the buffer text into SUBSTED.  */
2627
2628           if (c == '\\')
2629             {
2630               really_changed = 1;
2631
2632               if (str_multibyte)
2633                 {
2634                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2635                                                       pos, pos_byte);
2636                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2637                     c = CHAR_TO_BYTE8 (c);
2638                 }
2639               else
2640                 {
2641                   c = SREF (newtext, pos_byte++);
2642                   if (buf_multibyte)
2643                     MAKE_CHAR_MULTIBYTE (c);
2644                 }
2645
2646               if (c == '&')
2647                 idx = sub;
2648               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2649                 {
2650                   if (search_regs.start[c - '0'] >= 1)
2651                     idx = c - '0';
2652                 }
2653               else if (c == '\\')
2654                 add_len = 1, add_stuff = (unsigned char *) "\\";
2655               else
2656                 {
2657                   xfree (substed);
2658                   error ("Invalid use of `\\' in replacement text");
2659                 }
2660             }
2661           else
2662             {
2663               add_len = CHAR_STRING (c, str);
2664               add_stuff = str;
2665             }
2666
2667           /* If we want to copy part of a previous match,
2668              set up ADD_STUFF and ADD_LEN to point to it.  */
2669           if (idx >= 0)
2670             {
2671               ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2672               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2673               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2674                 move_gap_both (search_regs.start[idx], begbyte);
2675               add_stuff = BYTE_POS_ADDR (begbyte);
2676             }
2677
2678           /* Now the stuff we want to add to SUBSTED
2679              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2680
2681           /* Make sure SUBSTED is big enough.  */
2682           if (substed_alloc_size - substed_len < add_len)
2683             substed =
2684               xpalloc (substed, &substed_alloc_size,
2685                        add_len - (substed_alloc_size - substed_len),
2686                        STRING_BYTES_BOUND, 1);
2687
2688           /* Now add to the end of SUBSTED.  */
2689           if (add_stuff)
2690             {
2691               memcpy (substed + substed_len, add_stuff, add_len);
2692               substed_len += add_len;
2693             }
2694         }
2695
2696       if (really_changed)
2697         newtext = make_specified_string ((const char *) substed, -1,
2698                                          substed_len, buf_multibyte);
2699       xfree (substed);
2700     }
2701
2702   /* Replace the old text with the new in the cleanest possible way.  */
2703   replace_range (search_regs.start[sub], search_regs.end[sub],
2704                  newtext, 1, 0, 1);
2705   newpoint = search_regs.start[sub] + SCHARS (newtext);
2706
2707   if (case_action == all_caps)
2708     Fupcase_region (make_number (search_regs.start[sub]),
2709                     make_number (newpoint));
2710   else if (case_action == cap_initial)
2711     Fupcase_initials_region (make_number (search_regs.start[sub]),
2712                              make_number (newpoint));
2713
2714   /* Adjust search data for this change.  */
2715   {
2716     ptrdiff_t oldend = search_regs.end[sub];
2717     ptrdiff_t oldstart = search_regs.start[sub];
2718     ptrdiff_t change = newpoint - search_regs.end[sub];
2719     ptrdiff_t i;
2720
2721     for (i = 0; i < search_regs.num_regs; i++)
2722       {
2723         if (search_regs.start[i] >= oldend)
2724           search_regs.start[i] += change;
2725         else if (search_regs.start[i] > oldstart)
2726           search_regs.start[i] = oldstart;
2727         if (search_regs.end[i] >= oldend)
2728           search_regs.end[i] += change;
2729         else if (search_regs.end[i] > oldstart)
2730           search_regs.end[i] = oldstart;
2731       }
2732   }
2733
2734   /* Put point back where it was in the text.  */
2735   if (opoint <= 0)
2736     TEMP_SET_PT (opoint + ZV);
2737   else
2738     TEMP_SET_PT (opoint);
2739
2740   /* Now move point "officially" to the start of the inserted replacement.  */
2741   move_if_not_intangible (newpoint);
2742
2743   return Qnil;
2744 }
2745 \f
2746 static Lisp_Object
2747 match_limit (Lisp_Object num, bool beginningp)
2748 {
2749   EMACS_INT n;
2750
2751   CHECK_NUMBER (num);
2752   n = XINT (num);
2753   if (n < 0)
2754     args_out_of_range (num, make_number (0));
2755   if (search_regs.num_regs <= 0)
2756     error ("No match data, because no search succeeded");
2757   if (n >= search_regs.num_regs
2758       || search_regs.start[n] < 0)
2759     return Qnil;
2760   return (make_number ((beginningp) ? search_regs.start[n]
2761                                     : search_regs.end[n]));
2762 }
2763
2764 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2765        doc: /* Return position of start of text matched by last search.
2766 SUBEXP, a number, specifies which parenthesized expression in the last
2767   regexp.
2768 Value is nil if SUBEXPth pair didn't match, or there were less than
2769   SUBEXP pairs.
2770 Zero means the entire text matched by the whole regexp or whole string.  */)
2771   (Lisp_Object subexp)
2772 {
2773   return match_limit (subexp, 1);
2774 }
2775
2776 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2777        doc: /* Return position of end of text matched by last search.
2778 SUBEXP, a number, specifies which parenthesized expression in the last
2779   regexp.
2780 Value is nil if SUBEXPth pair didn't match, or there were less than
2781   SUBEXP pairs.
2782 Zero means the entire text matched by the whole regexp or whole string.  */)
2783   (Lisp_Object subexp)
2784 {
2785   return match_limit (subexp, 0);
2786 }
2787
2788 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2789        doc: /* Return a list containing all info on what the last search matched.
2790 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2791 All the elements are markers or nil (nil if the Nth pair didn't match)
2792 if the last match was on a buffer; integers or nil if a string was matched.
2793 Use `set-match-data' to reinstate the data in this list.
2794
2795 If INTEGERS (the optional first argument) is non-nil, always use
2796 integers \(rather than markers) to represent buffer positions.  In
2797 this case, and if the last match was in a buffer, the buffer will get
2798 stored as one additional element at the end of the list.
2799
2800 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2801 enough to hold all the values, and if INTEGERS is non-nil, no consing
2802 is done.
2803
2804 If optional third arg RESEAT is non-nil, any previous markers on the
2805 REUSE list will be modified to point to nowhere.
2806
2807 Return value is undefined if the last search failed.  */)
2808   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2809 {
2810   Lisp_Object tail, prev;
2811   Lisp_Object *data;
2812   ptrdiff_t i, len;
2813
2814   if (!NILP (reseat))
2815     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2816       if (MARKERP (XCAR (tail)))
2817         {
2818           unchain_marker (XMARKER (XCAR (tail)));
2819           XSETCAR (tail, Qnil);
2820         }
2821
2822   if (NILP (last_thing_searched))
2823     return Qnil;
2824
2825   prev = Qnil;
2826
2827   USE_SAFE_ALLOCA;
2828   SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
2829
2830   len = 0;
2831   for (i = 0; i < search_regs.num_regs; i++)
2832     {
2833       ptrdiff_t start = search_regs.start[i];
2834       if (start >= 0)
2835         {
2836           if (EQ (last_thing_searched, Qt)
2837               || ! NILP (integers))
2838             {
2839               XSETFASTINT (data[2 * i], start);
2840               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2841             }
2842           else if (BUFFERP (last_thing_searched))
2843             {
2844               data[2 * i] = Fmake_marker ();
2845               Fset_marker (data[2 * i],
2846                            make_number (start),
2847                            last_thing_searched);
2848               data[2 * i + 1] = Fmake_marker ();
2849               Fset_marker (data[2 * i + 1],
2850                            make_number (search_regs.end[i]),
2851                            last_thing_searched);
2852             }
2853           else
2854             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2855             emacs_abort ();
2856
2857           len = 2 * i + 2;
2858         }
2859       else
2860         data[2 * i] = data[2 * i + 1] = Qnil;
2861     }
2862
2863   if (BUFFERP (last_thing_searched) && !NILP (integers))
2864     {
2865       data[len] = last_thing_searched;
2866       len++;
2867     }
2868
2869   /* If REUSE is not usable, cons up the values and return them.  */
2870   if (! CONSP (reuse))
2871     reuse = Flist (len, data);
2872   else
2873     {
2874       /* If REUSE is a list, store as many value elements as will fit
2875          into the elements of REUSE.  */
2876       for (i = 0, tail = reuse; CONSP (tail);
2877            i++, tail = XCDR (tail))
2878         {
2879           if (i < len)
2880             XSETCAR (tail, data[i]);
2881           else
2882             XSETCAR (tail, Qnil);
2883           prev = tail;
2884         }
2885
2886       /* If we couldn't fit all value elements into REUSE,
2887          cons up the rest of them and add them to the end of REUSE.  */
2888       if (i < len)
2889         XSETCDR (prev, Flist (len - i, data + i));
2890     }
2891
2892   SAFE_FREE ();
2893   return reuse;
2894 }
2895
2896 /* We used to have an internal use variant of `reseat' described as:
2897
2898       If RESEAT is `evaporate', put the markers back on the free list
2899       immediately.  No other references to the markers must exist in this
2900       case, so it is used only internally on the unwind stack and
2901       save-match-data from Lisp.
2902
2903    But it was ill-conceived: those supposedly-internal markers get exposed via
2904    the undo-list, so freeing them here is unsafe.  */
2905
2906 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2907        doc: /* Set internal data on last search match from elements of LIST.
2908 LIST should have been created by calling `match-data' previously.
2909
2910 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2911   (register Lisp_Object list, Lisp_Object reseat)
2912 {
2913   ptrdiff_t i;
2914   register Lisp_Object marker;
2915
2916   if (running_asynch_code)
2917     save_search_regs ();
2918
2919   CHECK_LIST (list);
2920
2921   /* Unless we find a marker with a buffer or an explicit buffer
2922      in LIST, assume that this match data came from a string.  */
2923   last_thing_searched = Qt;
2924
2925   /* Allocate registers if they don't already exist.  */
2926   {
2927     EMACS_INT length = XFASTINT (Flength (list)) / 2;
2928
2929     if (length > search_regs.num_regs)
2930       {
2931         ptrdiff_t num_regs = search_regs.num_regs;
2932         if (PTRDIFF_MAX < length)
2933           memory_full (SIZE_MAX);
2934         search_regs.start =
2935           xpalloc (search_regs.start, &num_regs, length - num_regs,
2936                    min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2937         search_regs.end =
2938           xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2939
2940         for (i = search_regs.num_regs; i < num_regs; i++)
2941           search_regs.start[i] = -1;
2942
2943         search_regs.num_regs = num_regs;
2944       }
2945
2946     for (i = 0; CONSP (list); i++)
2947       {
2948         marker = XCAR (list);
2949         if (BUFFERP (marker))
2950           {
2951             last_thing_searched = marker;
2952             break;
2953           }
2954         if (i >= length)
2955           break;
2956         if (NILP (marker))
2957           {
2958             search_regs.start[i] = -1;
2959             list = XCDR (list);
2960           }
2961         else
2962           {
2963             Lisp_Object from;
2964             Lisp_Object m;
2965
2966             m = marker;
2967             if (MARKERP (marker))
2968               {
2969                 if (XMARKER (marker)->buffer == 0)
2970                   XSETFASTINT (marker, 0);
2971                 else
2972                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2973               }
2974
2975             CHECK_NUMBER_COERCE_MARKER (marker);
2976             from = marker;
2977
2978             if (!NILP (reseat) && MARKERP (m))
2979               {
2980                 unchain_marker (XMARKER (m));
2981                 XSETCAR (list, Qnil);
2982               }
2983
2984             if ((list = XCDR (list), !CONSP (list)))
2985               break;
2986
2987             m = marker = XCAR (list);
2988
2989             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2990               XSETFASTINT (marker, 0);
2991
2992             CHECK_NUMBER_COERCE_MARKER (marker);
2993             if ((XINT (from) < 0
2994                  ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2995                  : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2996                 && (XINT (marker) < 0
2997                     ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2998                     : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
2999               {
3000                 search_regs.start[i] = XINT (from);
3001                 search_regs.end[i] = XINT (marker);
3002               }
3003             else
3004               {
3005                 search_regs.start[i] = -1;
3006               }
3007
3008             if (!NILP (reseat) && MARKERP (m))
3009               {
3010                 unchain_marker (XMARKER (m));
3011                 XSETCAR (list, Qnil);
3012               }
3013           }
3014         list = XCDR (list);
3015       }
3016
3017     for (; i < search_regs.num_regs; i++)
3018       search_regs.start[i] = -1;
3019   }
3020
3021   return Qnil;
3022 }
3023
3024 /* If true the match data have been saved in saved_search_regs
3025    during the execution of a sentinel or filter. */
3026 static bool search_regs_saved;
3027 static struct re_registers saved_search_regs;
3028 static Lisp_Object saved_last_thing_searched;
3029
3030 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3031    if asynchronous code (filter or sentinel) is running. */
3032 static void
3033 save_search_regs (void)
3034 {
3035   if (!search_regs_saved)
3036     {
3037       saved_search_regs.num_regs = search_regs.num_regs;
3038       saved_search_regs.start = search_regs.start;
3039       saved_search_regs.end = search_regs.end;
3040       saved_last_thing_searched = last_thing_searched;
3041       last_thing_searched = Qnil;
3042       search_regs.num_regs = 0;
3043       search_regs.start = 0;
3044       search_regs.end = 0;
3045
3046       search_regs_saved = 1;
3047     }
3048 }
3049
3050 /* Called upon exit from filters and sentinels. */
3051 void
3052 restore_search_regs (void)
3053 {
3054   if (search_regs_saved)
3055     {
3056       if (search_regs.num_regs > 0)
3057         {
3058           xfree (search_regs.start);
3059           xfree (search_regs.end);
3060         }
3061       search_regs.num_regs = saved_search_regs.num_regs;
3062       search_regs.start = saved_search_regs.start;
3063       search_regs.end = saved_search_regs.end;
3064       last_thing_searched = saved_last_thing_searched;
3065       saved_last_thing_searched = Qnil;
3066       search_regs_saved = 0;
3067     }
3068 }
3069
3070 static void
3071 unwind_set_match_data (Lisp_Object list)
3072 {
3073   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3074   Fset_match_data (list, Qt);
3075 }
3076
3077 /* Called to unwind protect the match data.  */
3078 void
3079 record_unwind_save_match_data (void)
3080 {
3081   record_unwind_protect (unwind_set_match_data,
3082                          Fmatch_data (Qnil, Qnil, Qnil));
3083 }
3084
3085 /* Quote a string to deactivate reg-expr chars */
3086
3087 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3088        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3089   (Lisp_Object string)
3090 {
3091   char *in, *out, *end;
3092   char *temp;
3093   ptrdiff_t backslashes_added = 0;
3094
3095   CHECK_STRING (string);
3096
3097   USE_SAFE_ALLOCA;
3098   SAFE_NALLOCA (temp, 2, SBYTES (string));
3099
3100   /* Now copy the data into the new string, inserting escapes. */
3101
3102   in = SSDATA (string);
3103   end = in + SBYTES (string);
3104   out = temp;
3105
3106   for (; in != end; in++)
3107     {
3108       if (*in == '['
3109           || *in == '*' || *in == '.' || *in == '\\'
3110           || *in == '?' || *in == '+'
3111           || *in == '^' || *in == '$')
3112         *out++ = '\\', backslashes_added++;
3113       *out++ = *in;
3114     }
3115
3116   Lisp_Object result
3117     = make_specified_string (temp,
3118                              SCHARS (string) + backslashes_added,
3119                              out - temp,
3120                              STRING_MULTIBYTE (string));
3121   SAFE_FREE ();
3122   return result;
3123 }
3124
3125 /* Like find_newline, but doesn't use the cache, and only searches forward.  */
3126 static ptrdiff_t
3127 find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
3128                ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
3129                ptrdiff_t *bytepos, bool allow_quit)
3130 {
3131   if (count > 0)
3132     {
3133       if (!end)
3134         end = ZV, end_byte = ZV_BYTE;
3135     }
3136   else
3137     {
3138       if (!end)
3139         end = BEGV, end_byte = BEGV_BYTE;
3140     }
3141   if (end_byte == -1)
3142     end_byte = CHAR_TO_BYTE (end);
3143
3144   if (shortage != 0)
3145     *shortage = 0;
3146
3147   immediate_quit = allow_quit;
3148
3149   if (count > 0)
3150     while (start != end)
3151       {
3152         /* Our innermost scanning loop is very simple; it doesn't know
3153            about gaps, buffer ends, or the newline cache.  ceiling is
3154            the position of the last character before the next such
3155            obstacle --- the last character the dumb search loop should
3156            examine.  */
3157         ptrdiff_t tem, ceiling_byte = end_byte - 1;
3158
3159         if (start_byte == -1)
3160           start_byte = CHAR_TO_BYTE (start);
3161
3162         /* The dumb loop can only scan text stored in contiguous
3163            bytes. BUFFER_CEILING_OF returns the last character
3164            position that is contiguous, so the ceiling is the
3165            position after that.  */
3166         tem = BUFFER_CEILING_OF (start_byte);
3167         ceiling_byte = min (tem, ceiling_byte);
3168
3169         {
3170           /* The termination address of the dumb loop.  */
3171           unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
3172           ptrdiff_t lim_byte = ceiling_byte + 1;
3173
3174           /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
3175              of the base, the cursor, and the next line.  */
3176           ptrdiff_t base = start_byte - lim_byte;
3177           ptrdiff_t cursor, next;
3178
3179           for (cursor = base; cursor < 0; cursor = next)
3180             {
3181               /* The dumb loop.  */
3182               unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
3183               next = nl ? nl - lim_addr : 0;
3184
3185               if (! nl)
3186                 break;
3187               next++;
3188
3189               if (--count == 0)
3190                 {
3191                   immediate_quit = 0;
3192                   if (bytepos)
3193                     *bytepos = lim_byte + next;
3194                   return BYTE_TO_CHAR (lim_byte + next);
3195                 }
3196             }
3197
3198           start_byte = lim_byte;
3199           start = BYTE_TO_CHAR (start_byte);
3200         }
3201       }
3202
3203   immediate_quit = 0;
3204   if (shortage)
3205     *shortage = count;
3206   if (bytepos)
3207     {
3208       *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
3209       eassert (*bytepos == CHAR_TO_BYTE (start));
3210     }
3211   return start;
3212 }
3213
3214 DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
3215        0, 1, 0,
3216        doc: /* Check the newline cache of BUFFER against buffer contents.
3217
3218 BUFFER defaults to the current buffer.
3219
3220 Value is an array of 2 sub-arrays of buffer positions for newlines,
3221 the first based on the cache, the second based on actually scanning
3222 the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
3223   (Lisp_Object buffer)
3224 {
3225   struct buffer *buf, *old = NULL;
3226   ptrdiff_t shortage, nl_count_cache, nl_count_buf;
3227   Lisp_Object cache_newlines, buf_newlines, val;
3228   ptrdiff_t from, found, i;
3229
3230   if (NILP (buffer))
3231     buf = current_buffer;
3232   else
3233     {
3234       CHECK_BUFFER (buffer);
3235       buf = XBUFFER (buffer);
3236       old = current_buffer;
3237     }
3238   if (buf->base_buffer)
3239     buf = buf->base_buffer;
3240
3241   /* If the buffer doesn't have a newline cache, return nil.  */
3242   if (NILP (BVAR (buf, cache_long_scans))
3243       || buf->newline_cache == NULL)
3244     return Qnil;
3245
3246   /* find_newline can only work on the current buffer.  */
3247   if (old != NULL)
3248     set_buffer_internal_1 (buf);
3249
3250   /* How many newlines are there according to the cache?  */
3251   find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3252                 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3253   nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3254
3255   /* Create vector and populate it.  */
3256   cache_newlines = make_uninit_vector (nl_count_cache);
3257
3258   if (nl_count_cache)
3259     {
3260       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3261         {
3262           ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3263
3264           found = find_newline (from, from_byte, 0, -1, 1, &shortage,
3265                                 NULL, true);
3266           if (shortage != 0 || i >= nl_count_cache)
3267             break;
3268           ASET (cache_newlines, i, make_number (found - 1));
3269         }
3270       /* Fill the rest of slots with an invalid position.  */
3271       for ( ; i < nl_count_cache; i++)
3272         ASET (cache_newlines, i, make_number (-1));
3273     }
3274
3275   /* Now do the same, but without using the cache.  */
3276   find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
3277                  TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
3278   nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage;
3279   buf_newlines = make_uninit_vector (nl_count_buf);
3280   if (nl_count_buf)
3281     {
3282       for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
3283         {
3284           ptrdiff_t from_byte = CHAR_TO_BYTE (from);
3285
3286           found = find_newline1 (from, from_byte, 0, -1, 1, &shortage,
3287                                  NULL, true);
3288           if (shortage != 0 || i >= nl_count_buf)
3289             break;
3290           ASET (buf_newlines, i, make_number (found - 1));
3291         }
3292       for ( ; i < nl_count_buf; i++)
3293         ASET (buf_newlines, i, make_number (-1));
3294     }
3295
3296   /* Construct the value and return it.  */
3297   val = make_uninit_vector (2);
3298   ASET (val, 0, cache_newlines);
3299   ASET (val, 1, buf_newlines);
3300
3301   if (old != NULL)
3302     set_buffer_internal_1 (old);
3303   return val;
3304 }
3305 \f
3306 void
3307 syms_of_search (void)
3308 {
3309   register int i;
3310
3311   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3312     {
3313       searchbufs[i].buf.allocated = 100;
3314       searchbufs[i].buf.buffer = xmalloc (100);
3315       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3316       searchbufs[i].regexp = Qnil;
3317       searchbufs[i].whitespace_regexp = Qnil;
3318       searchbufs[i].syntax_table = Qnil;
3319       staticpro (&searchbufs[i].regexp);
3320       staticpro (&searchbufs[i].whitespace_regexp);
3321       staticpro (&searchbufs[i].syntax_table);
3322       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3323     }
3324   searchbuf_head = &searchbufs[0];
3325
3326   /* Error condition used for failing searches.  */
3327   DEFSYM (Qsearch_failed, "search-failed");
3328
3329   /* Error condition signaled when regexp compile_pattern fails.  */
3330   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3331
3332   Fput (Qsearch_failed, Qerror_conditions,
3333         listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3334   Fput (Qsearch_failed, Qerror_message,
3335         build_pure_c_string ("Search failed"));
3336
3337   Fput (Qinvalid_regexp, Qerror_conditions,
3338         listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3339   Fput (Qinvalid_regexp, Qerror_message,
3340         build_pure_c_string ("Invalid regexp"));
3341
3342   last_thing_searched = Qnil;
3343   staticpro (&last_thing_searched);
3344
3345   saved_last_thing_searched = Qnil;
3346   staticpro (&saved_last_thing_searched);
3347
3348   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3349       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3350 Some commands use this for user-specified regexps.
3351 Spaces that occur inside character classes or repetition operators
3352 or other such regexp constructs are not replaced with this.
3353 A value of nil (which is the normal value) means treat spaces literally.  */);
3354   Vsearch_spaces_regexp = Qnil;
3355
3356   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3357       doc: /* Internal use only.
3358 If non-nil, the primitive searching and matching functions
3359 such as `looking-at', `string-match', `re-search-forward', etc.,
3360 do not set the match data.  The proper way to use this variable
3361 is to bind it with `let' around a small expression.  */);
3362   Vinhibit_changing_match_data = Qnil;
3363
3364   defsubr (&Slooking_at);
3365   defsubr (&Sposix_looking_at);
3366   defsubr (&Sstring_match);
3367   defsubr (&Sposix_string_match);
3368   defsubr (&Ssearch_forward);
3369   defsubr (&Ssearch_backward);
3370   defsubr (&Sre_search_forward);
3371   defsubr (&Sre_search_backward);
3372   defsubr (&Sposix_search_forward);
3373   defsubr (&Sposix_search_backward);
3374   defsubr (&Sreplace_match);
3375   defsubr (&Smatch_beginning);
3376   defsubr (&Smatch_end);
3377   defsubr (&Smatch_data);
3378   defsubr (&Sset_match_data);
3379   defsubr (&Sregexp_quote);
3380   defsubr (&Snewline_cache_check);
3381 }