code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2
   3 Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2013 Free Software
   4 Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software: you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation, either version 3 of the License, or
  11 (at your option) any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
  20
  21
  22 #include <config.h>
  23
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "character.h"
  28 #include "buffer.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
/* Number of entries in the compiled-regexp cache; this is the length
   of the searchbufs array.  */
#define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
/* The instances of struct regexp_cache: statically allocated storage
   for every entry of the regexp cache.  */
static struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];

/* The head of the linked list; points to the most recently used buffer.
   compile_pattern moves the entry it hands out to this position.  */
static struct regexp_cache *searchbuf_head;
  61
  62
/* The match registers shared by the matching functions in this file.

   Every call to re_match, etc., must pass &search_regs as the regs
   argument unless you can show it is unnecessary (i.e., if re_match
   is certainly going to be called again before region-around-match
   can be called).

   Since the registers are now dynamically allocated, we need to make
   sure not to refer to the Nth register before checking that it has
   been allocated by checking search_regs.num_regs.

   The regex code keeps track of whether it has allocated the search
   buffer using bits in the re_pattern_buffer.  This means that whenever
   you compile a new pattern, it completely forgets whether it has
   allocated any registers, and will allocate new registers the next
   time you call a searching or matching function.  Therefore, we need
   to call re_set_registers after compiling a new pattern or after
   setting the match registers, so that the regex functions will be
   able to free or re-allocate it properly.  */
static struct re_registers search_regs;

/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.  */
static Lisp_Object last_thing_searched;

/* Error condition signaled when regexp compile_pattern fails
   (see the xsignal1 call in compile_pattern_1).  */
static Lisp_Object Qinvalid_regexp;

/* Error condition used for failing searches.  */
static Lisp_Object Qsearch_failed;

/* Forward declarations of file-local helpers that manipulate
   search_regs.  */
static void set_search_regs (ptrdiff_t, ptrdiff_t);
static void save_search_regs (void);
  92
/* Forward declarations of file-local (static) helper functions, so
   they can be called before their definitions.  */
static void set_search_regs (ptrdiff_t, ptrdiff_t);
static void save_search_regs (void);
static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
                                ptrdiff_t, Lisp_Object, ptrdiff_t, ptrdiff_t,
                                ptrdiff_t, ptrdiff_t);
static EMACS_INT boyer_moore (EMACS_INT, unsigned char *, ptrdiff_t,
                              Lisp_Object, Lisp_Object, ptrdiff_t,
                              ptrdiff_t, int);
static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
                                ptrdiff_t, ptrdiff_t, EMACS_INT, int,
                                Lisp_Object, Lisp_Object, int);
 104
/* Signal a Lisp error reporting that the regexp matcher ran out of
   stack.  Callers in this file invoke it when a matching function
   returns -2.  Never returns.  */
static _Noreturn void
matcher_overflow (void)
{
  error ("Stack overflow in regexp matcher");
}
 110
 111 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 112    PATTERN is the pattern to compile.
 113    CP is the place to put the result.
 114    TRANSLATE is a translation table for ignoring case, or nil for none.
 115    POSIX is nonzero if we want full backtracking (POSIX style)
 116    for this pattern.  0 means backtrack only enough to get a valid match.
 117
 118    The behavior also depends on Vsearch_spaces_regexp.  */
 119
 120 static void
 121 compile_pattern_1 (struct regexp_cache *cp, Lisp_Object pattern, Lisp_Object translate, int posix)
 122 {
 123   char *val;
 124   reg_syntax_t old;
 125
 126   cp->regexp = Qnil;
 127   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 128   cp->posix = posix;
 129   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 130   cp->buf.charset_unibyte = charset_unibyte;
 131   if (STRINGP (Vsearch_spaces_regexp))
 132     cp->whitespace_regexp = Vsearch_spaces_regexp;
 133   else
 134     cp->whitespace_regexp = Qnil;
 135
 136   /* rms: I think BLOCK_INPUT is not needed here any more,
 137      because regex.c defines malloc to call xmalloc.
 138      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 139      So let's turn it off.  */
 140   /*  BLOCK_INPUT;  */
 141   old = re_set_syntax (RE_SYNTAX_EMACS
 142                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 143
 144   if (STRINGP (Vsearch_spaces_regexp))
 145     re_set_whitespace_regexp (SSDATA (Vsearch_spaces_regexp));
 146   else
 147     re_set_whitespace_regexp (NULL);
 148
 149   val = (char *) re_compile_pattern (SSDATA (pattern),
 150                                      SBYTES (pattern), &cp->buf);
 151
 152   /* If the compiled pattern hard codes some of the contents of the
 153      syntax-table, it can only be reused with *this* syntax table.  */
 154   cp->syntax_table = cp->buf.used_syntax ? BVAR (current_buffer, syntax_table) : Qt;
 155
 156   re_set_whitespace_regexp (NULL);
 157
 158   re_set_syntax (old);
 159   /* unblock_input ();  */
 160   if (val)
 161     xsignal1 (Qinvalid_regexp, build_string (val));
 162
 163   cp->regexp = Fcopy_sequence (pattern);
 164 }
 165
 166 /* Shrink each compiled regexp buffer in the cache
 167    to the size actually used right now.
 168    This is called from garbage collection.  */
 169
 170 void
 171 shrink_regexp_cache (void)
 172 {
 173   struct regexp_cache *cp;
 174
 175   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 176     {
 177       cp->buf.allocated = cp->buf.used;
 178       cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
 179     }
 180 }
 181
 182 /* Clear the regexp cache w.r.t. a particular syntax table,
 183    because it was changed.
 184    There is no danger of memory leak here because re_compile_pattern
 185    automagically manages the memory in each re_pattern_buffer struct,
 186    based on its `allocated' and `buffer' values.  */
 187 void
 188 clear_regexp_cache (void)
 189 {
 190   int i;
 191
 192   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 193     /* It's tempting to compare with the syntax-table we've actually changed,
 194        but it's not sufficient because char-table inheritance means that
 195        modifying one syntax-table can change others at the same time.  */
 196     if (!EQ (searchbufs[i].syntax_table, Qt))
 197       searchbufs[i].regexp = Qnil;
 198 }
 199
 200 /* Compile a regexp if necessary, but first check to see if there's one in
 201    the cache.
 202    PATTERN is the pattern to compile.
 203    TRANSLATE is a translation table for ignoring case, or nil for none.
 204    REGP is the structure that says where to store the "register"
 205    values that will result from matching this pattern.
 206    If it is 0, we should compile the pattern not to record any
 207    subexpression bounds.
 208    POSIX is nonzero if we want full backtracking (POSIX style)
 209    for this pattern.  0 means backtrack only enough to get a valid match.  */
 210
 211 struct re_pattern_buffer *
 212 compile_pattern (Lisp_Object pattern, struct re_registers *regp, Lisp_Object translate, int posix, int multibyte)
 213 {
 214   struct regexp_cache *cp, **cpp;
 215
 216   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 217     {
 218       cp = *cpp;
 219       /* Entries are initialized to nil, and may be set to nil by
 220          compile_pattern_1 if the pattern isn't valid.  Don't apply
 221          string accessors in those cases.  However, compile_pattern_1
 222          is only applied to the cache entry we pick here to reuse.  So
 223          nil should never appear before a non-nil entry.  */
 224       if (NILP (cp->regexp))
 225         goto compile_it;
 226       if (SCHARS (cp->regexp) == SCHARS (pattern)
 227           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 228           && !NILP (Fstring_equal (cp->regexp, pattern))
 229           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 230           && cp->posix == posix
 231           && (EQ (cp->syntax_table, Qt)
 232               || EQ (cp->syntax_table, BVAR (current_buffer, syntax_table)))
 233           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 234           && cp->buf.charset_unibyte == charset_unibyte)
 235         break;
 236
 237       /* If we're at the end of the cache, compile into the nil cell
 238          we found, or the last (least recently used) cell with a
 239          string value.  */
 240       if (cp->next == 0)
 241         {
 242         compile_it:
 243           compile_pattern_1 (cp, pattern, translate, posix);
 244           break;
 245         }
 246     }
 247
 248   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 249      either because we found it in the cache or because we just compiled it.
 250      Move it to the front of the queue to mark it as most recently used.  */
 251   *cpp = cp->next;
 252   cp->next = searchbuf_head;
 253   searchbuf_head = cp;
 254
 255   /* Advise the searching functions about the space we have allocated
 256      for register data.  */
 257   if (regp)
 258     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 259
 260   /* The compiled pattern can be used both for multibyte and unibyte
 261      target.  But, we have to tell which the pattern is used for. */
 262   cp->buf.target_multibyte = multibyte;
 263
 264   return &cp->buf;
 265 }
 266
 267 \f
 268 static Lisp_Object
 269 looking_at_1 (Lisp_Object string, int posix)
 270 {
 271   Lisp_Object val;
 272   unsigned char *p1, *p2;
 273   ptrdiff_t s1, s2;
 274   register ptrdiff_t i;
 275   struct re_pattern_buffer *bufp;
 276
 277   if (running_asynch_code)
 278     save_search_regs ();
 279
 280   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 281   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 282                          BVAR (current_buffer, case_eqv_table));
 283
 284   CHECK_STRING (string);
 285   bufp = compile_pattern (string,
 286                           (NILP (Vinhibit_changing_match_data)
 287                            ? &search_regs : NULL),
 288                           (!NILP (BVAR (current_buffer, case_fold_search))
 289                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 290                           posix,
 291                           !NILP (BVAR (current_buffer, enable_multibyte_characters)));
 292
 293   immediate_quit = 1;
 294   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 295
 296   /* Get pointers and sizes of the two strings
 297      that make up the visible portion of the buffer. */
 298
 299   p1 = BEGV_ADDR;
 300   s1 = GPT_BYTE - BEGV_BYTE;
 301   p2 = GAP_END_ADDR;
 302   s2 = ZV_BYTE - GPT_BYTE;
 303   if (s1 < 0)
 304     {
 305       p2 = p1;
 306       s2 = ZV_BYTE - BEGV_BYTE;
 307       s1 = 0;
 308     }
 309   if (s2 < 0)
 310     {
 311       s1 = ZV_BYTE - BEGV_BYTE;
 312       s2 = 0;
 313     }
 314
 315   re_match_object = Qnil;
 316
 317   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 318                   PT_BYTE - BEGV_BYTE,
 319                   (NILP (Vinhibit_changing_match_data)
 320                    ? &search_regs : NULL),
 321                   ZV_BYTE - BEGV_BYTE);
 322   immediate_quit = 0;
 323
 324   if (i == -2)
 325     matcher_overflow ();
 326
 327   val = (0 <= i ? Qt : Qnil);
 328   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 329     for (i = 0; i < search_regs.num_regs; i++)
 330       if (search_regs.start[i] >= 0)
 331         {
 332           search_regs.start[i]
 333             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 334           search_regs.end[i]
 335             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 336         }
 337
 338   /* Set last_thing_searched only when match data is changed.  */
 339   if (NILP (Vinhibit_changing_match_data))
 340     XSETBUFFER (last_thing_searched, current_buffer);
 341
 342   return val;
 343 }
 344
/* Lisp primitive `looking-at'.  Delegates to looking_at_1 with POSIX
   set to 0 (no POSIX backtracking).  */
DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
       doc: /* Return t if text after point matches regular expression REGEXP.
This function modifies the match data that `match-beginning',
`match-end' and `match-data' access; save and restore the match
data if you want to preserve them.  */)
  (Lisp_Object regexp)
{
  return looking_at_1 (regexp, 0);
}
 354
/* Lisp primitive `posix-looking-at'.  Delegates to looking_at_1 with
   POSIX set to 1 (full POSIX backtracking).  */
DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
       doc: /* Return t if text after point matches regular expression REGEXP.
Find the longest match, in accord with Posix regular expression rules.
This function modifies the match data that `match-beginning',
`match-end' and `match-data' access; save and restore the match
data if you want to preserve them.  */)
  (Lisp_Object regexp)
{
  return looking_at_1 (regexp, 1);
}
 365 \f
 366 static Lisp_Object
 367 string_match_1 (Lisp_Object regexp, Lisp_Object string, Lisp_Object start, int posix)
 368 {
 369   ptrdiff_t val;
 370   struct re_pattern_buffer *bufp;
 371   EMACS_INT pos;
 372   ptrdiff_t pos_byte, i;
 373
 374   if (running_asynch_code)
 375     save_search_regs ();
 376
 377   CHECK_STRING (regexp);
 378   CHECK_STRING (string);
 379
 380   if (NILP (start))
 381     pos = 0, pos_byte = 0;
 382   else
 383     {
 384       ptrdiff_t len = SCHARS (string);
 385
 386       CHECK_NUMBER (start);
 387       pos = XINT (start);
 388       if (pos < 0 && -pos <= len)
 389         pos = len + pos;
 390       else if (0 > pos || pos > len)
 391         args_out_of_range (string, start);
 392       pos_byte = string_char_to_byte (string, pos);
 393     }
 394
 395   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 396   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 397                          BVAR (current_buffer, case_eqv_table));
 398
 399   bufp = compile_pattern (regexp,
 400                           (NILP (Vinhibit_changing_match_data)
 401                            ? &search_regs : NULL),
 402                           (!NILP (BVAR (current_buffer, case_fold_search))
 403                            ? BVAR (current_buffer, case_canon_table) : Qnil),
 404                           posix,
 405                           STRING_MULTIBYTE (string));
 406   immediate_quit = 1;
 407   re_match_object = string;
 408
 409   val = re_search (bufp, SSDATA (string),
 410                    SBYTES (string), pos_byte,
 411                    SBYTES (string) - pos_byte,
 412                    (NILP (Vinhibit_changing_match_data)
 413                     ? &search_regs : NULL));
 414   immediate_quit = 0;
 415
 416   /* Set last_thing_searched only when match data is changed.  */
 417   if (NILP (Vinhibit_changing_match_data))
 418     last_thing_searched = Qt;
 419
 420   if (val == -2)
 421     matcher_overflow ();
 422   if (val < 0) return Qnil;
 423
 424   if (NILP (Vinhibit_changing_match_data))
 425     for (i = 0; i < search_regs.num_regs; i++)
 426       if (search_regs.start[i] >= 0)
 427         {
 428           search_regs.start[i]
 429             = string_byte_to_char (string, search_regs.start[i]);
 430           search_regs.end[i]
 431             = string_byte_to_char (string, search_regs.end[i]);
 432         }
 433
 434   return make_number (string_byte_to_char (string, val));
 435 }
 436
/* Lisp primitive `string-match'.  Delegates to string_match_1 with
   POSIX set to 0 (no POSIX backtracking).  */
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
Matching ignores case if `case-fold-search' is non-nil.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
matched by parenthesis constructs in the pattern.

You can use the function `match-string' to extract the substrings
matched by the parenthesis constructions in REGEXP. */)
  (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
{
  return string_match_1 (regexp, string, start, 0);
}
 451
/* Lisp primitive `posix-string-match'.  Delegates to string_match_1
   with POSIX set to 1 (full POSIX backtracking).  */
DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
       doc: /* Return index of start of first match for REGEXP in STRING, or nil.
Find the longest match, in accord with Posix regular expression rules.
Case is ignored if `case-fold-search' is non-nil in the current buffer.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
matched by parenthesis constructs in the pattern.  */)
  (Lisp_Object regexp, Lisp_Object string, Lisp_Object start)
{
  return string_match_1 (regexp, string, start, 1);
}
 464
 465 /* Match REGEXP against STRING, searching all of STRING,
 466    and return the index of the match, or negative on failure.
 467    This does not clobber the match data.  */
 468
 469 ptrdiff_t
 470 fast_string_match (Lisp_Object regexp, Lisp_Object string)
 471 {
 472   ptrdiff_t val;
 473   struct re_pattern_buffer *bufp;
 474
 475   bufp = compile_pattern (regexp, 0, Qnil,
 476                           0, STRING_MULTIBYTE (string));
 477   immediate_quit = 1;
 478   re_match_object = string;
 479
 480   val = re_search (bufp, SSDATA (string),
 481                    SBYTES (string), 0,
 482                    SBYTES (string), 0);
 483   immediate_quit = 0;
 484   return val;
 485 }
 486
 487 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 488    and return the index of the match, or negative on failure.
 489    This does not clobber the match data.
 490    We assume that STRING contains single-byte characters.  */
 491
 492 ptrdiff_t
 493 fast_c_string_match_ignore_case (Lisp_Object regexp,
 494                                  const char *string, ptrdiff_t len)
 495 {
 496   ptrdiff_t val;
 497   struct re_pattern_buffer *bufp;
 498
 499   regexp = string_make_unibyte (regexp);
 500   re_match_object = Qt;
 501   bufp = compile_pattern (regexp, 0,
 502                           Vascii_canon_table, 0,
 503                           0);
 504   immediate_quit = 1;
 505   val = re_search (bufp, string, len, 0, len, 0);
 506   immediate_quit = 0;
 507   return val;
 508 }
 509
 510 /* Like fast_string_match but ignore case.  */
 511
 512 ptrdiff_t
 513 fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
 514 {
 515   ptrdiff_t val;
 516   struct re_pattern_buffer *bufp;
 517
 518   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 519                           0, STRING_MULTIBYTE (string));
 520   immediate_quit = 1;
 521   re_match_object = string;
 522
 523   val = re_search (bufp, SSDATA (string),
 524                    SBYTES (string), 0,
 525                    SBYTES (string), 0);
 526   immediate_quit = 0;
 527   return val;
 528 }
 529 \f
 530 /* Match REGEXP against the characters after POS to LIMIT, and return
 531    the number of matched characters.  If STRING is non-nil, match
 532    against the characters in it.  In that case, POS and LIMIT are
 533    indices into the string.  This function doesn't modify the match
 534    data.  */
 535
 536 ptrdiff_t
 537 fast_looking_at (Lisp_Object regexp, ptrdiff_t pos, ptrdiff_t pos_byte, ptrdiff_t limit, ptrdiff_t limit_byte, Lisp_Object string)
 538 {
 539   int multibyte;
 540   struct re_pattern_buffer *buf;
 541   unsigned char *p1, *p2;
 542   ptrdiff_t s1, s2;
 543   ptrdiff_t len;
 544
 545   if (STRINGP (string))
 546     {
 547       if (pos_byte < 0)
 548         pos_byte = string_char_to_byte (string, pos);
 549       if (limit_byte < 0)
 550         limit_byte = string_char_to_byte (string, limit);
 551       p1 = NULL;
 552       s1 = 0;
 553       p2 = SDATA (string);
 554       s2 = SBYTES (string);
 555       re_match_object = string;
 556       multibyte = STRING_MULTIBYTE (string);
 557     }
 558   else
 559     {
 560       if (pos_byte < 0)
 561         pos_byte = CHAR_TO_BYTE (pos);
 562       if (limit_byte < 0)
 563         limit_byte = CHAR_TO_BYTE (limit);
 564       pos_byte -= BEGV_BYTE;
 565       limit_byte -= BEGV_BYTE;
 566       p1 = BEGV_ADDR;
 567       s1 = GPT_BYTE - BEGV_BYTE;
 568       p2 = GAP_END_ADDR;
 569       s2 = ZV_BYTE - GPT_BYTE;
 570       if (s1 < 0)
 571         {
 572           p2 = p1;
 573           s2 = ZV_BYTE - BEGV_BYTE;
 574           s1 = 0;
 575         }
 576       if (s2 < 0)
 577         {
 578           s1 = ZV_BYTE - BEGV_BYTE;
 579           s2 = 0;
 580         }
 581       re_match_object = Qnil;
 582       multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
 583     }
 584
 585   buf = compile_pattern (regexp, 0, Qnil, 0, multibyte);
 586   immediate_quit = 1;
 587   len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
 588                     pos_byte, NULL, limit_byte);
 589   immediate_quit = 0;
 590
 591   return len;
 592 }
 593
 594 \f
 595 /* The newline cache: remembering which sections of text have no newlines.  */
 596
 597 /* If the user has requested newline caching, make sure it's on.
 598    Otherwise, make sure it's off.
 599    This is our cheezy way of associating an action with the change of
 600    state of a buffer-local variable.  */
 601 static void
 602 newline_cache_on_off (struct buffer *buf)
 603 {
 604   if (NILP (BVAR (buf, cache_long_line_scans)))
 605     {
 606       /* It should be off.  */
 607       if (buf->newline_cache)
 608         {
 609           free_region_cache (buf->newline_cache);
 610           buf->newline_cache = 0;
 611         }
 612     }
 613   else
 614     {
 615       /* It should be on.  */
 616       if (buf->newline_cache == 0)
 617         buf->newline_cache = new_region_cache ();
 618     }
 619 }
 620
 621 \f
/* Search for COUNT newlines between START and END.

   START and END are character positions in the current buffer; they
   are converted to byte positions internally.

   If COUNT is positive, search forwards; END must be >= START.
   If COUNT is negative, search backwards for the -COUNTth instance;
      END must be <= START.
   If COUNT is zero, the result is unspecified (the code takes the
      backward branch).

   If END is zero, use BEGV or ZV instead, as appropriate for the
   direction indicated by COUNT.

   If we find COUNT instances, set *SHORTAGE to zero, and return the
   position past the COUNTth match.  Note that for reverse motion
   this is not the same as the usual convention for Emacs motion commands.

   If we don't find COUNT instances before reaching END, set *SHORTAGE
   to the number of newlines left unfound, and return END.

   SHORTAGE may be a null pointer, in which case nothing is stored.

   If ALLOW_QUIT, set immediate_quit.  That's good to do
   except when inside redisplay.  immediate_quit is always reset to 0
   before returning.  */

ptrdiff_t
find_newline (ptrdiff_t start, ptrdiff_t end,
              ptrdiff_t count, ptrdiff_t *shortage, bool allow_quit)
{
  struct region_cache *newline_cache;
  /* Byte position of END; -1 means "not computed yet".  */
  ptrdiff_t end_byte = -1;
  /* +1 when scanning forwards, -1 when scanning backwards.  */
  int direction;

  if (count > 0)
    {
      direction = 1;
      if (!end)
        end = ZV, end_byte = ZV_BYTE;
    }
  else
    {
      direction = -1;
      if (!end)
        end = BEGV, end_byte = BEGV_BYTE;
    }
  if (end_byte == -1)
    end_byte = CHAR_TO_BYTE (end);

  /* Create or free the buffer's newline cache as its settings demand,
     then pick up the (possibly null) result.  */
  newline_cache_on_off (current_buffer);
  newline_cache = current_buffer->newline_cache;

  if (shortage != 0)
    *shortage = 0;

  immediate_quit = allow_quit;

  if (count > 0)
    while (start != end)
      {
        /* Our innermost scanning loop is very simple; it doesn't know
           about gaps, buffer ends, or the newline cache.  ceiling is
           the position of the last character before the next such
           obstacle --- the last character the dumb search loop should
           examine.  */
        ptrdiff_t ceiling_byte = end_byte - 1;
        ptrdiff_t start_byte;
        ptrdiff_t tem;

        /* If we're looking for a newline, consult the newline cache
           to see where we can avoid some scanning.  */
        if (newline_cache)
          {
            ptrdiff_t next_change;
            /* Quitting is disabled while the cache is consulted and
               restored to the caller's choice afterwards.  */
            immediate_quit = 0;
            /* Skip forward over every region the cache reports.  */
            while (region_cache_forward
                   (current_buffer, newline_cache, start, &next_change))
              start = next_change;
            immediate_quit = allow_quit;

            start_byte = CHAR_TO_BYTE (start);

            /* START should never be after END.  */
            if (start_byte > ceiling_byte)
              start_byte = ceiling_byte;

            /* Now the text after start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
          }
        else
          start_byte = CHAR_TO_BYTE (start);

        /* The dumb loop can only scan text stored in contiguous
           bytes. BUFFER_CEILING_OF returns the last character
           position that is contiguous, so the ceiling is the
           position after that.  */
        tem = BUFFER_CEILING_OF (start_byte);
        ceiling_byte = min (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr
            = BYTE_POS_ADDR (ceiling_byte) + 1;
          register unsigned char *cursor
            = BYTE_POS_ADDR (start_byte);
          /* BASE is the address of byte START_BYTE, so an address P in
             this stretch has byte position P - BASE + START_BYTE.  */
          unsigned char *base = cursor;

          while (cursor < ceiling_addr)
            {
              /* The dumb loop.  */
              unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);

              /* If we're looking for newlines, cache the fact that
                 the region from cursor up to the newline found (or to
                 the ceiling, if there is none) is free of them. */
              if (newline_cache)
                {
                  unsigned char *low = cursor;
                  unsigned char *lim = nl ? nl : ceiling_addr;
                  know_region_cache (current_buffer, newline_cache,
                                     BYTE_TO_CHAR (low - base + start_byte),
                                     BYTE_TO_CHAR (lim - base + start_byte));
                }

              /* No newline in this stretch; move on to the next one.  */
              if (! nl)
                break;

              if (--count == 0)
                {
                  /* Found the last newline wanted: return the position
                     just past it.  */
                  immediate_quit = 0;
                  return BYTE_TO_CHAR (nl + 1 - base + start_byte);
                }
              cursor = nl + 1;
            }

          /* Resume after the stretch just scanned.  */
          start = BYTE_TO_CHAR (ceiling_addr - base + start_byte);
        }
      }
  else
    while (start > end)
      {
        /* The last character to check before the next obstacle.  */
        ptrdiff_t ceiling_byte = end_byte;
        ptrdiff_t start_byte;
        ptrdiff_t tem;

        /* Consult the newline cache, if appropriate.  */
        if (newline_cache)
          {
            ptrdiff_t next_change;
            /* Quitting is disabled while the cache is consulted and
               restored to the caller's choice afterwards.  */
            immediate_quit = 0;
            /* Skip backward over every region the cache reports.  */
            while (region_cache_backward
                   (current_buffer, newline_cache, start, &next_change))
              start = next_change;
            immediate_quit = allow_quit;

            start_byte = CHAR_TO_BYTE (start);

            /* Start should never be at or before end.  */
            if (start_byte <= ceiling_byte)
              start_byte = ceiling_byte + 1;

            /* Now the text before start is an unknown region, and
               next_change is the position of the next known region. */
            ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
          }
        else
          start_byte = CHAR_TO_BYTE (start);

        /* Stop scanning before the gap.  */
        tem = BUFFER_FLOOR_OF (start_byte - 1);
        ceiling_byte = max (tem, ceiling_byte);

        {
          /* The termination address of the dumb loop.  */
          register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
          register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
          /* BASE is the address of byte START_BYTE - 1, so for an
             address P in this stretch, P - BASE + START_BYTE is the
             byte position just after the byte at P.  */
          unsigned char *base = cursor;

          while (cursor >= ceiling_addr)
            {
              /* Find the last newline in [ceiling_addr, cursor].  */
              unsigned char *nl = memrchr (ceiling_addr, '\n',
                                           cursor + 1 - ceiling_addr);

              /* If we're looking for newlines, cache the fact that
                 the region from after the cursor to start is free of them.  */
              if (newline_cache)
                {
                  unsigned char *low = nl ? nl : ceiling_addr - 1;
                  unsigned char *lim = cursor;
                  know_region_cache (current_buffer, newline_cache,
                                     BYTE_TO_CHAR (low - base + start_byte),
                                     BYTE_TO_CHAR (lim - base + start_byte));
                }

              /* No newline in this stretch; move on to the next one.  */
              if (! nl)
                break;

              if (++count >= 0)
                {
                  /* Found the last newline wanted: return the position
                     just after it (see the note on BASE above).  */
                  immediate_quit = 0;
                  return BYTE_TO_CHAR (nl - base + start_byte);
                }
              cursor = nl - 1;
            }

          /* Resume at the position of the floor of the stretch just
             scanned.  */
          start = BYTE_TO_CHAR (ceiling_addr - 1 - base + start_byte);
        }
      }

  /* Ran out of text before finding COUNT newlines: report how many
     are still missing, as a non-negative number.  */
  immediate_quit = 0;
  if (shortage != 0)
    *shortage = count * direction;
  return start;
}
 831 \f
 832 /* Search for COUNT instances of a line boundary.
 833    Start at START.  If COUNT is negative, search backwards.
 834
 835    We report the resulting position by calling TEMP_SET_PT_BOTH.
 836
 837    If we find COUNT instances, we position after (always after,
 838    even if scanning backwards) the COUNTth match, and return 0.
 839
 840    If we don't find COUNT instances before reaching the end of the
 841    buffer (or the beginning, if scanning backwards), we return
 842    the number of line boundaries left unfound, and position at
 843    the limit we bumped up against.
 844
 845    If ALLOW_QUIT, set immediate_quit.  That's good to do
 846    except in special cases.

        The scan itself runs over byte positions, from START_BYTE toward
        LIMIT_BYTE, and never looks at text beyond LIMIT_BYTE; when the
        count cannot be satisfied, point is set to LIMIT / LIMIT_BYTE.
        The value immediate_quit had on entry is put back on every
        return path.  */
 847
 848 EMACS_INT
 849 scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
 850               ptrdiff_t limit, ptrdiff_t limit_byte,
 851               EMACS_INT count, bool allow_quit)
 852 {
       /* +1 for a forward scan, -1 otherwise; used only to scale the
          shortfall returned at the very end.  */
 853   int direction = ((count > 0) ? 1 : -1);
 854
 855   unsigned char *cursor;
 856   unsigned char *base;
 857
 858   ptrdiff_t ceiling;
 859   unsigned char *ceiling_addr;
 860
       /* Saved so that each exit below can restore it.  */
 861   bool old_immediate_quit = immediate_quit;
 862
 863   if (allow_quit)
 864     immediate_quit++;
 865
 866   if (count > 0)
 867     {
 868       while (start_byte < limit_byte)
 869         {
               /* CEILING is the last byte position to examine in this pass:
                  whatever BUFFER_CEILING_OF reports, but no further than
                  the byte just before LIMIT_BYTE.
                  NOTE(review): BUFFER_CEILING_OF presumably yields the end
                  of the contiguous text (before the gap) -- confirm.  */
 870           ceiling =  BUFFER_CEILING_OF (start_byte);
 871           ceiling = min (limit_byte - 1, ceiling);
               /* One past the last byte to examine.  */
 872           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
               /* BASE remembers where this pass started, so an address can
                  be turned back into an offset from START_BYTE.  */
 873           base = (cursor = BYTE_POS_ADDR (start_byte));
 874
 875           do
 876             {
 877               unsigned char *nl = memchr (cursor, '\n', ceiling_addr - cursor);
 878               if (! nl)
 879                 break;
 880               if (--count == 0)
 881                 {
 882                   immediate_quit = old_immediate_quit;
                       /* Land just after the newline that was found.  */
 883                   start_byte += nl - base + 1;
 884                   start = BYTE_TO_CHAR (start_byte);
 885                   TEMP_SET_PT_BOTH (start, start_byte);
 886                   return 0;
 887                 }
                   /* More newlines wanted: resume right after this one.  */
 888               cursor = nl + 1;
 889             }
 890           while (cursor < ceiling_addr);
 891
               /* This stretch is exhausted; continue from its end.  */
 892           start_byte += ceiling_addr - base;
 893         }
 894     }
 895   else
 896     {
 897       while (start_byte > limit_byte)
 898         {
               /* Scanning backward, CEILING is really a floor: the lowest
                  byte position to examine in this pass, never below
                  LIMIT_BYTE.  */
 899           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 900           ceiling = max (limit_byte, ceiling);
 901           ceiling_addr = BYTE_POS_ADDR (ceiling);
               /* CURSOR and BASE point one past the byte before START_BYTE,
                  so memrchr below looks at [CEILING_ADDR, CURSOR).  */
 902           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 903           while (1)
 904             {
 905               unsigned char *nl = memrchr (ceiling_addr, '\n',
 906                                            cursor - ceiling_addr);
 907               if (! nl)
 908                 break;
 909
 910               if (++count == 0)
 911                 {
 912                   immediate_quit = old_immediate_quit;
 913                   /* Return the position AFTER the match we found.  */
 914                   start_byte += nl - base + 1;
 915                   start = BYTE_TO_CHAR (start_byte);
 916                   TEMP_SET_PT_BOTH (start, start_byte);
 917                   return 0;
 918                 }
 919
                   /* The next memrchr covers only the bytes before this
                      newline.  */
 920               cursor = nl;
 921             }
               /* CEILING_ADDR is below BASE, so this steps START_BYTE back
                  over the stretch that was just scanned.  */
 922           start_byte += ceiling_addr - base;
 923         }
 924     }
 925
       /* Ran out of text before the count was satisfied.  */
 926   TEMP_SET_PT_BOTH (limit, limit_byte);
 927   immediate_quit = old_immediate_quit;
 928
       /* COUNT was stepped toward zero once per newline found, so for a
          nonzero initial COUNT this is the number still missing.  */
 929   return count * direction;
 930 }
 931
     /* Return what find_newline returns when asked to scan from FROM for
        CNT newlines, passing 0 as the second (search limit) argument and
        a null SHORTAGE pointer.
        NOTE(review): the final argument 0 is presumably find_newline's
        allow-quit flag (hence "no_quit"); its parameter list is not
        visible here, so confirm against the definition.  */
 932 ptrdiff_t
 933 find_next_newline_no_quit (ptrdiff_t from, ptrdiff_t cnt)
 934 {
 935   return find_newline (from, 0, cnt, (ptrdiff_t *) 0, 0);
 936 }
 937
 938 /* Like find_next_newline, but returns the position before the newline,
 939    not after, and only searches up to TO.  This isn't just
 940    find_next_newline (...)-1, because you might hit TO.

        FROM, TO and CNT are handed to find_newline unchanged.  */
 941
 942 ptrdiff_t
 943 find_before_next_newline (ptrdiff_t from, ptrdiff_t to, ptrdiff_t cnt)
 944 {
 945   ptrdiff_t shortage;
 946   ptrdiff_t pos = find_newline (from, to, cnt, &shortage, 1);
 947
       /* find_newline stores the number of newlines it failed to find in
          SHORTAGE when it runs out of text.  A zero shortage is taken to
          mean POS is just after a newline, so step back over it;
          otherwise the scan stopped short and POS is returned as is.
          NOTE(review): this relies on find_newline zeroing SHORTAGE on
          its "found" path, which is not visible here -- confirm.  */
 948   if (shortage == 0)
 949     pos--;
 950
 951   return pos;
 952 }
 953 \f
 954 /* Subroutines of Lisp buffer search functions. */
 955
     /* Shared body of the buffer search commands: search from point for
        STRING and move point on success.

        STRING must be a Lisp string.  COUNT, if non-nil, must be a number;
        the repeat count given to search_buffer is DIRECTION * COUNT, so
        its sign selects the search direction.  BOUND, if non-nil, is a
        number or marker limiting the search: an error is signaled when it
        lies on the wrong side of point for the direction, and it is then
        clamped to the accessible region BEGV..ZV.  With a nil BOUND the
        limit is ZV for a forward search and BEGV for a backward one.
        RE and POSIX are passed through to search_buffer.

        On success, set point to the position search_buffer returned and
        return that position as a Lisp integer.  On failure: if NOERROR is
        nil, signal Qsearch_failed with STRING; if NOERROR is t, return nil
        and leave point alone; for any other NOERROR, move point to the
        limit and return nil.  */
 956 static Lisp_Object
 957 search_command (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror,
 958                 Lisp_Object count, int direction, int RE, int posix)
 959 {
 960   register EMACS_INT np;
 961   EMACS_INT lim;
 962   ptrdiff_t lim_byte;
       /* Signed repeat count: DIRECTION, multiplied by COUNT if given.  */
 963   EMACS_INT n = direction;
 964
 965   if (!NILP (count))
 966     {
 967       CHECK_NUMBER (count);
 968       n *= XINT (count);
 969     }
 970
 971   CHECK_STRING (string);
 972   if (NILP (bound))
 973     {
           /* No explicit bound: search to the end of the accessible
              region in the direction of travel.  */
 974       if (n > 0)
 975         lim = ZV, lim_byte = ZV_BYTE;
 976       else
 977         lim = BEGV, lim_byte = BEGV_BYTE;
 978     }
 979   else
 980     {
 981       CHECK_NUMBER_COERCE_MARKER (bound);
 982       lim = XINT (bound);
           /* The bound must lie ahead of point in the search direction.  */
 983       if (n > 0 ? lim < PT : lim > PT)
 984         error ("Invalid search bound (wrong side of point)");
           /* Clamp to the accessible region; only an in-range bound needs
              a character-to-byte conversion.  */
 985       if (lim > ZV)
 986         lim = ZV, lim_byte = ZV_BYTE;
 987       else if (lim < BEGV)
 988         lim = BEGV, lim_byte = BEGV_BYTE;
 989       else
 990         lim_byte = CHAR_TO_BYTE (lim);
 991     }
 992
 993   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 994   set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
 995                          BVAR (current_buffer, case_eqv_table));
 996
       /* The case tables are passed only when case_fold_search is non-nil
          in the current buffer; otherwise both translations are nil.  */
 997   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 998                       (!NILP (BVAR (current_buffer, case_fold_search))
 999                        ? BVAR (current_buffer, case_canon_table)
1000                        : Qnil),
1001                       (!NILP (BVAR (current_buffer, case_fold_search))
1002                        ? BVAR (current_buffer, case_eqv_table)
1003                        : Qnil),
1004                       posix);
       /* A non-positive value from search_buffer means the search failed.  */
1005   if (np <= 0)
1006     {
1007       if (NILP (noerror))
1008         xsignal1 (Qsearch_failed, string);
1009
1010       if (!EQ (noerror, Qt))
1011         {
               /* NOERROR is neither nil nor t: move point to the limit.  */
1012           eassert (BEGV <= lim && lim <= ZV);
1013           SET_PT_BOTH (lim, lim_byte);
1014           return Qnil;
1015 #if 0 /* This would be clean, but maybe programs depend on
1016          a value of nil here.  */
1017           np = lim;
1018 #endif
1019         }
1020       else
1021         return Qnil;
1022     }
1023
1024   eassert (BEGV <= np && np <= ZV);
1025   SET_PT (np);
1026
1027   return make_number (np);
1028 }
1029 \f
1030 /* Return 1 if REGEXP matches just one constant string.

        The bytes of REGEXP are scanned once.  Any of the unquoted
        characters tested in the outer switch makes the regexp
        non-trivial, and so does a backslash that ends the string or that
        is followed by one of the characters listed in the inner switch.
        A backslash followed by any other byte simply quotes that byte.  */
1031
1032 static int
1033 trivial_regexp_p (Lisp_Object regexp)
1034 {
1035   ptrdiff_t len = SBYTES (regexp);
1036   unsigned char *s = SDATA (regexp);
1037   while (--len >= 0)
1038     {
1039       switch (*s++)
1040         {
1041         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1042           return 0;
1043         case '\\':
               /* A trailing backslash has nothing to quote.  */
1044           if (--len < 0)
1045             return 0;
               /* The byte after the backslash is consumed here too, so it
                  is never examined by the outer switch.  */
1046           switch (*s++)
1047             {
1048             case '|': case '(': case ')': case '`': case '\'': case 'b':
1049             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1050             case 'S': case '=': case '{': case '}': case '_':
1051             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1052             case '1': case '2': case '3': case '4': case '5':
1053             case '6': case '7': case '8': case '9':
1054               return 0;
1055             }
1056         }
1057     }
1058   return 1;
1059 }
1060
1061 /* Search for the n'th occurrence of STRING in the current buffer,
1062    starting at position POS and stopping at position LIM,
1063    treating STRING as a literal string if RE is false or as
1064    a regular expression if RE is true.
1065
1066    If N is positive, searching is forward and LIM must be greater than POS.
1067    If N is negative, searching is backward and LIM must be less than POS.
1068
1069    Returns -x if x occurrences remain to be found (x > 0),
1070    or else the position at the beginning of the Nth occurrence
1071    (if searching backward) or the end (if searching forward).
1072
1073    POSIX is nonzero if we want full backtracking (POSIX style)
1074    for this pattern.  0 means backtrack only enough to get a valid match.  */
1075
     /* Store in OUT the translation of character D under the table TRT.
        If TRT is nil, or if Faref yields a non-integer entry for D, OUT
        becomes D itself; otherwise OUT becomes that integer entry.

        This is a statement macro.  TRT and D are each expanded more than
        once and are not parenthesized, so pass simple expressions without
        side effects.  OUT and D may name the same variable.  */
1076 #define TRANSLATE(out, trt, d)                  \
1077 do                                              \
1078   {                                             \
1079     if (! NILP (trt))                           \
1080       {                                         \
1081         Lisp_Object temp;                       \
1082         temp = Faref (trt, make_number (d));    \
1083         if (INTEGERP (temp))                    \
1084           out = XINT (temp);                    \
1085         else                                    \
1086           out = d;                              \
1087       }                                         \
1088     else                                        \
1089       out = d;                                  \
1090   }                                             \
1091 while (0)
1092
1093 /* Only used in search_buffer, to record the end position of the match
1094    when searching regexps and SEARCH_REGS should not be changed
1095    (i.e. Vinhibit_changing_match_data is non-nil).  */
1096 static struct re_registers search_regs_1;
1097
     /* Search the current buffer for STRING.  The contract for N, LIM, RE,
        POSIX and the return value is given in the comment that precedes
        the TRANSLATE macro.  POS/POS_BYTE and LIM/LIM_BYTE are the start
        and the limit as character and byte positions.  TRT is the
        translation table applied to the text (nil for none); INVERSE_TRT
        is used to walk the equivalents of each pattern character when
        deciding whether boyer_moore is applicable, and is passed on to it.

        A pattern that is a real regexp is compiled and run through
        re_search_2.  Anything else (a literal string, or a trivial regexp
        while Vsearch_spaces_regexp is nil) is converted to the
        multibyteness of the buffer, translated through TRT, and handed
        to boyer_moore or simple_search.  */
1098 static EMACS_INT
1099 search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
1100                ptrdiff_t lim, ptrdiff_t lim_byte, EMACS_INT n,
1101                int RE, Lisp_Object trt, Lisp_Object inverse_trt, int posix)
1102 {
1103   ptrdiff_t len = SCHARS (string);
1104   ptrdiff_t len_byte = SBYTES (string);
1105   register ptrdiff_t i;
1106
1107   if (running_asynch_code)
1108     save_search_regs ();
1109
1110   /* Searching 0 times means don't move.  */
1111   /* Null string is found at starting position.  */
1112   if (len == 0 || n == 0)
1113     {
1114       set_search_regs (pos_byte, 0);
1115       return pos;
1116     }
1117
       /* Use the regexp engine unless the pattern is a trivial regexp and
          no whitespace regexp is in effect.  */
1118   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1119     {
1120       unsigned char *p1, *p2;
1121       ptrdiff_t s1, s2;
1122       struct re_pattern_buffer *bufp;
1123
1124       bufp = compile_pattern (string,
1125                               (NILP (Vinhibit_changing_match_data)
1126                                ? &search_regs : &search_regs_1),
1127                               trt, posix,
1128                               !NILP (BVAR (current_buffer, enable_multibyte_characters)));
1129
1130       immediate_quit = 1;       /* Quit immediately if user types ^G,
1131                                    because letting this function finish
1132                                    can take too long. */
1133       QUIT;                     /* Do a pending quit right away,
1134                                    to avoid paradoxical behavior */
1135       /* Get pointers and sizes of the two strings
1136          that make up the visible portion of the buffer. */
1137
1138       p1 = BEGV_ADDR;
1139       s1 = GPT_BYTE - BEGV_BYTE;
1140       p2 = GAP_END_ADDR;
1141       s2 = ZV_BYTE - GPT_BYTE;
           /* GPT_BYTE is below BEGV_BYTE: all of the visible text is
              treated as the second string.  */
1142       if (s1 < 0)
1143         {
1144           p2 = p1;
1145           s2 = ZV_BYTE - BEGV_BYTE;
1146           s1 = 0;
1147         }
           /* GPT_BYTE is above ZV_BYTE: all of the visible text is
              treated as the first string.  */
1148       if (s2 < 0)
1149         {
1150           s1 = ZV_BYTE - BEGV_BYTE;
1151           s2 = 0;
1152         }
1153       re_match_object = Qnil;
1154
           /* Backward search, one match per iteration.  The range argument
              lim_byte - pos_byte is negative here, which by the GNU regex
              convention asks re_search_2 to try earlier start positions.  */
1155       while (n < 0)
1156         {
1157           ptrdiff_t val;
1158
1159           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1160                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1161                              (NILP (Vinhibit_changing_match_data)
1162                               ? &search_regs : &search_regs_1),
1163                              /* Don't allow match past current point */
1164                              pos_byte - BEGV_BYTE);
1165           if (val == -2)
1166             {
1167               matcher_overflow ();
1168             }
1169           if (val >= 0)
1170             {
1171               if (NILP (Vinhibit_changing_match_data))
1172                 {
1173                   pos_byte = search_regs.start[0] + BEGV_BYTE;
                       /* The registers hold byte offsets relative to
                          BEGV_BYTE; turn them into character positions.  */
1174                   for (i = 0; i < search_regs.num_regs; i++)
1175                     if (search_regs.start[i] >= 0)
1176                       {
1177                         search_regs.start[i]
1178                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1179                         search_regs.end[i]
1180                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1181                       }
1182                   XSETBUFFER (last_thing_searched, current_buffer);
1183                   /* Set pos to the new position. */
1184                   pos = search_regs.start[0];
1185                 }
1186               else
1187                 {
                       /* The match data must not change: only the scratch
                          registers were filled, and they are left as byte
                          offsets.  */
1188                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1189                   /* Set pos to the new position.  */
1190                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1191                 }
1192             }
1193           else
1194             {
1195               immediate_quit = 0;
                   /* N is still negative: minus the number of matches that
                      were not found.  */
1196               return (n);
1197             }
1198           n++;
1199         }
           /* Forward search, one match per iteration; each match moves
              POS to its end.  */
1200       while (n > 0)
1201         {
1202           ptrdiff_t val;
1203
1204           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1205                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1206                              (NILP (Vinhibit_changing_match_data)
1207                               ? &search_regs : &search_regs_1),
1208                              lim_byte - BEGV_BYTE);
1209           if (val == -2)
1210             {
1211               matcher_overflow ();
1212             }
1213           if (val >= 0)
1214             {
1215               if (NILP (Vinhibit_changing_match_data))
1216                 {
1217                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1218                   for (i = 0; i < search_regs.num_regs; i++)
1219                     if (search_regs.start[i] >= 0)
1220                       {
1221                         search_regs.start[i]
1222                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1223                         search_regs.end[i]
1224                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1225                       }
1226                   XSETBUFFER (last_thing_searched, current_buffer);
1227                   pos = search_regs.end[0];
1228                 }
1229               else
1230                 {
1231                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1232                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1233                 }
1234             }
1235           else
1236             {
1237               immediate_quit = 0;
                   /* Minus the number of matches that were not found.  */
1238               return (0 - n);
1239             }
1240           n--;
1241         }
1242       immediate_quit = 0;
1243       return (pos);
1244     }
1245   else                          /* non-RE case */
1246     {
1247       unsigned char *raw_pattern, *pat;
1248       ptrdiff_t raw_pattern_size;
1249       ptrdiff_t raw_pattern_size_byte;
1250       unsigned char *patbuf;
1251       int multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
1252       unsigned char *base_pat;
1253       /* Set to positive if we find a non-ASCII char that needs
1254          translation.  Otherwise set to zero later.  */
1255       int char_base = -1;
1256       int boyer_moore_ok = 1;
1257
1258       /* MULTIBYTE says whether the text to be searched is multibyte.
1259          We must convert PATTERN to match that, or we will not really
1260          find things right.  */
1261
1262       if (multibyte == STRING_MULTIBYTE (string))
1263         {
               /* Pattern and buffer agree; use the string data in place.  */
1264           raw_pattern = SDATA (string);
1265           raw_pattern_size = SCHARS (string);
1266           raw_pattern_size_byte = SBYTES (string);
1267         }
1268       else if (multibyte)
1269         {
               /* Unibyte pattern, multibyte buffer: build a multibyte
                  copy of the pattern on the stack.  */
1270           raw_pattern_size = SCHARS (string);
1271           raw_pattern_size_byte
1272             = count_size_as_multibyte (SDATA (string),
1273                                        raw_pattern_size);
1274           raw_pattern = alloca (raw_pattern_size_byte + 1);
1275           copy_text (SDATA (string), raw_pattern,
1276                      SCHARS (string), 0, 1);
1277         }
1278       else
1279         {
1280           /* Converting multibyte to single-byte.
1281
1282              ??? Perhaps this conversion should be done in a special way
1283              by subtracting nonascii-insert-offset from each non-ASCII char,
1284              so that only the multibyte chars which really correspond to
1285              the chosen single-byte character set can possibly match.  */
1286           raw_pattern_size = SCHARS (string);
1287           raw_pattern_size_byte = SCHARS (string);
1288           raw_pattern = alloca (raw_pattern_size + 1);
1289           copy_text (SDATA (string), raw_pattern,
1290                      SBYTES (string), 1, 0);
1291         }
1292
1293       /* Copy and optionally translate the pattern.  */
1294       len = raw_pattern_size;
1295       len_byte = raw_pattern_size_byte;
           /* Room for every character to take MAX_MULTIBYTE_LENGTH bytes
              once translated.  */
1296       patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
1297       pat = patbuf;
1298       base_pat = raw_pattern;
1299       if (multibyte)
1300         {
1301           /* Fill patbuf by translated characters in STRING while
1302              checking if we can use boyer-moore search.  If TRT is
1303              non-nil, we can use boyer-moore search only if TRT can be
1304              represented by the byte array of 256 elements.  For that,
1305              all non-ASCII case-equivalents of all case-sensitive
1306              characters in STRING must belong to the same character
1307              group (two characters belong to the same group iff their
1308              multibyte forms are the same except for the last byte;
1309              i.e. every 64 characters form a group; U+0000..U+003F,
1310              U+0040..U+007F, U+0080..U+00BF, ...).  */
1311
1312           while (--len >= 0)
1313             {
1314               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1315               int c, translated, inverse;
1316               int in_charlen, charlen;
1317
1318               /* If we got here and the RE flag is set, it's because we're
1319                  dealing with a regexp known to be trivial, so the backslash
1320                  just quotes the next character.  */
1321               if (RE && *base_pat == '\\')
1322                 {
                       /* Drop the backslash from every count: it is not
                          part of the text to be matched.  */
1323                   len--;
1324                   raw_pattern_size--;
1325                   len_byte--;
1326                   base_pat++;
1327                 }
1328
1329               c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
1330
1331               if (NILP (trt))
1332                 {
1333                   str = base_pat;
1334                   charlen = in_charlen;
1335                 }
1336               else
1337                 {
1338                   /* Translate the character.  */
1339                   TRANSLATE (translated, trt, c);
1340                   charlen = CHAR_STRING (translated, str_base);
1341                   str = str_base;
1342
1343                   /* Check if C has any other case-equivalents.  */
1344                   TRANSLATE (inverse, inverse_trt, c);
1345                   /* If so, check if we can use boyer-moore.  */
1346                   if (c != inverse && boyer_moore_ok)
1347                     {
1348                       /* Check if all equivalents belong to the same
1349                          group of characters.  Note that the check of C
1350                          itself is done by the last iteration.  */
1351                       int this_char_base = -1;
1352
                           /* Follow the chain of equivalents through
                              INVERSE_TRT until it comes back to C.  */
1353                       while (boyer_moore_ok)
1354                         {
1355                           if (ASCII_BYTE_P (inverse))
1356                             {
1357                               if (this_char_base > 0)
1358                                 boyer_moore_ok = 0;
1359                               else
1360                                 this_char_base = 0;
1361                             }
1362                           else if (CHAR_BYTE8_P (inverse))
1363                             /* Boyer-moore search can't handle a
1364                                translation of an eight-bit
1365                                character.  */
1366                             boyer_moore_ok = 0;
1367                           else if (this_char_base < 0)
1368                             {
1369                               this_char_base = inverse & ~0x3F;
1370                               if (char_base < 0)
1371                                 char_base = this_char_base;
1372                               else if (this_char_base != char_base)
1373                                 boyer_moore_ok = 0;
1374                             }
1375                           else if ((inverse & ~0x3F) != this_char_base)
1376                             boyer_moore_ok = 0;
1377                           if (c == inverse)
1378                             break;
1379                           TRANSLATE (inverse, inverse_trt, inverse);
1380                         }
1381                     }
1382                 }
1383
1384               /* Store this character into the translated pattern.  */
1385               memcpy (pat, str, charlen);
1386               pat += charlen;
1387               base_pat += in_charlen;
1388               len_byte -= in_charlen;
1389             }
1390
1391           /* If char_base is still negative we didn't find any translated
1392              non-ASCII characters.  */
1393           if (char_base < 0)
1394             char_base = 0;
1395         }
1396       else
1397         {
1398           /* Unibyte buffer.  */
1399           char_base = 0;
1400           while (--len >= 0)
1401             {
1402               int c, translated, inverse;
1403
1404               /* If we got here and the RE flag is set, it's because we're
1405                  dealing with a regexp known to be trivial, so the backslash
1406                  just quotes the next character.  */
1407               if (RE && *base_pat == '\\')
1408                 {
1409                   len--;
1410                   raw_pattern_size--;
1411                   base_pat++;
1412                 }
1413               c = *base_pat++;
1414               TRANSLATE (translated, trt, c);
1415               *pat++ = translated;
1416               /* Check that none of C's equivalents violates the
1417                  assumptions of boyer_moore.  */
1418               TRANSLATE (inverse, inverse_trt, c);
1419               while (1)
1420                 {
                       /* Any equivalent at or above 0200 rules out
                          boyer_moore.  */
1421                   if (inverse >= 0200)
1422                     {
1423                       boyer_moore_ok = 0;
1424                       break;
1425                     }
1426                   if (c == inverse)
1427                     break;
1428                   TRANSLATE (inverse, inverse_trt, inverse);
1429                 }
1430             }
1431         }
1432
           /* From here on LEN_BYTE is the byte length of the translated
              pattern held in PATBUF, and RAW_PATTERN_SIZE is its length
              in characters with any quoting backslashes subtracted.  */
1433       len_byte = pat - patbuf;
1434       pat = base_pat = patbuf;
1435
1436       if (boyer_moore_ok)
1437         return boyer_moore (n, pat, len_byte, trt, inverse_trt,
1438                             pos_byte, lim_byte,
1439                             char_base);
1440       else
1441         return simple_search (n, pat, raw_pattern_size, len_byte, trt,
1442                               pos, pos_byte, lim, lim_byte);
1443     }
1444 }
1445 \f
1446 /* Do a simple string search N times for the string PAT,
1447    whose length is LEN/LEN_BYTE,
1448    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1449    TRT is the translation table.
1450
1451    Return the character position where the match is found.
1452    Otherwise, if M matches remained to be found, return -M.
1453
1454    This kind of search works regardless of what is in PAT and
1455    regardless of what is in TRT.  It is used in cases where
1456    boyer_moore cannot work.

        Each buffer character is translated through TRT before it is
        compared with the pattern character; PAT itself is compared as
        given.  When all N occurrences are found, the last match is
        recorded with set_search_regs.  */
1457
1458 static EMACS_INT
1459 simple_search (EMACS_INT n, unsigned char *pat,
1460                ptrdiff_t len, ptrdiff_t len_byte, Lisp_Object trt,
1461                ptrdiff_t pos, ptrdiff_t pos_byte,
1462                ptrdiff_t lim, ptrdiff_t lim_byte)
1463 {
1464   int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1465   int forward = n > 0;
1466   /* Number of buffer bytes matched.  Note that this may be different
1467      from len_byte in a multibyte buffer.  */
       /* PTRDIFF_MIN is a sentinel for "no match recorded yet"; the
          eassert at `stop' checks that it was replaced.  */
1468   ptrdiff_t match_byte = PTRDIFF_MIN;
1469
       /* Four variants of the same loop follow: forward or backward,
          multibyte or unibyte.  Each outer iteration finds one
          occurrence.  The inner while (1) slides POS one character at a
          time until the pattern matches there, or jumps to `stop' once
          the pattern no longer fits between POS and LIM.  */
1470   if (lim > pos && multibyte)
1471     while (n > 0)
1472       {
1473         while (1)
1474           {
1475             /* Try matching at position POS.  */
1476             ptrdiff_t this_pos = pos;
1477             ptrdiff_t this_pos_byte = pos_byte;
1478             ptrdiff_t this_len = len;
1479             unsigned char *p = pat;
1480             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1481               goto stop;
1482
1483             while (this_len > 0)
1484               {
1485                 int charlen, buf_charlen;
1486                 int pat_ch, buf_ch;
1487
1488                 pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
1489                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1490                                                  buf_charlen);
1491                 TRANSLATE (buf_ch, trt, buf_ch);
1492
1493                 if (buf_ch != pat_ch)
1494                   break;
1495
1496                 this_len--;
1497                 p += charlen;
1498
1499                 this_pos_byte += buf_charlen;
1500                 this_pos++;
1501               }
1502
1503             if (this_len == 0)
1504               {
                     /* Matched: leave POS/POS_BYTE just after the match.  */
1505                 match_byte = this_pos_byte - pos_byte;
1506                 pos += len;
1507                 pos_byte += match_byte;
1508                 break;
1509               }
1510
1511             INC_BOTH (pos, pos_byte);
1512           }
1513
1514         n--;
1515       }
1516   else if (lim > pos)
         /* Forward, unibyte: POS is used directly as the byte position.  */
1517     while (n > 0)
1518       {
1519         while (1)
1520           {
1521             /* Try matching at position POS.  */
1522             ptrdiff_t this_pos = pos;
1523             ptrdiff_t this_len = len;
1524             unsigned char *p = pat;
1525
1526             if (pos + len > lim)
1527               goto stop;
1528
1529             while (this_len > 0)
1530               {
1531                 int pat_ch = *p++;
1532                 int buf_ch = FETCH_BYTE (this_pos);
1533                 TRANSLATE (buf_ch, trt, buf_ch);
1534
1535                 if (buf_ch != pat_ch)
1536                   break;
1537
1538                 this_len--;
1539                 this_pos++;
1540               }
1541
1542             if (this_len == 0)
1543               {
1544                 match_byte = len;
1545                 pos += len;
1546                 break;
1547               }
1548
1549             pos++;
1550           }
1551
1552         n--;
1553       }
1554   /* Backwards search.  */
1555   else if (lim < pos && multibyte)
1556     while (n < 0)
1557       {
1558         while (1)
1559           {
1560             /* Try matching at position POS.  */
1561             ptrdiff_t this_pos = pos;
1562             ptrdiff_t this_pos_byte = pos_byte;
1563             ptrdiff_t this_len = len;
                 /* Compare from the end of the pattern toward its start,
                    stepping backward through the buffer in parallel.  */
1564             const unsigned char *p = pat + len_byte;
1565
1566             if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
1567               goto stop;
1568
1569             while (this_len > 0)
1570               {
1571                 int pat_ch, buf_ch;
1572
1573                 DEC_BOTH (this_pos, this_pos_byte);
1574                 PREV_CHAR_BOUNDARY (p, pat);
1575                 pat_ch = STRING_CHAR (p);
1576                 buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
1577                 TRANSLATE (buf_ch, trt, buf_ch);
1578
1579                 if (buf_ch != pat_ch)
1580                   break;
1581
1582                 this_len--;
1583               }
1584
1585             if (this_len == 0)
1586               {
                     /* Matched: leave POS/POS_BYTE at the start of the
                        match.  */
1587                 match_byte = pos_byte - this_pos_byte;
1588                 pos = this_pos;
1589                 pos_byte = this_pos_byte;
1590                 break;
1591               }
1592
1593             DEC_BOTH (pos, pos_byte);
1594           }
1595
1596         n++;
1597       }
1598   else if (lim < pos)
         /* Backward, unibyte: the candidate match is the LEN bytes that
            end at POS, compared front to back.  */
1599     while (n < 0)
1600       {
1601         while (1)
1602           {
1603             /* Try matching at position POS.  */
1604             ptrdiff_t this_pos = pos - len;
1605             ptrdiff_t this_len = len;
1606             unsigned char *p = pat;
1607
1608             if (this_pos < lim)
1609               goto stop;
1610
1611             while (this_len > 0)
1612               {
1613                 int pat_ch = *p++;
1614                 int buf_ch = FETCH_BYTE (this_pos);
1615                 TRANSLATE (buf_ch, trt, buf_ch);
1616
1617                 if (buf_ch != pat_ch)
1618                   break;
1619                 this_len--;
1620                 this_pos++;
1621               }
1622
1623             if (this_len == 0)
1624               {
1625                 match_byte = len;
1626                 pos -= len;
1627                 break;
1628               }
1629
1630             pos--;
1631           }
1632
1633         n++;
1634       }
1635
1636  stop:
       /* N reaches zero only when every requested occurrence was found.
          set_search_regs is given the byte position where the last match
          starts and its length in bytes: after a forward search POS is at
          the end of the match, after a backward search at its start.  In
          a unibyte buffer POS doubles as the byte position.  */
1637   if (n == 0)
1638     {
1639       eassert (match_byte != PTRDIFF_MIN);
1640       if (forward)
1641         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1642       else
1643         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1644
1645       return pos;
1646     }
       /* Failure: return minus the number of occurrences still missing,
          whichever sign N currently has.  */
1647   else if (n > 0)
1648     return -n;
1649   else
1650     return n;
1651 }
1652 \f
1653 /* Do Boyer-Moore search N times for the string BASE_PAT,
1654    whose length is LEN_BYTE,
1655    from buffer position POS_BYTE until LIM_BYTE.
1656    The sign of N says which direction we search in
        (N > 0 means forward, otherwise backward).
1657    TRT and INVERSE_TRT are translation tables.
1658    Characters in BASE_PAT are already translated by TRT.
1659
1660    This kind of search works if all the characters in BASE_PAT that
1661    have nontrivial translation are the same aside from the last byte.
1662    This makes it possible to translate just the last byte of a
1663    character, and do so after just a simple test of the context.
1664    CHAR_BASE is nonzero if there is such a non-ASCII character.

        Each match found is recorded with set_search_regs.  When the
        last wanted match is found, return the character position of
        its end (of its beginning, for a backward search).  If the
        region between POS_BYTE and LIM_BYTE is exhausted first, return
        minus the number of matches still wanted.
1665
1666    If that criterion is not satisfied, do not call this function.  */
1667
1668 static EMACS_INT
1669 boyer_moore (EMACS_INT n, unsigned char *base_pat,
1670              ptrdiff_t len_byte,
1671              Lisp_Object trt, Lisp_Object inverse_trt,
1672              ptrdiff_t pos_byte, ptrdiff_t lim_byte,
1673              int char_base)
1674 {
1675   int direction = ((n > 0) ? 1 : -1);  /* +1 forward, -1 backward */
1676   register ptrdiff_t dirlen;  /* LEN_BYTE * DIRECTION, set below */
1677   ptrdiff_t limit;
1678   int stride_for_teases = 0;
1679   int BM_tab[0400];  /* stride to take for each possible byte value */
1680   register unsigned char *cursor, *p_limit;
1681   register ptrdiff_t i;
1682   register int j;
1683   unsigned char *pat, *pat_end;
1684   int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1685
1686   unsigned char simple_translate[0400];
1687   /* These are set to the preceding bytes of a byte to be translated
1688      if char_base is nonzero.  As the maximum byte length of a
1689      multibyte character is 5, we have to check at most four previous
1690      bytes.
            NOTE(review): only three previous bytes are recorded here and
            compared in the match loops below; confirm whether the fourth
            is deliberately omitted.  */
1691   int translate_prev_byte1 = 0;
1692   int translate_prev_byte2 = 0;
1693   int translate_prev_byte3 = 0;
1694
1695   /* The general approach is that we are going to maintain that we know
1696      the first (closest to the present position, in whatever direction
1697      we're searching) character that could possibly be the last
1698      (furthest from present position) character of a valid match.  We
1699      advance the state of our knowledge by looking at that character
1700      and seeing whether it indeed matches the last character of the
1701      pattern.  If it does, we take a closer look.  If it does not, we
1702      move our pointer (to putative last characters) as far as is
1703      logically possible.  This amount of movement, which I call a
1704      stride, will be the length of the pattern if the actual character
1705      appears nowhere in the pattern, otherwise it will be the distance
1706      from the last occurrence of that character to the end of the
1707      pattern.  If the amount is zero we have a possible match.  */
1708
1709   /* Here we make a "mickey mouse" BM table.  The stride of the search
1710      is determined only by the last character of the putative match.
1711      If that character does not match, we will stride the proper
1712      distance to propose a match that superimposes it on the last
1713      instance of a character that matches it (per trt), or misses
1714      it entirely if there is none. */
1715
1716   dirlen = len_byte * direction;
1717
1718   /* Record position after the end of the pattern.  */
1719   pat_end = base_pat + len_byte;
1720   /* BASE_PAT points to a character that we start scanning from.
1721      It is the first character in a forward search,
1722      the last character in a backward search.  */
1723   if (direction < 0)
1724     base_pat = pat_end - 1;
1725
1726   /* A character that does not appear in the pattern induces a
1727      stride equal to the pattern length.  */
1728   for (i = 0; i < 0400; i++)
1729     BM_tab[i] = dirlen;
1730
1731   /* We use this for translation, instead of TRT itself.
1732      We fill this in to handle the characters that actually
1733      occur in the pattern.  Others don't matter anyway!  */
1734   for (i = 0; i < 0400; i++)
1735     simple_translate[i] = i;
1736
1737   if (char_base)
1738     {
1739       /* Set up translate_prev_byte1/2/3 from the multibyte encoding
1740          of CHAR_BASE.  Only a byte following them is a target of
               translation.  */
1741       unsigned char str[MAX_MULTIBYTE_LENGTH];
1742       int cblen = CHAR_STRING (char_base, str);
1743
1744       translate_prev_byte1 = str[cblen - 2];
1745       if (cblen > 2)
1746         {
1747           translate_prev_byte2 = str[cblen - 3];
1748           if (cblen > 3)
1749             translate_prev_byte3 = str[cblen - 4];
1750         }
1751     }
1752
       /* Walk the pattern one byte at a time in the scan direction.  For
          each byte, record in BM_tab the distance from that byte to the
          far end of the pattern, and (when translating) do the same for
          every byte that INVERSE_TRT says maps onto it.  */
1753   i = 0;
1754   while (i != dirlen)
1755     {
1756       unsigned char *ptr = base_pat + i;
1757       i += direction;
1758       if (! NILP (trt))
1759         {
1760           /* If the byte currently looking at is the last of a
1761              character to check case-equivalents, set CH to that
1762              character.  An ASCII character and a non-ASCII character
1763              matching with CHAR_BASE are to be checked.  */
1764           int ch = -1;
1765
1766           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1767             ch = *ptr;
1768           else if (char_base
1769                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1770             {
1771               unsigned char *charstart = ptr - 1;
1772
1773               while (! (CHAR_HEAD_P (*charstart)))
1774                 charstart--;
1775               ch = STRING_CHAR (charstart);
1776               if (char_base != (ch & ~0x3F))
1777                 ch = -1;
1778             }
1779
               /* J is the table index: the byte itself, or for a
                  non-ASCII multibyte character a byte built from the
                  low six bits of CH with the 0200 bit set.  */
1780           if (ch >= 0200 && multibyte)
1781             j = (ch & 0x3F) | 0200;
1782           else
1783             j = *ptr;
1784
               /* On the final pattern byte, remember the stride this
                  byte value had before it is overwritten with zero.  */
1785           if (i == dirlen)
1786             stride_for_teases = BM_tab[j];
1787
1788           BM_tab[j] = dirlen - i;
1789           /* A translation table is accompanied by its inverse -- see
1790              comment following downcase_table for details.  */
1791           if (ch >= 0)
1792             {
1793               int starting_ch = ch;
1794               int starting_j = j;
1795
                   /* Follow INVERSE_TRT from CH until it comes back
                      round to STARTING_CH.  */
1796               while (1)
1797                 {
1798                   TRANSLATE (ch, inverse_trt, ch);
1799                   if (ch >= 0200 && multibyte)
1800                     j = (ch & 0x3F) | 0200;
1801                   else
1802                     j = ch;
1803
1804                   /* For all the characters that map into CH,
1805                      set up simple_translate to map the last byte
1806                      into STARTING_J.  */
1807                   simple_translate[j] = starting_j;
1808                   if (ch == starting_ch)
1809                     break;
1810                   BM_tab[j] = dirlen - i;
1811                 }
1812             }
1813         }
1814       else
1815         {
1816           j = *ptr;
1817
1818           if (i == dirlen)
1819             stride_for_teases = BM_tab[j];
1820           BM_tab[j] = dirlen - i;
1821         }
1822       /* stride_for_teases tells how much to stride if we get a
1823          match on the far character but are subsequently
1824          disappointed, by recording what the stride would have been
1825          for that character if the last character had been
1826          different.  */
1827     }
1828   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1829   /* loop invariant - POS_BYTE points at where last char (first
1830      char if reverse) of pattern would align in a possible match.  */
1831   while (n != 0)
1832     {
1833       ptrdiff_t tail_end;
1834       unsigned char *tail_end_ptr;
1835
1836       /* It's been reported that some (broken) compiler thinks that
1837          Boolean expressions in an arithmetic context are unsigned.
1838          Using an explicit ?1:0 prevents this.  */
           /* POS_BYTE has passed LIM_BYTE: give up and report minus the
              number of matches still wanted.  */
1839       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1840           < 0)
1841         return (n * (0 - direction));
1842       /* First we do the part we can by pointers (maybe nothing) */
1843       QUIT;
1844       pat = base_pat;
1845       limit = pos_byte - dirlen + direction;
1846       if (direction > 0)
1847         {
1848           limit = BUFFER_CEILING_OF (limit);
1849           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1850              can take on without hitting edge of buffer or the gap.  */
               /* Also cap one clump at 20000 bytes and at the search
                  bound.  */
1851           limit = min (limit, pos_byte + 20000);
1852           limit = min (limit, lim_byte - 1);
1853         }
1854       else
1855         {
1856           limit = BUFFER_FLOOR_OF (limit);
1857           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1858              can take on without hitting edge of buffer or the gap.  */
1859           limit = max (limit, pos_byte - 20000);
1860           limit = max (limit, lim_byte);
1861         }
1862       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1863       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1864
           /* Use the raw-pointer loop only when more than 20 bytes can
              be scanned before LIMIT; otherwise take the slower
              position-based path in the else branch.  */
1865       if ((limit - pos_byte) * direction > 20)
1866         {
1867           unsigned char *p2;
1868
1869           p_limit = BYTE_POS_ADDR (limit);
1870           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1871           /* In this loop, pos + cursor - p2 is the surrogate for pos.  */
1872           while (1)             /* use one cursor setting as long as i can */
1873             {
1874               if (direction > 0) /* worth duplicating */
1875                 {
1876                   while (cursor <= p_limit)
1877                     {
1878                       if (BM_tab[*cursor] == 0)
1879                         goto hit;
1880                       cursor += BM_tab[*cursor];
1881                     }
1882                 }
1883               else
1884                 {
1885                   while (cursor >= p_limit)
1886                     {
1887                       if (BM_tab[*cursor] == 0)
1888                         goto hit;
1889                       cursor += BM_tab[*cursor];
1890                     }
1891                 }
1892               /* If you are here, cursor is beyond the end of the
1893                  searched region.  You fail to match within the
1894                  permitted region and would otherwise try a character
1895                  beyond that region.  */
1896               break;
1897
1898             hit:
                   /* The far byte has stride zero, so it may end a
                      match.  Compare the remaining pattern bytes,
                      stepping CURSOR back toward the near end; I ends
                      at -DIRECTION exactly when every byte matched.  */
1899               i = dirlen - direction;
1900               if (! NILP (trt))
1901                 {
1902                   while ((i -= direction) + direction != 0)
1903                     {
1904                       int ch;
1905                       cursor -= direction;
1906                       /* Translate only the last byte of a character.  */
1907                       if (! multibyte
1908                           || ((cursor == tail_end_ptr
1909                                || CHAR_HEAD_P (cursor[1]))
1910                               && (CHAR_HEAD_P (cursor[0])
1911                                   /* Check if this is the last byte of
1912                                      a translatable character.  */
1913                                   || (translate_prev_byte1 == cursor[-1]
1914                                       && (CHAR_HEAD_P (translate_prev_byte1)
1915                                           || (translate_prev_byte2 == cursor[-2]
1916                                               && (CHAR_HEAD_P (translate_prev_byte2)
1917                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1918                         ch = simple_translate[*cursor];
1919                       else
1920                         ch = *cursor;
1921                       if (pat[i] != ch)
1922                         break;
1923                     }
1924                 }
1925               else
1926                 {
1927                   while ((i -= direction) + direction != 0)
1928                     {
1929                       cursor -= direction;
1930                       if (pat[i] != *cursor)
1931                         break;
1932                     }
1933                 }
1934               cursor += dirlen - i - direction; /* fix cursor */
1935               if (i + direction == 0)
1936                 {
1937                   ptrdiff_t position, start, end;
1938
1939                   cursor -= direction;
1940
                       /* POSITION is the byte position where the match
                          begins.  */
1941                   position = pos_byte + cursor - p2 + ((direction > 0)
1942                                                        ? 1 - len_byte : 0);
1943                   set_search_regs (position, len_byte);
1944
1945                   if (NILP (Vinhibit_changing_match_data))
1946                     {
1947                       start = search_regs.start[0];
1948                       end = search_regs.end[0];
1949                     }
1950                   else
1951                     /* If Vinhibit_changing_match_data is non-nil,
1952                        search_regs will not be changed.  So let's
1953                        compute start and end here.  */
1954                     {
1955                       start = BYTE_TO_CHAR (position);
1956                       end = BYTE_TO_CHAR (position + len_byte);
1957                     }
1958
1959                   if ((n -= direction) != 0)
1960                     cursor += dirlen; /* to resume search */
1961                   else
1962                     return direction > 0 ? end : start;
1963                 }
1964               else
1965                 cursor += stride_for_teases; /* <sigh> we lose -  */
1966             }
               /* Convert the pointer movement back into a buffer
                  position.  */
1967           pos_byte += cursor - p2;
1968         }
1969       else
1970         /* Now we'll pick up a clump that has to be done the hard
1971            way because it covers a discontinuity.  */
1972         {
1973           limit = ((direction > 0)
1974                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1975                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1976           limit = ((direction > 0)
1977                    ? min (limit + len_byte, lim_byte - 1)
1978                    : max (limit - len_byte, lim_byte));
1979           /* LIMIT is now the last value POS_BYTE can have
1980              and still be valid for a possible match.  */
1981           while (1)
1982             {
1983               /* This loop can be coded for space rather than
1984                  speed because it will usually run only once.
1985                  (the reach is at most len + 21, and typically
1986                  does not exceed len).  */
1987               while ((limit - pos_byte) * direction >= 0)
1988                 {
1989                   int ch = FETCH_BYTE (pos_byte);
1990                   if (BM_tab[ch] == 0)
1991                     goto hit2;
1992                   pos_byte += BM_tab[ch];
1993                 }
1994               break;    /* ran off the end */
1995
1996             hit2:
1997               /* Found what might be a match.  */
                   /* Same comparison as at `hit' above, but each byte
                      is addressed through BYTE_POS_ADDR (pos_byte)
                      instead of a running pointer.  */
1998               i = dirlen - direction;
1999               while ((i -= direction) + direction != 0)
2000                 {
2001                   int ch;
2002                   unsigned char *ptr;
2003                   pos_byte -= direction;
2004                   ptr = BYTE_POS_ADDR (pos_byte);
2005                   /* Translate only the last byte of a character.  */
2006                   if (! multibyte
2007                       || ((ptr == tail_end_ptr
2008                            || CHAR_HEAD_P (ptr[1]))
2009                           && (CHAR_HEAD_P (ptr[0])
2010                               /* Check if this is the last byte of a
2011                                  translatable character.  */
2012                               || (translate_prev_byte1 == ptr[-1]
2013                                   && (CHAR_HEAD_P (translate_prev_byte1)
2014                                       || (translate_prev_byte2 == ptr[-2]
2015                                           && (CHAR_HEAD_P (translate_prev_byte2)
2016                                               || translate_prev_byte3 == ptr[-3])))))))
2017                     ch = simple_translate[*ptr];
2018                   else
2019                     ch = *ptr;
2020                   if (pat[i] != ch)
2021                     break;
2022                 }
2023               /* Above loop has moved POS_BYTE part or all the way
2024                  back to the first pos (last pos if reverse).
2025                  Set it once again at the last (first if reverse) char.  */
2026               pos_byte += dirlen - i - direction;
2027               if (i + direction == 0)
2028                 {
2029                   ptrdiff_t position, start, end;
2030                   pos_byte -= direction;
2031
2032                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2033                   set_search_regs (position, len_byte);
2034
2035                   if (NILP (Vinhibit_changing_match_data))
2036                     {
2037                       start = search_regs.start[0];
2038                       end = search_regs.end[0];
2039                     }
2040                   else
2041                     /* If Vinhibit_changing_match_data is non-nil,
2042                        search_regs will not be changed.  So let's
2043                        compute start and end here.  */
2044                     {
2045                       start = BYTE_TO_CHAR (position);
2046                       end = BYTE_TO_CHAR (position + len_byte);
2047                     }
2048
2049                   if ((n -= direction) != 0)
2050                     pos_byte += dirlen; /* to resume search */
2051                   else
2052                     return direction > 0 ? end : start;
2053                 }
2054               else
2055                 pos_byte += stride_for_teases;
2056             }
2057           }
2058       /* We have done one clump.  Can we continue? */
2059       if ((lim_byte - pos_byte) * direction < 0)
2060         return ((0 - n) * direction);
2061     }
       /* The loop above returns as soon as N reaches zero, so control
          gets here only when N was zero on entry.  */
2062   return BYTE_TO_CHAR (pos_byte);
2063 }
2064
2065 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2066    for the overall match just found in the current buffer.
2067    Also clear out the match data for registers 1 and up.  */
2068
2069 static void
2070 set_search_regs (ptrdiff_t beg_byte, ptrdiff_t nbytes)
2071 {
2072   ptrdiff_t i;
2073
2074   if (!NILP (Vinhibit_changing_match_data))
2075     return;
2076
2077   /* Make sure we have registers in which to store
2078      the match position.  */
2079   if (search_regs.num_regs == 0)
2080     {
2081       search_regs.start = xmalloc (2 * sizeof (regoff_t));
2082       search_regs.end = xmalloc (2 * sizeof (regoff_t));
2083       search_regs.num_regs = 2;
2084     }
2085
2086   /* Clear out the other registers.  */
2087   for (i = 1; i < search_regs.num_regs; i++)
2088     {
2089       search_regs.start[i] = -1;
2090       search_regs.end[i] = -1;
2091     }
2092
2093   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2094   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2095   XSETBUFFER (last_thing_searched, current_buffer);
2096 }
2097 \f
2098 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2099        "MSearch backward: ",
2100        doc: /* Search backward from point for STRING.
2101 Set point to the beginning of the occurrence found, and return point.
2102 An optional second argument bounds the search; it is a buffer position.
2103 The match found must not extend before that position.
2104 Optional third argument, if t, means if fail just return nil (no error).
2105  If not nil and not t, position at limit of search and return nil.
2106 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2107  successive occurrences.  If COUNT is negative, search forward,
2108  instead of backward, for -COUNT occurrences.
2109
2110 Search case-sensitivity is determined by the value of the variable
2111 `case-fold-search', which see.
2112
2113 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2114   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2115 {
2116   return search_command (string, bound, noerror, count, -1, 0, 0);
2117 }
2118
2119 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2120        doc: /* Search forward from point for STRING.
2121 Set point to the end of the occurrence found, and return point.
2122 An optional second argument bounds the search; it is a buffer position.
2123 The match found must not extend after that position.  A value of nil is
2124   equivalent to (point-max).
2125 Optional third argument, if t, means if fail just return nil (no error).
2126   If not nil and not t, move to limit of search and return nil.
2127 Optional fourth argument COUNT, if non-nil, means to search for COUNT
2128  successive occurrences.  If COUNT is negative, search backward,
2129  instead of forward, for -COUNT occurrences.
2130
2131 Search case-sensitivity is determined by the value of the variable
2132 `case-fold-search', which see.
2133
2134 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2135   (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2136 {
2137   return search_command (string, bound, noerror, count, 1, 0, 0);
2138 }
2139
2140 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2141        "sRE search backward: ",
2142        doc: /* Search backward from point for match for regular expression REGEXP.
2143 Set point to the beginning of the match, and return point.
2144 The match found is the one starting last in the buffer
2145 and yet ending before the origin of the search.
2146 An optional second argument bounds the search; it is a buffer position.
2147 The match found must start at or after that position.
2148 Optional third argument, if t, means if fail just return nil (no error).
2149   If not nil and not t, move to limit of search and return nil.
2150 Optional fourth argument is repeat count--search for successive occurrences.
2151
2152 Search case-sensitivity is determined by the value of the variable
2153 `case-fold-search', which see.
2154
2155 See also the functions `match-beginning', `match-end', `match-string',
2156 and `replace-match'.  */)
2157   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2158 {
2159   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2160 }
2161
2162 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2163        "sRE search: ",
2164        doc: /* Search forward from point for regular expression REGEXP.
2165 Set point to the end of the occurrence found, and return point.
2166 An optional second argument bounds the search; it is a buffer position.
2167 The match found must not extend after that position.
2168 Optional third argument, if t, means if fail just return nil (no error).
2169   If not nil and not t, move to limit of search and return nil.
2170 Optional fourth argument is repeat count--search for successive occurrences.
2171
2172 Search case-sensitivity is determined by the value of the variable
2173 `case-fold-search', which see.
2174
2175 See also the functions `match-beginning', `match-end', `match-string',
2176 and `replace-match'.  */)
2177   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2178 {
2179   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2180 }
2181
2182 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2183        "sPosix search backward: ",
2184        doc: /* Search backward from point for match for regular expression REGEXP.
2185 Find the longest match in accord with Posix regular expression rules.
2186 Set point to the beginning of the match, and return point.
2187 The match found is the one starting last in the buffer
2188 and yet ending before the origin of the search.
2189 An optional second argument bounds the search; it is a buffer position.
2190 The match found must start at or after that position.
2191 Optional third argument, if t, means if fail just return nil (no error).
2192   If not nil and not t, move to limit of search and return nil.
2193 Optional fourth argument is repeat count--search for successive occurrences.
2194
2195 Search case-sensitivity is determined by the value of the variable
2196 `case-fold-search', which see.
2197
2198 See also the functions `match-beginning', `match-end', `match-string',
2199 and `replace-match'.  */)
2200   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2201 {
2202   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2203 }
2204
2205 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2206        "sPosix search: ",
2207        doc: /* Search forward from point for regular expression REGEXP.
2208 Find the longest match in accord with Posix regular expression rules.
2209 Set point to the end of the occurrence found, and return point.
2210 An optional second argument bounds the search; it is a buffer position.
2211 The match found must not extend after that position.
2212 Optional third argument, if t, means if fail just return nil (no error).
2213   If not nil and not t, move to limit of search and return nil.
2214 Optional fourth argument is repeat count--search for successive occurrences.
2215
2216 Search case-sensitivity is determined by the value of the variable
2217 `case-fold-search', which see.
2218
2219 See also the functions `match-beginning', `match-end', `match-string',
2220 and `replace-match'.  */)
2221   (Lisp_Object regexp, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
2222 {
2223   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2224 }
2225 \f
2226 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2227        doc: /* Replace text matched by last search with NEWTEXT.
2228 Leave point at the end of the replacement text.
2229
2230 If optional second arg FIXEDCASE is non-nil, do not alter the case of
2231 the replacement text.  Otherwise, maybe capitalize the whole text, or
2232 maybe just word initials, based on the replaced text.  If the replaced
2233 text has only capital letters and has at least one multiletter word,
2234 convert NEWTEXT to all caps.  Otherwise if all words are capitalized
2235 in the replaced text, capitalize each word in NEWTEXT.
2236
2237 If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
2238 Otherwise treat `\\' as special:
2239   `\\&' in NEWTEXT means substitute original matched text.
2240   `\\N' means substitute what matched the Nth `\\(...\\)'.
2241        If Nth parens didn't match, substitute nothing.
2242   `\\\\' means insert one `\\'.
2243   `\\?' is treated literally
2244        (for compatibility with `query-replace-regexp').
2245   Any other character following `\\' signals an error.
2246 Case conversion does not apply to these substitutions.
2247
2248 If optional fourth argument STRING is non-nil, it should be a string
2249 to act on; this should be the string on which the previous match was
2250 done via `string-match'.  In this case, `replace-match' creates and
2251 returns a new string, made by copying STRING and replacing the part of
2252 STRING that was matched (the original STRING itself is not altered).
2253
2254 The optional fifth argument SUBEXP specifies a subexpression;
2255 it says to replace just that subexpression with NEWTEXT,
2256 rather than replacing the entire matched text.
2257 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2258 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2259 NEWTEXT in place of subexp N.
2260 This is useful only after a regular expression search or match,
2261 since only regular expressions have distinguished subexpressions.  */)
2262   (Lisp_Object newtext, Lisp_Object fixedcase, Lisp_Object literal, Lisp_Object string, Lisp_Object subexp)
2263 {
2264   enum { nochange, all_caps, cap_initial } case_action;
2265   register ptrdiff_t pos, pos_byte;
2266   int some_multiletter_word;
2267   int some_lowercase;
2268   int some_uppercase;
2269   int some_nonuppercase_initial;
2270   register int c, prevc;
2271   ptrdiff_t sub;
2272   ptrdiff_t opoint, newpoint;
2273
2274   CHECK_STRING (newtext);
2275
2276   if (! NILP (string))
2277     CHECK_STRING (string);
2278
2279   case_action = nochange;       /* We tried an initialization */
2280                                 /* but some C compilers blew it */
2281
2282   if (search_regs.num_regs <= 0)
2283     error ("`replace-match' called before any match found");
2284
2285   if (NILP (subexp))
2286     sub = 0;
2287   else
2288     {
2289       CHECK_NUMBER (subexp);
2290       if (! (0 <= XINT (subexp) && XINT (subexp) < search_regs.num_regs))
2291         args_out_of_range (subexp, make_number (search_regs.num_regs));
2292       sub = XINT (subexp);
2293     }
2294
2295   if (NILP (string))
2296     {
2297       if (search_regs.start[sub] < BEGV
2298           || search_regs.start[sub] > search_regs.end[sub]
2299           || search_regs.end[sub] > ZV)
2300         args_out_of_range (make_number (search_regs.start[sub]),
2301                            make_number (search_regs.end[sub]));
2302     }
2303   else
2304     {
2305       if (search_regs.start[sub] < 0
2306           || search_regs.start[sub] > search_regs.end[sub]
2307           || search_regs.end[sub] > SCHARS (string))
2308         args_out_of_range (make_number (search_regs.start[sub]),
2309                            make_number (search_regs.end[sub]));
2310     }
2311
2312   if (NILP (fixedcase))
2313     {
2314       /* Decide how to casify by examining the matched text. */
2315       ptrdiff_t last;
2316
2317       pos = search_regs.start[sub];
2318       last = search_regs.end[sub];
2319
2320       if (NILP (string))
2321         pos_byte = CHAR_TO_BYTE (pos);
2322       else
2323         pos_byte = string_char_to_byte (string, pos);
2324
2325       prevc = '\n';
2326       case_action = all_caps;
2327
2328       /* some_multiletter_word is set nonzero if any original word
2329          is more than one letter long. */
2330       some_multiletter_word = 0;
2331       some_lowercase = 0;
2332       some_nonuppercase_initial = 0;
2333       some_uppercase = 0;
2334
2335       while (pos < last)
2336         {
2337           if (NILP (string))
2338             {
2339               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2340               INC_BOTH (pos, pos_byte);
2341             }
2342           else
2343             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2344
2345           if (lowercasep (c))
2346             {
2347               /* Cannot be all caps if any original char is lower case */
2348
2349               some_lowercase = 1;
2350               if (SYNTAX (prevc) != Sword)
2351                 some_nonuppercase_initial = 1;
2352               else
2353                 some_multiletter_word = 1;
2354             }
2355           else if (uppercasep (c))
2356             {
2357               some_uppercase = 1;
2358               if (SYNTAX (prevc) != Sword)
2359                 ;
2360               else
2361                 some_multiletter_word = 1;
2362             }
2363           else
2364             {
2365               /* If the initial is a caseless word constituent,
2366                  treat that like a lowercase initial.  */
2367               if (SYNTAX (prevc) != Sword)
2368                 some_nonuppercase_initial = 1;
2369             }
2370
2371           prevc = c;
2372         }
2373
2374       /* Convert to all caps if the old text is all caps
2375          and has at least one multiletter word.  */
2376       if (! some_lowercase && some_multiletter_word)
2377         case_action = all_caps;
2378       /* Capitalize each word, if the old text has all capitalized words.  */
2379       else if (!some_nonuppercase_initial && some_multiletter_word)
2380         case_action = cap_initial;
2381       else if (!some_nonuppercase_initial && some_uppercase)
2382         /* Should x -> yz, operating on X, give Yz or YZ?
2383            We'll assume the latter.  */
2384         case_action = all_caps;
2385       else
2386         case_action = nochange;
2387     }
2388
2389   /* Do replacement in a string.  */
2390   if (!NILP (string))
2391     {
2392       Lisp_Object before, after;
2393
2394       before = Fsubstring (string, make_number (0),
2395                            make_number (search_regs.start[sub]));
2396       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2397
2398       /* Substitute parts of the match into NEWTEXT
2399          if desired.  */
2400       if (NILP (literal))
2401         {
2402           ptrdiff_t lastpos = 0;
2403           ptrdiff_t lastpos_byte = 0;
2404           /* We build up the substituted string in ACCUM.  */
2405           Lisp_Object accum;
2406           Lisp_Object middle;
2407           ptrdiff_t length = SBYTES (newtext);
2408
2409           accum = Qnil;
2410
2411           for (pos_byte = 0, pos = 0; pos_byte < length;)
2412             {
2413               ptrdiff_t substart = -1;
2414               ptrdiff_t subend = 0;
2415               int delbackslash = 0;
2416
2417               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2418
2419               if (c == '\\')
2420                 {
2421                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2422
2423                   if (c == '&')
2424                     {
2425                       substart = search_regs.start[sub];
2426                       subend = search_regs.end[sub];
2427                     }
2428                   else if (c >= '1' && c <= '9')
2429                     {
2430                       if (c - '0' < search_regs.num_regs
2431                           && 0 <= search_regs.start[c - '0'])
2432                         {
2433                           substart = search_regs.start[c - '0'];
2434                           subend = search_regs.end[c - '0'];
2435                         }
2436                       else
2437                         {
2438                           /* If that subexp did not match,
2439                              replace \\N with nothing.  */
2440                           substart = 0;
2441                           subend = 0;
2442                         }
2443                     }
2444                   else if (c == '\\')
2445                     delbackslash = 1;
2446                   else if (c != '?')
2447                     error ("Invalid use of `\\' in replacement text");
2448                 }
2449               if (substart >= 0)
2450                 {
2451                   if (pos - 2 != lastpos)
2452                     middle = substring_both (newtext, lastpos,
2453                                              lastpos_byte,
2454                                              pos - 2, pos_byte - 2);
2455                   else
2456                     middle = Qnil;
2457                   accum = concat3 (accum, middle,
2458                                    Fsubstring (string,
2459                                                make_number (substart),
2460                                                make_number (subend)));
2461                   lastpos = pos;
2462                   lastpos_byte = pos_byte;
2463                 }
2464               else if (delbackslash)
2465                 {
2466                   middle = substring_both (newtext, lastpos,
2467                                            lastpos_byte,
2468                                            pos - 1, pos_byte - 1);
2469
2470                   accum = concat2 (accum, middle);
2471                   lastpos = pos;
2472                   lastpos_byte = pos_byte;
2473                 }
2474             }
2475
2476           if (pos != lastpos)
2477             middle = substring_both (newtext, lastpos,
2478                                      lastpos_byte,
2479                                      pos, pos_byte);
2480           else
2481             middle = Qnil;
2482
2483           newtext = concat2 (accum, middle);
2484         }
2485
2486       /* Do case substitution in NEWTEXT if desired.  */
2487       if (case_action == all_caps)
2488         newtext = Fupcase (newtext);
2489       else if (case_action == cap_initial)
2490         newtext = Fupcase_initials (newtext);
2491
2492       return concat3 (before, newtext, after);
2493     }
2494
2495   /* Record point, then move (quietly) to the start of the match.  */
2496   if (PT >= search_regs.end[sub])
2497     opoint = PT - ZV;
2498   else if (PT > search_regs.start[sub])
2499     opoint = search_regs.end[sub] - ZV;
2500   else
2501     opoint = PT;
2502
2503   /* If we want non-literal replacement,
2504      perform substitution on the replacement string.  */
2505   if (NILP (literal))
2506     {
2507       ptrdiff_t length = SBYTES (newtext);
2508       unsigned char *substed;
2509       ptrdiff_t substed_alloc_size, substed_len;
2510       int buf_multibyte = !NILP (BVAR (current_buffer, enable_multibyte_characters));
2511       int str_multibyte = STRING_MULTIBYTE (newtext);
2512       int really_changed = 0;
2513
2514       substed_alloc_size = ((STRING_BYTES_BOUND - 100) / 2 < length
2515                             ? STRING_BYTES_BOUND
2516                             : length * 2 + 100);
2517       substed = xmalloc (substed_alloc_size);
2518       substed_len = 0;
2519
2520       /* Go thru NEWTEXT, producing the actual text to insert in
2521          SUBSTED while adjusting multibyteness to that of the current
2522          buffer.  */
2523
2524       for (pos_byte = 0, pos = 0; pos_byte < length;)
2525         {
2526           unsigned char str[MAX_MULTIBYTE_LENGTH];
2527           const unsigned char *add_stuff = NULL;
2528           ptrdiff_t add_len = 0;
2529           ptrdiff_t idx = -1;
2530
2531           if (str_multibyte)
2532             {
2533               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2534               if (!buf_multibyte)
2535                 c = multibyte_char_to_unibyte (c);
2536             }
2537           else
2538             {
2539               /* Note that we don't have to increment POS.  */
2540               c = SREF (newtext, pos_byte++);
2541               if (buf_multibyte)
2542                 MAKE_CHAR_MULTIBYTE (c);
2543             }
2544
2545           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2546              or set IDX to a match index, which means put that part
2547              of the buffer text into SUBSTED.  */
2548
2549           if (c == '\\')
2550             {
2551               really_changed = 1;
2552
2553               if (str_multibyte)
2554                 {
2555                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2556                                                       pos, pos_byte);
2557                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2558                     c = multibyte_char_to_unibyte (c);
2559                 }
2560               else
2561                 {
2562                   c = SREF (newtext, pos_byte++);
2563                   if (buf_multibyte)
2564                     MAKE_CHAR_MULTIBYTE (c);
2565                 }
2566
2567               if (c == '&')
2568                 idx = sub;
2569               else if (c >= '1' && c <= '9' && c - '0' < search_regs.num_regs)
2570                 {
2571                   if (search_regs.start[c - '0'] >= 1)
2572                     idx = c - '0';
2573                 }
2574               else if (c == '\\')
2575                 add_len = 1, add_stuff = (unsigned char *) "\\";
2576               else
2577                 {
2578                   xfree (substed);
2579                   error ("Invalid use of `\\' in replacement text");
2580                 }
2581             }
2582           else
2583             {
2584               add_len = CHAR_STRING (c, str);
2585               add_stuff = str;
2586             }
2587
2588           /* If we want to copy part of a previous match,
2589              set up ADD_STUFF and ADD_LEN to point to it.  */
2590           if (idx >= 0)
2591             {
2592               ptrdiff_t begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2593               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2594               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2595                 move_gap_both (search_regs.start[idx], begbyte);
2596               add_stuff = BYTE_POS_ADDR (begbyte);
2597             }
2598
2599           /* Now the stuff we want to add to SUBSTED
2600              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2601
2602           /* Make sure SUBSTED is big enough.  */
2603           if (substed_alloc_size - substed_len < add_len)
2604             substed =
2605               xpalloc (substed, &substed_alloc_size,
2606                        add_len - (substed_alloc_size - substed_len),
2607                        STRING_BYTES_BOUND, 1);
2608
2609           /* Now add to the end of SUBSTED.  */
2610           if (add_stuff)
2611             {
2612               memcpy (substed + substed_len, add_stuff, add_len);
2613               substed_len += add_len;
2614             }
2615         }
2616
2617       if (really_changed)
2618         {
2619           if (buf_multibyte)
2620             {
2621               ptrdiff_t nchars =
2622                 multibyte_chars_in_text (substed, substed_len);
2623
2624               newtext = make_multibyte_string ((char *) substed, nchars,
2625                                                substed_len);
2626             }
2627           else
2628             newtext = make_unibyte_string ((char *) substed, substed_len);
2629         }
2630       xfree (substed);
2631     }
2632
2633   /* Replace the old text with the new in the cleanest possible way.  */
2634   replace_range (search_regs.start[sub], search_regs.end[sub],
2635                  newtext, 1, 0, 1);
2636   newpoint = search_regs.start[sub] + SCHARS (newtext);
2637
2638   if (case_action == all_caps)
2639     Fupcase_region (make_number (search_regs.start[sub]),
2640                     make_number (newpoint));
2641   else if (case_action == cap_initial)
2642     Fupcase_initials_region (make_number (search_regs.start[sub]),
2643                              make_number (newpoint));
2644
2645   /* Adjust search data for this change.  */
2646   {
2647     ptrdiff_t oldend = search_regs.end[sub];
2648     ptrdiff_t oldstart = search_regs.start[sub];
2649     ptrdiff_t change = newpoint - search_regs.end[sub];
2650     ptrdiff_t i;
2651
2652     for (i = 0; i < search_regs.num_regs; i++)
2653       {
2654         if (search_regs.start[i] >= oldend)
2655           search_regs.start[i] += change;
2656         else if (search_regs.start[i] > oldstart)
2657           search_regs.start[i] = oldstart;
2658         if (search_regs.end[i] >= oldend)
2659           search_regs.end[i] += change;
2660         else if (search_regs.end[i] > oldstart)
2661           search_regs.end[i] = oldstart;
2662       }
2663   }
2664
2665   /* Put point back where it was in the text.  */
2666   if (opoint <= 0)
2667     TEMP_SET_PT (opoint + ZV);
2668   else
2669     TEMP_SET_PT (opoint);
2670
2671   /* Now move point "officially" to the start of the inserted replacement.  */
2672   move_if_not_intangible (newpoint);
2673
2674   return Qnil;
2675 }
2676 \f
2677 static Lisp_Object
2678 match_limit (Lisp_Object num, int beginningp)
2679 {
2680   EMACS_INT n;
2681
2682   CHECK_NUMBER (num);
2683   n = XINT (num);
2684   if (n < 0)
2685     args_out_of_range (num, make_number (0));
2686   if (search_regs.num_regs <= 0)
2687     error ("No match data, because no search succeeded");
2688   if (n >= search_regs.num_regs
2689       || search_regs.start[n] < 0)
2690     return Qnil;
2691   return (make_number ((beginningp) ? search_regs.start[n]
2692                                     : search_regs.end[n]));
2693 }
2694
2695 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2696        doc: /* Return position of start of text matched by last search.
2697 SUBEXP, a number, specifies which parenthesized expression in the last
2698   regexp.
2699 Value is nil if SUBEXPth pair didn't match, or there were less than
2700   SUBEXP pairs.
2701 Zero means the entire text matched by the whole regexp or whole string.  */)
2702   (Lisp_Object subexp)
2703 {
2704   return match_limit (subexp, 1);
2705 }
2706
2707 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2708        doc: /* Return position of end of text matched by last search.
2709 SUBEXP, a number, specifies which parenthesized expression in the last
2710   regexp.
2711 Value is nil if SUBEXPth pair didn't match, or there were less than
2712   SUBEXP pairs.
2713 Zero means the entire text matched by the whole regexp or whole string.  */)
2714   (Lisp_Object subexp)
2715 {
2716   return match_limit (subexp, 0);
2717 }
2718
2719 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2720        doc: /* Return a list containing all info on what the last search matched.
2721 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2722 All the elements are markers or nil (nil if the Nth pair didn't match)
2723 if the last match was on a buffer; integers or nil if a string was matched.
2724 Use `set-match-data' to reinstate the data in this list.
2725
2726 If INTEGERS (the optional first argument) is non-nil, always use
2727 integers \(rather than markers) to represent buffer positions.  In
2728 this case, and if the last match was in a buffer, the buffer will get
2729 stored as one additional element at the end of the list.
2730
2731 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2732 enough to hold all the values, and if INTEGERS is non-nil, no consing
2733 is done.
2734
2735 If optional third arg RESEAT is non-nil, any previous markers on the
2736 REUSE list will be modified to point to nowhere.
2737
2738 Return value is undefined if the last search failed.  */)
2739   (Lisp_Object integers, Lisp_Object reuse, Lisp_Object reseat)
2740 {
2741   Lisp_Object tail, prev;
2742   Lisp_Object *data;
2743   ptrdiff_t i, len;
2744
2745   if (!NILP (reseat))
2746     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2747       if (MARKERP (XCAR (tail)))
2748         {
2749           unchain_marker (XMARKER (XCAR (tail)));
2750           XSETCAR (tail, Qnil);
2751         }
2752
2753   if (NILP (last_thing_searched))
2754     return Qnil;
2755
2756   prev = Qnil;
2757
2758   data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
2759
2760   len = 0;
2761   for (i = 0; i < search_regs.num_regs; i++)
2762     {
2763       ptrdiff_t start = search_regs.start[i];
2764       if (start >= 0)
2765         {
2766           if (EQ (last_thing_searched, Qt)
2767               || ! NILP (integers))
2768             {
2769               XSETFASTINT (data[2 * i], start);
2770               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2771             }
2772           else if (BUFFERP (last_thing_searched))
2773             {
2774               data[2 * i] = Fmake_marker ();
2775               Fset_marker (data[2 * i],
2776                            make_number (start),
2777                            last_thing_searched);
2778               data[2 * i + 1] = Fmake_marker ();
2779               Fset_marker (data[2 * i + 1],
2780                            make_number (search_regs.end[i]),
2781                            last_thing_searched);
2782             }
2783           else
2784             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2785             emacs_abort ();
2786
2787           len = 2 * i + 2;
2788         }
2789       else
2790         data[2 * i] = data[2 * i + 1] = Qnil;
2791     }
2792
2793   if (BUFFERP (last_thing_searched) && !NILP (integers))
2794     {
2795       data[len] = last_thing_searched;
2796       len++;
2797     }
2798
2799   /* If REUSE is not usable, cons up the values and return them.  */
2800   if (! CONSP (reuse))
2801     return Flist (len, data);
2802
2803   /* If REUSE is a list, store as many value elements as will fit
2804      into the elements of REUSE.  */
2805   for (i = 0, tail = reuse; CONSP (tail);
2806        i++, tail = XCDR (tail))
2807     {
2808       if (i < len)
2809         XSETCAR (tail, data[i]);
2810       else
2811         XSETCAR (tail, Qnil);
2812       prev = tail;
2813     }
2814
2815   /* If we couldn't fit all value elements into REUSE,
2816      cons up the rest of them and add them to the end of REUSE.  */
2817   if (i < len)
2818     XSETCDR (prev, Flist (len - i, data + i));
2819
2820   return reuse;
2821 }
2822
2823 /* We used to have an internal use variant of `reseat' described as:
2824
2825       If RESEAT is `evaporate', put the markers back on the free list
2826       immediately.  No other references to the markers must exist in this
2827       case, so it is used only internally on the unwind stack and
2828       save-match-data from Lisp.
2829
2830    But it was ill-conceived: those supposedly-internal markers get exposed via
2831    the undo-list, so freeing them here is unsafe.  */
2832
2833 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2834        doc: /* Set internal data on last search match from elements of LIST.
2835 LIST should have been created by calling `match-data' previously.
2836
2837 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2838   (register Lisp_Object list, Lisp_Object reseat)
2839 {
2840   ptrdiff_t i;
2841   register Lisp_Object marker;
2842
2843   if (running_asynch_code)
2844     save_search_regs ();
2845
2846   CHECK_LIST (list);
2847
2848   /* Unless we find a marker with a buffer or an explicit buffer
2849      in LIST, assume that this match data came from a string.  */
2850   last_thing_searched = Qt;
2851
2852   /* Allocate registers if they don't already exist.  */
2853   {
2854     EMACS_INT length = XFASTINT (Flength (list)) / 2;
2855
2856     if (length > search_regs.num_regs)
2857       {
2858         ptrdiff_t num_regs = search_regs.num_regs;
2859         if (PTRDIFF_MAX < length)
2860           memory_full (SIZE_MAX);
2861         search_regs.start =
2862           xpalloc (search_regs.start, &num_regs, length - num_regs,
2863                    min (PTRDIFF_MAX, UINT_MAX), sizeof (regoff_t));
2864         search_regs.end =
2865           xrealloc (search_regs.end, num_regs * sizeof (regoff_t));
2866
2867         for (i = search_regs.num_regs; i < num_regs; i++)
2868           search_regs.start[i] = -1;
2869
2870         search_regs.num_regs = num_regs;
2871       }
2872
2873     for (i = 0; CONSP (list); i++)
2874       {
2875         marker = XCAR (list);
2876         if (BUFFERP (marker))
2877           {
2878             last_thing_searched = marker;
2879             break;
2880           }
2881         if (i >= length)
2882           break;
2883         if (NILP (marker))
2884           {
2885             search_regs.start[i] = -1;
2886             list = XCDR (list);
2887           }
2888         else
2889           {
2890             Lisp_Object from;
2891             Lisp_Object m;
2892
2893             m = marker;
2894             if (MARKERP (marker))
2895               {
2896                 if (XMARKER (marker)->buffer == 0)
2897                   XSETFASTINT (marker, 0);
2898                 else
2899                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2900               }
2901
2902             CHECK_NUMBER_COERCE_MARKER (marker);
2903             from = marker;
2904
2905             if (!NILP (reseat) && MARKERP (m))
2906               {
2907                 unchain_marker (XMARKER (m));
2908                 XSETCAR (list, Qnil);
2909               }
2910
2911             if ((list = XCDR (list), !CONSP (list)))
2912               break;
2913
2914             m = marker = XCAR (list);
2915
2916             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2917               XSETFASTINT (marker, 0);
2918
2919             CHECK_NUMBER_COERCE_MARKER (marker);
2920             if ((XINT (from) < 0
2921                  ? TYPE_MINIMUM (regoff_t) <= XINT (from)
2922                  : XINT (from) <= TYPE_MAXIMUM (regoff_t))
2923                 && (XINT (marker) < 0
2924                     ? TYPE_MINIMUM (regoff_t) <= XINT (marker)
2925                     : XINT (marker) <= TYPE_MAXIMUM (regoff_t)))
2926               {
2927                 search_regs.start[i] = XINT (from);
2928                 search_regs.end[i] = XINT (marker);
2929               }
2930             else
2931               {
2932                 search_regs.start[i] = -1;
2933               }
2934
2935             if (!NILP (reseat) && MARKERP (m))
2936               {
2937                 unchain_marker (XMARKER (m));
2938                 XSETCAR (list, Qnil);
2939               }
2940           }
2941         list = XCDR (list);
2942       }
2943
2944     for (; i < search_regs.num_regs; i++)
2945       search_regs.start[i] = -1;
2946   }
2947
2948   return Qnil;
2949 }
2950
2951 /* If non-zero the match data have been saved in saved_search_regs
2952    during the execution of a sentinel or filter. */
2953 static int search_regs_saved;
2954 static struct re_registers saved_search_regs;
2955 static Lisp_Object saved_last_thing_searched;
2956
2957 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2958    if asynchronous code (filter or sentinel) is running. */
2959 static void
2960 save_search_regs (void)
2961 {
2962   if (!search_regs_saved)
2963     {
2964       saved_search_regs.num_regs = search_regs.num_regs;
2965       saved_search_regs.start = search_regs.start;
2966       saved_search_regs.end = search_regs.end;
2967       saved_last_thing_searched = last_thing_searched;
2968       last_thing_searched = Qnil;
2969       search_regs.num_regs = 0;
2970       search_regs.start = 0;
2971       search_regs.end = 0;
2972
2973       search_regs_saved = 1;
2974     }
2975 }
2976
2977 /* Called upon exit from filters and sentinels. */
2978 void
2979 restore_search_regs (void)
2980 {
2981   if (search_regs_saved)
2982     {
2983       if (search_regs.num_regs > 0)
2984         {
2985           xfree (search_regs.start);
2986           xfree (search_regs.end);
2987         }
2988       search_regs.num_regs = saved_search_regs.num_regs;
2989       search_regs.start = saved_search_regs.start;
2990       search_regs.end = saved_search_regs.end;
2991       last_thing_searched = saved_last_thing_searched;
2992       saved_last_thing_searched = Qnil;
2993       search_regs_saved = 0;
2994     }
2995 }
2996
2997 static Lisp_Object
2998 unwind_set_match_data (Lisp_Object list)
2999 {
3000   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3001   return Fset_match_data (list, Qt);
3002 }
3003
3004 /* Called to unwind protect the match data.  */
3005 void
3006 record_unwind_save_match_data (void)
3007 {
3008   record_unwind_protect (unwind_set_match_data,
3009                          Fmatch_data (Qnil, Qnil, Qnil));
3010 }
3011
3012 /* Quote a string to deactivate reg-expr chars */
3013
3014 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3015        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3016   (Lisp_Object string)
3017 {
3018   register char *in, *out, *end;
3019   register char *temp;
3020   int backslashes_added = 0;
3021
3022   CHECK_STRING (string);
3023
3024   temp = alloca (SBYTES (string) * 2);
3025
3026   /* Now copy the data into the new string, inserting escapes. */
3027
3028   in = SSDATA (string);
3029   end = in + SBYTES (string);
3030   out = temp;
3031
3032   for (; in != end; in++)
3033     {
3034       if (*in == '['
3035           || *in == '*' || *in == '.' || *in == '\\'
3036           || *in == '?' || *in == '+'
3037           || *in == '^' || *in == '$')
3038         *out++ = '\\', backslashes_added++;
3039       *out++ = *in;
3040     }
3041
3042   return make_specified_string (temp,
3043                                 SCHARS (string) + backslashes_added,
3044                                 out - temp,
3045                                 STRING_MULTIBYTE (string));
3046 }
3047 \f
3048 void
3049 syms_of_search (void)
3050 {
3051   register int i;
3052
3053   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3054     {
3055       searchbufs[i].buf.allocated = 100;
3056       searchbufs[i].buf.buffer = xmalloc (100);
3057       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3058       searchbufs[i].regexp = Qnil;
3059       searchbufs[i].whitespace_regexp = Qnil;
3060       searchbufs[i].syntax_table = Qnil;
3061       staticpro (&searchbufs[i].regexp);
3062       staticpro (&searchbufs[i].whitespace_regexp);
3063       staticpro (&searchbufs[i].syntax_table);
3064       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3065     }
3066   searchbuf_head = &searchbufs[0];
3067
3068   DEFSYM (Qsearch_failed, "search-failed");
3069   DEFSYM (Qinvalid_regexp, "invalid-regexp");
3070
3071   Fput (Qsearch_failed, Qerror_conditions,
3072         listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
3073   Fput (Qsearch_failed, Qerror_message,
3074         build_pure_c_string ("Search failed"));
3075
3076   Fput (Qinvalid_regexp, Qerror_conditions,
3077         listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
3078   Fput (Qinvalid_regexp, Qerror_message,
3079         build_pure_c_string ("Invalid regexp"));
3080
3081   last_thing_searched = Qnil;
3082   staticpro (&last_thing_searched);
3083
3084   saved_last_thing_searched = Qnil;
3085   staticpro (&saved_last_thing_searched);
3086
3087   DEFVAR_LISP ("search-spaces-regexp", Vsearch_spaces_regexp,
3088       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3089 Some commands use this for user-specified regexps.
3090 Spaces that occur inside character classes or repetition operators
3091 or other such regexp constructs are not replaced with this.
3092 A value of nil (which is the normal value) means treat spaces literally.  */);
3093   Vsearch_spaces_regexp = Qnil;
3094
3095   DEFVAR_LISP ("inhibit-changing-match-data", Vinhibit_changing_match_data,
3096       doc: /* Internal use only.
3097 If non-nil, the primitive searching and matching functions
3098 such as `looking-at', `string-match', `re-search-forward', etc.,
3099 do not set the match data.  The proper way to use this variable
3100 is to bind it with `let' around a small expression.  */);
3101   Vinhibit_changing_match_data = Qnil;
3102
3103   defsubr (&Slooking_at);
3104   defsubr (&Sposix_looking_at);
3105   defsubr (&Sstring_match);
3106   defsubr (&Sposix_string_match);
3107   defsubr (&Ssearch_forward);
3108   defsubr (&Ssearch_backward);
3109   defsubr (&Sre_search_forward);
3110   defsubr (&Sre_search_backward);
3111   defsubr (&Sposix_search_forward);
3112   defsubr (&Sposix_search_backward);
3113   defsubr (&Sreplace_match);
3114   defsubr (&Smatch_beginning);
3115   defsubr (&Smatch_end);
3116   defsubr (&Smatch_data);
3117   defsubr (&Sset_match_data);
3118   defsubr (&Sregexp_quote);
3119 }