code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "charset.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
  36 #define REGEXP_CACHE_SIZE 20
  37
  38 /* If the regexp is non-nil, then the buffer contains the compiled form
  39    of that regexp, suitable for searching.  */
  40 struct regexp_cache
  41 {
  42   struct regexp_cache *next;
  43   Lisp_Object regexp;
  44   struct re_pattern_buffer buf;
  45   char fastmap[0400];
  46   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  47   char posix;
  48 };
  49
  50 /* The instances of that struct.  */
  51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  52
  53 /* The head of the linked list; points to the most recently used buffer.  */
  54 struct regexp_cache *searchbuf_head;
  55
  56
  57 /* Every call to re_match, etc., must pass &search_regs as the regs
  58    argument unless you can show it is unnecessary (i.e., if re_match
  59    is certainly going to be called again before region-around-match
  60    can be called).
  61
  62    Since the registers are now dynamically allocated, we need to make
  63    sure not to refer to the Nth register before checking that it has
  64    been allocated by checking search_regs.num_regs.
  65
  66    The regex code keeps track of whether it has allocated the search
  67    buffer using bits in the re_pattern_buffer.  This means that whenever
  68    you compile a new pattern, it completely forgets whether it has
  69    allocated any registers, and will allocate new registers the next
  70    time you call a searching or matching function.  Therefore, we need
  71    to call re_set_registers after compiling a new pattern or after
  72    setting the match registers, so that the regex functions will be
  73    able to free or re-allocate it properly.  */
  74 static struct re_registers search_regs;
  75
  76 /* The buffer in which the last search was performed, or
  77    Qt if the last search was done in a string;
  78    Qnil if no searching has been done yet.  */
  79 static Lisp_Object last_thing_searched;
  80
  81 /* error condition signaled when regexp compile_pattern fails */
  82
  83 Lisp_Object Qinvalid_regexp;
  84
  85 static void set_search_regs ();
  86 static void save_search_regs ();
  87 static int simple_search ();
  88 static int boyer_moore ();
  89 static int search_buffer ();
  90
  91 static void
  92 matcher_overflow ()
  93 {
  94   error ("Stack overflow in regexp matcher");
  95 }
  96
  97 #ifdef __STDC__
  98 #define CONST const
  99 #else
 100 #define CONST
 101 #endif
 102
 103 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 104    PATTERN is the pattern to compile.
 105    CP is the place to put the result.
 106    TRANSLATE is a translation table for ignoring case, or nil for none.
 107    REGP is the structure that says where to store the "register"
 108    values that will result from matching this pattern.
 109    If it is 0, we should compile the pattern not to record any
 110    subexpression bounds.
 111    POSIX is nonzero if we want full backtracking (POSIX style)
 112    for this pattern.  0 means backtrack only enough to get a valid match.
 113    MULTIBYTE is nonzero if we want to handle multibyte characters in
 114    PATTERN.  0 means all multibyte characters are recognized just as
 115    sequences of binary data.  */
 116
 117 static void
 118 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 119      struct regexp_cache *cp;
 120      Lisp_Object pattern;
 121      Lisp_Object translate;
 122      struct re_registers *regp;
 123      int posix;
 124      int multibyte;
 125 {
 126   char *raw_pattern;
 127   int raw_pattern_size;
 128   char *val;
 129   reg_syntax_t old;
 130
 131   /* MULTIBYTE says whether the text to be searched is multibyte.
 132      We must convert PATTERN to match that, or we will not really
 133      find things right.  */
 134
 135   if (multibyte == STRING_MULTIBYTE (pattern))
 136     {
 137       raw_pattern = (char *) XSTRING (pattern)->data;
 138       raw_pattern_size = STRING_BYTES (XSTRING (pattern));
 139     }
 140   else if (multibyte)
 141     {
 142       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 143                                                   XSTRING (pattern)->size);
 144       raw_pattern = (char *) alloca (raw_pattern_size + 1);
 145       copy_text (XSTRING (pattern)->data, raw_pattern,
 146                  XSTRING (pattern)->size, 0, 1);
 147     }
 148   else
 149     {
 150       /* Converting multibyte to single-byte.
 151
 152          ??? Perhaps this conversion should be done in a special way
 153          by subtracting nonascii-insert-offset from each non-ASCII char,
 154          so that only the multibyte chars which really correspond to
 155          the chosen single-byte character set can possibly match.  */
 156       raw_pattern_size = XSTRING (pattern)->size;
 157       raw_pattern = (char *) alloca (raw_pattern_size + 1);
 158       copy_text (XSTRING (pattern)->data, raw_pattern,
 159                  STRING_BYTES (XSTRING (pattern)), 1, 0);
 160     }
 161
 162   cp->regexp = Qnil;
 163   cp->buf.translate = (! NILP (translate) ? translate : 0);
 164   cp->posix = posix;
 165   cp->buf.multibyte = multibyte;
 166   BLOCK_INPUT;
 167   old = re_set_syntax (RE_SYNTAX_EMACS
 168                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 169   val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
 170   re_set_syntax (old);
 171   UNBLOCK_INPUT;
 172   if (val)
 173     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 174
 175   cp->regexp = Fcopy_sequence (pattern);
 176 }
 177
 178 /* Compile a regexp if necessary, but first check to see if there's one in
 179    the cache.
 180    PATTERN is the pattern to compile.
 181    TRANSLATE is a translation table for ignoring case, or nil for none.
 182    REGP is the structure that says where to store the "register"
 183    values that will result from matching this pattern.
 184    If it is 0, we should compile the pattern not to record any
 185    subexpression bounds.
 186    POSIX is nonzero if we want full backtracking (POSIX style)
 187    for this pattern.  0 means backtrack only enough to get a valid match.  */
 188
 189 struct re_pattern_buffer *
 190 compile_pattern (pattern, regp, translate, posix, multibyte)
 191      Lisp_Object pattern;
 192      struct re_registers *regp;
 193      Lisp_Object translate;
 194      int posix, multibyte;
 195 {
 196   struct regexp_cache *cp, **cpp;
 197
 198   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 199     {
 200       cp = *cpp;
 201       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 202           && !NILP (Fstring_equal (cp->regexp, pattern))
 203           && cp->buf.translate == (! NILP (translate) ? translate : 0)
 204           && cp->posix == posix
 205           && cp->buf.multibyte == multibyte)
 206         break;
 207
 208       /* If we're at the end of the cache, compile into the last cell.  */
 209       if (cp->next == 0)
 210         {
 211           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 212           break;
 213         }
 214     }
 215
 216   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 217      either because we found it in the cache or because we just compiled it.
 218      Move it to the front of the queue to mark it as most recently used.  */
 219   *cpp = cp->next;
 220   cp->next = searchbuf_head;
 221   searchbuf_head = cp;
 222
 223   /* Advise the searching functions about the space we have allocated
 224      for register data.  */
 225   if (regp)
 226     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 227
 228   return &cp->buf;
 229 }
 230
 231 /* Error condition used for failing searches */
 232 Lisp_Object Qsearch_failed;
 233
 234 Lisp_Object
 235 signal_failure (arg)
 236      Lisp_Object arg;
 237 {
 238   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 239   return Qnil;
 240 }
 241 \f
 242 static Lisp_Object
 243 looking_at_1 (string, posix)
 244      Lisp_Object string;
 245      int posix;
 246 {
 247   Lisp_Object val;
 248   unsigned char *p1, *p2;
 249   int s1, s2;
 250   register int i;
 251   struct re_pattern_buffer *bufp;
 252
 253   if (running_asynch_code)
 254     save_search_regs ();
 255
 256   CHECK_STRING (string, 0);
 257   bufp = compile_pattern (string, &search_regs,
 258                           (!NILP (current_buffer->case_fold_search)
 259                            ? DOWNCASE_TABLE : Qnil),
 260                           posix,
 261                           !NILP (current_buffer->enable_multibyte_characters));
 262
 263   immediate_quit = 1;
 264   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 265
 266   /* Get pointers and sizes of the two strings
 267      that make up the visible portion of the buffer. */
 268
 269   p1 = BEGV_ADDR;
 270   s1 = GPT_BYTE - BEGV_BYTE;
 271   p2 = GAP_END_ADDR;
 272   s2 = ZV_BYTE - GPT_BYTE;
 273   if (s1 < 0)
 274     {
 275       p2 = p1;
 276       s2 = ZV_BYTE - BEGV_BYTE;
 277       s1 = 0;
 278     }
 279   if (s2 < 0)
 280     {
 281       s1 = ZV_BYTE - BEGV_BYTE;
 282       s2 = 0;
 283     }
 284
 285   re_match_object = Qnil;
 286
 287   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 288                   PT_BYTE - BEGV_BYTE, &search_regs,
 289                   ZV_BYTE - BEGV_BYTE);
 290   if (i == -2)
 291     matcher_overflow ();
 292
 293   val = (0 <= i ? Qt : Qnil);
 294   if (i >= 0)
 295     for (i = 0; i < search_regs.num_regs; i++)
 296       if (search_regs.start[i] >= 0)
 297         {
 298           search_regs.start[i]
 299             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 300           search_regs.end[i]
 301             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 302         }
 303   XSETBUFFER (last_thing_searched, current_buffer);
 304   immediate_quit = 0;
 305   return val;
 306 }
 307
 308 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 309   "Return t if text after point matches regular expression REGEXP.\n\
 310 This function modifies the match data that `match-beginning',\n\
 311 `match-end' and `match-data' access; save and restore the match\n\
 312 data if you want to preserve them.")
 313   (regexp)
 314      Lisp_Object regexp;
 315 {
 316   return looking_at_1 (regexp, 0);
 317 }
 318
 319 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 320   "Return t if text after point matches regular expression REGEXP.\n\
 321 Find the longest match, in accord with Posix regular expression rules.\n\
 322 This function modifies the match data that `match-beginning',\n\
 323 `match-end' and `match-data' access; save and restore the match\n\
 324 data if you want to preserve them.")
 325   (regexp)
 326      Lisp_Object regexp;
 327 {
 328   return looking_at_1 (regexp, 1);
 329 }
 330 \f
 331 static Lisp_Object
 332 string_match_1 (regexp, string, start, posix)
 333      Lisp_Object regexp, string, start;
 334      int posix;
 335 {
 336   int val;
 337   struct re_pattern_buffer *bufp;
 338   int pos, pos_byte;
 339   int i;
 340
 341   if (running_asynch_code)
 342     save_search_regs ();
 343
 344   CHECK_STRING (regexp, 0);
 345   CHECK_STRING (string, 1);
 346
 347   if (NILP (start))
 348     pos = 0, pos_byte = 0;
 349   else
 350     {
 351       int len = XSTRING (string)->size;
 352
 353       CHECK_NUMBER (start, 2);
 354       pos = XINT (start);
 355       if (pos < 0 && -pos <= len)
 356         pos = len + pos;
 357       else if (0 > pos || pos > len)
 358         args_out_of_range (string, start);
 359       pos_byte = string_char_to_byte (string, pos);
 360     }
 361
 362   bufp = compile_pattern (regexp, &search_regs,
 363                           (!NILP (current_buffer->case_fold_search)
 364                            ? DOWNCASE_TABLE : Qnil),
 365                           posix,
 366                           STRING_MULTIBYTE (string));
 367   immediate_quit = 1;
 368   re_match_object = string;
 369
 370   val = re_search (bufp, (char *) XSTRING (string)->data,
 371                    STRING_BYTES (XSTRING (string)), pos_byte,
 372                    STRING_BYTES (XSTRING (string)) - pos_byte,
 373                    &search_regs);
 374   immediate_quit = 0;
 375   last_thing_searched = Qt;
 376   if (val == -2)
 377     matcher_overflow ();
 378   if (val < 0) return Qnil;
 379
 380   for (i = 0; i < search_regs.num_regs; i++)
 381     if (search_regs.start[i] >= 0)
 382       {
 383         search_regs.start[i]
 384           = string_byte_to_char (string, search_regs.start[i]);
 385         search_regs.end[i]
 386           = string_byte_to_char (string, search_regs.end[i]);
 387       }
 388
 389   return make_number (string_byte_to_char (string, val));
 390 }
 391
 392 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 393   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 394 If third arg START is non-nil, start search at that index in STRING.\n\
 395 For index of first char beyond the match, do (match-end 0).\n\
 396 `match-end' and `match-beginning' also give indices of substrings\n\
 397 matched by parenthesis constructs in the pattern.")
 398   (regexp, string, start)
 399      Lisp_Object regexp, string, start;
 400 {
 401   return string_match_1 (regexp, string, start, 0);
 402 }
 403
 404 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 405   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 406 Find the longest match, in accord with Posix regular expression rules.\n\
 407 If third arg START is non-nil, start search at that index in STRING.\n\
 408 For index of first char beyond the match, do (match-end 0).\n\
 409 `match-end' and `match-beginning' also give indices of substrings\n\
 410 matched by parenthesis constructs in the pattern.")
 411   (regexp, string, start)
 412      Lisp_Object regexp, string, start;
 413 {
 414   return string_match_1 (regexp, string, start, 1);
 415 }
 416
 417 /* Match REGEXP against STRING, searching all of STRING,
 418    and return the index of the match, or negative on failure.
 419    This does not clobber the match data.  */
 420
 421 int
 422 fast_string_match (regexp, string)
 423      Lisp_Object regexp, string;
 424 {
 425   int val;
 426   struct re_pattern_buffer *bufp;
 427
 428   bufp = compile_pattern (regexp, 0, Qnil,
 429                           0, STRING_MULTIBYTE (string));
 430   immediate_quit = 1;
 431   re_match_object = string;
 432
 433   val = re_search (bufp, (char *) XSTRING (string)->data,
 434                    STRING_BYTES (XSTRING (string)), 0,
 435                    STRING_BYTES (XSTRING (string)), 0);
 436   immediate_quit = 0;
 437   return val;
 438 }
 439
 440 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 441    and return the index of the match, or negative on failure.
 442    This does not clobber the match data.
 443    We assume that STRING contains single-byte characters.  */
 444
 445 extern Lisp_Object Vascii_downcase_table;
 446
 447 int
 448 fast_c_string_match_ignore_case (regexp, string)
 449      Lisp_Object regexp;
 450      char *string;
 451 {
 452   int val;
 453   struct re_pattern_buffer *bufp;
 454   int len = strlen (string);
 455
 456   regexp = string_make_unibyte (regexp);
 457   re_match_object = Qt;
 458   bufp = compile_pattern (regexp, 0,
 459                           Vascii_downcase_table, 0,
 460                           0);
 461   immediate_quit = 1;
 462   val = re_search (bufp, string, len, 0, len, 0);
 463   immediate_quit = 0;
 464   return val;
 465 }
 466 \f
 467 /* max and min.  */
 468
 469 static int
 470 max (a, b)
 471      int a, b;
 472 {
 473   return ((a > b) ? a : b);
 474 }
 475
 476 static int
 477 min (a, b)
 478      int a, b;
 479 {
 480   return ((a < b) ? a : b);
 481 }
 482
 483 \f
 484 /* The newline cache: remembering which sections of text have no newlines.  */
 485
 486 /* If the user has requested newline caching, make sure it's on.
 487    Otherwise, make sure it's off.
 488    This is our cheezy way of associating an action with the change of
 489    state of a buffer-local variable.  */
 490 static void
 491 newline_cache_on_off (buf)
 492      struct buffer *buf;
 493 {
 494   if (NILP (buf->cache_long_line_scans))
 495     {
 496       /* It should be off.  */
 497       if (buf->newline_cache)
 498         {
 499           free_region_cache (buf->newline_cache);
 500           buf->newline_cache = 0;
 501         }
 502     }
 503   else
 504     {
 505       /* It should be on.  */
 506       if (buf->newline_cache == 0)
 507         buf->newline_cache = new_region_cache ();
 508     }
 509 }
 510
 511 \f
 512 /* Search for COUNT instances of the character TARGET between START and END.
 513
 514    If COUNT is positive, search forwards; END must be >= START.
 515    If COUNT is negative, search backwards for the -COUNTth instance;
 516       END must be <= START.
 517    If COUNT is zero, do anything you please; run rogue, for all I care.
 518
 519    If END is zero, use BEGV or ZV instead, as appropriate for the
 520    direction indicated by COUNT.
 521
 522    If we find COUNT instances, set *SHORTAGE to zero, and return the
 523    position after the COUNTth match.  Note that for reverse motion
 524    this is not the same as the usual convention for Emacs motion commands.
 525
 526    If we don't find COUNT instances before reaching END, set *SHORTAGE
 527    to the number of TARGETs left unfound, and return END.
 528
 529    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 530    except when inside redisplay.  */
 531
 532 scan_buffer (target, start, end, count, shortage, allow_quit)
 533      register int target;
 534      int start, end;
 535      int count;
 536      int *shortage;
 537      int allow_quit;
 538 {
 539   struct region_cache *newline_cache;
 540   int direction;
 541
 542   if (count > 0)
 543     {
 544       direction = 1;
 545       if (! end) end = ZV;
 546     }
 547   else
 548     {
 549       direction = -1;
 550       if (! end) end = BEGV;
 551     }
 552
 553   newline_cache_on_off (current_buffer);
 554   newline_cache = current_buffer->newline_cache;
 555
 556   if (shortage != 0)
 557     *shortage = 0;
 558
 559   immediate_quit = allow_quit;
 560
 561   if (count > 0)
 562     while (start != end)
 563       {
 564         /* Our innermost scanning loop is very simple; it doesn't know
 565            about gaps, buffer ends, or the newline cache.  ceiling is
 566            the position of the last character before the next such
 567            obstacle --- the last character the dumb search loop should
 568            examine.  */
 569         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 570         int start_byte = CHAR_TO_BYTE (start);
 571
 572         /* If we're looking for a newline, consult the newline cache
 573            to see where we can avoid some scanning.  */
 574         if (target == '\n' && newline_cache)
 575           {
 576             int next_change;
 577             immediate_quit = 0;
 578             while (region_cache_forward
 579                    (current_buffer, newline_cache, start_byte, &next_change))
 580               start_byte = next_change;
 581             immediate_quit = allow_quit;
 582
 583             /* START should never be after END.  */
 584             if (start_byte > ceiling_byte)
 585               start_byte = ceiling_byte;
 586
 587             /* Now the text after start is an unknown region, and
 588                next_change is the position of the next known region. */
 589             ceiling_byte = min (next_change - 1, ceiling_byte);
 590           }
 591
 592         /* The dumb loop can only scan text stored in contiguous
 593            bytes. BUFFER_CEILING_OF returns the last character
 594            position that is contiguous, so the ceiling is the
 595            position after that.  */
 596         ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte);
 597
 598         {
 599           /* The termination address of the dumb loop.  */
 600           register unsigned char *ceiling_addr
 601             = BYTE_POS_ADDR (ceiling_byte) + 1;
 602           register unsigned char *cursor
 603             = BYTE_POS_ADDR (start_byte);
 604           unsigned char *base = cursor;
 605
 606           while (cursor < ceiling_addr)
 607             {
 608               unsigned char *scan_start = cursor;
 609
 610               /* The dumb loop.  */
 611               while (*cursor != target && ++cursor < ceiling_addr)
 612                 ;
 613
 614               /* If we're looking for newlines, cache the fact that
 615                  the region from start to cursor is free of them. */
 616               if (target == '\n' && newline_cache)
 617                 know_region_cache (current_buffer, newline_cache,
 618                                    start_byte + scan_start - base,
 619                                    start_byte + cursor - base);
 620
 621               /* Did we find the target character?  */
 622               if (cursor < ceiling_addr)
 623                 {
 624                   if (--count == 0)
 625                     {
 626                       immediate_quit = 0;
 627                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 628                     }
 629                   cursor++;
 630                 }
 631             }
 632
 633           start = BYTE_TO_CHAR (start_byte + cursor - base);
 634         }
 635       }
 636   else
 637     while (start > end)
 638       {
 639         /* The last character to check before the next obstacle.  */
 640         int ceiling_byte = CHAR_TO_BYTE (end);
 641         int start_byte = CHAR_TO_BYTE (start);
 642
 643         /* Consult the newline cache, if appropriate.  */
 644         if (target == '\n' && newline_cache)
 645           {
 646             int next_change;
 647             immediate_quit = 0;
 648             while (region_cache_backward
 649                    (current_buffer, newline_cache, start_byte, &next_change))
 650               start_byte = next_change;
 651             immediate_quit = allow_quit;
 652
 653             /* Start should never be at or before end.  */
 654             if (start_byte <= ceiling_byte)
 655               start_byte = ceiling_byte + 1;
 656
 657             /* Now the text before start is an unknown region, and
 658                next_change is the position of the next known region. */
 659             ceiling_byte = max (next_change, ceiling_byte);
 660           }
 661
 662         /* Stop scanning before the gap.  */
 663         ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte);
 664
 665         {
 666           /* The termination address of the dumb loop.  */
 667           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 668           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 669           unsigned char *base = cursor;
 670
 671           while (cursor >= ceiling_addr)
 672             {
 673               unsigned char *scan_start = cursor;
 674
 675               while (*cursor != target && --cursor >= ceiling_addr)
 676                 ;
 677
 678               /* If we're looking for newlines, cache the fact that
 679                  the region from after the cursor to start is free of them.  */
 680               if (target == '\n' && newline_cache)
 681                 know_region_cache (current_buffer, newline_cache,
 682                                    start_byte + cursor - base,
 683                                    start_byte + scan_start - base);
 684
 685               /* Did we find the target character?  */
 686               if (cursor >= ceiling_addr)
 687                 {
 688                   if (++count >= 0)
 689                     {
 690                       immediate_quit = 0;
 691                       return BYTE_TO_CHAR (start_byte + cursor - base);
 692                     }
 693                   cursor--;
 694                 }
 695             }
 696
 697           start = BYTE_TO_CHAR (start_byte + cursor - base);
 698         }
 699       }
 700
 701   immediate_quit = 0;
 702   if (shortage != 0)
 703     *shortage = count * direction;
 704   return start;
 705 }
 706 \f
 707 /* Search for COUNT instances of a line boundary, which means either a
 708    newline or (if selective display enabled) a carriage return.
 709    Start at START.  If COUNT is negative, search backwards.
 710
 711    We report the resulting position by calling TEMP_SET_PT_BOTH.
 712
 713    If we find COUNT instances. we position after (always after,
 714    even if scanning backwards) the COUNTth match, and return 0.
 715
 716    If we don't find COUNT instances before reaching the end of the
 717    buffer (or the beginning, if scanning backwards), we return
 718    the number of line boundaries left unfound, and position at
 719    the limit we bumped up against.
 720
 721    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 722    except in special cases.  */
 723
 724 int
 725 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 726      int start, start_byte;
 727      int limit, limit_byte;
 728      register int count;
 729      int allow_quit;
 730 {
 731   int direction = ((count > 0) ? 1 : -1);
 732
 733   register unsigned char *cursor;
 734   unsigned char *base;
 735
 736   register int ceiling;
 737   register unsigned char *ceiling_addr;
 738
 739   int old_immediate_quit = immediate_quit;
 740
 741   /* If we are not in selective display mode,
 742      check only for newlines.  */
 743   int selective_display = (!NILP (current_buffer->selective_display)
 744                            && !INTEGERP (current_buffer->selective_display));
 745
 746   /* The code that follows is like scan_buffer
 747      but checks for either newline or carriage return.  */
 748
 749   if (allow_quit)
 750     immediate_quit++;
 751
 752   start_byte = CHAR_TO_BYTE (start);
 753
 754   if (count > 0)
 755     {
 756       while (start_byte < limit_byte)
 757         {
 758           ceiling =  BUFFER_CEILING_OF (start_byte);
 759           ceiling = min (limit_byte - 1, ceiling);
 760           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 761           base = (cursor = BYTE_POS_ADDR (start_byte));
 762           while (1)
 763             {
 764               while (*cursor != '\n' && ++cursor != ceiling_addr)
 765                 ;
 766
 767               if (cursor != ceiling_addr)
 768                 {
 769                   if (--count == 0)
 770                     {
 771                       immediate_quit = old_immediate_quit;
 772                       start_byte = start_byte + cursor - base + 1;
 773                       start = BYTE_TO_CHAR (start_byte);
 774                       TEMP_SET_PT_BOTH (start, start_byte);
 775                       return 0;
 776                     }
 777                   else
 778                     if (++cursor == ceiling_addr)
 779                       break;
 780                 }
 781               else
 782                 break;
 783             }
 784           start_byte += cursor - base;
 785         }
 786     }
 787   else
 788     {
 789       while (start_byte > limit_byte)
 790         {
 791           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 792           ceiling = max (limit_byte, ceiling);
 793           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 794           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 795           while (1)
 796             {
 797               while (--cursor != ceiling_addr && *cursor != '\n')
 798                 ;
 799
 800               if (cursor != ceiling_addr)
 801                 {
 802                   if (++count == 0)
 803                     {
 804                       immediate_quit = old_immediate_quit;
 805                       /* Return the position AFTER the match we found.  */
 806                       start_byte = start_byte + cursor - base + 1;
 807                       start = BYTE_TO_CHAR (start_byte);
 808                       TEMP_SET_PT_BOTH (start, start_byte);
 809                       return 0;
 810                     }
 811                 }
 812               else
 813                 break;
 814             }
 815           /* Here we add 1 to compensate for the last decrement
 816              of CURSOR, which took it past the valid range.  */
 817           start_byte += cursor - base + 1;
 818         }
 819     }
 820
 821   TEMP_SET_PT_BOTH (limit, limit_byte);
 822   immediate_quit = old_immediate_quit;
 823
 824   return count * direction;
 825 }
 826
 827 int
 828 find_next_newline_no_quit (from, cnt)
 829      register int from, cnt;
 830 {
 831   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 832 }
 833
 834 /* Like find_next_newline, but returns position before the newline,
 835    not after, and only search up to TO.  This isn't just
 836    find_next_newline (...)-1, because you might hit TO.  */
 837
 838 int
 839 find_before_next_newline (from, to, cnt)
 840      int from, to, cnt;
 841 {
 842   int shortage;
 843   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 844
 845   if (shortage == 0)
 846     pos--;
 847
 848   return pos;
 849 }
 850 \f
 851 /* Subroutines of Lisp buffer search functions. */
 852
 853 static Lisp_Object
 854 search_command (string, bound, noerror, count, direction, RE, posix)
 855      Lisp_Object string, bound, noerror, count;
 856      int direction;
 857      int RE;
 858      int posix;
 859 {
 860   register int np;
 861   int lim, lim_byte;
 862   int n = direction;
 863
 864   if (!NILP (count))
 865     {
 866       CHECK_NUMBER (count, 3);
 867       n *= XINT (count);
 868     }
 869
 870   CHECK_STRING (string, 0);
 871   if (NILP (bound))
 872     {
 873       if (n > 0)
 874         lim = ZV, lim_byte = ZV_BYTE;
 875       else
 876         lim = BEGV, lim_byte = BEGV_BYTE;
 877     }
 878   else
 879     {
 880       CHECK_NUMBER_COERCE_MARKER (bound, 1);
 881       lim = XINT (bound);
 882       if (n > 0 ? lim < PT : lim > PT)
 883         error ("Invalid search bound (wrong side of point)");
 884       if (lim > ZV)
 885         lim = ZV, lim_byte = ZV_BYTE;
 886       else if (lim < BEGV)
 887         lim = BEGV, lim_byte = BEGV_BYTE;
 888       else
 889         lim_byte = CHAR_TO_BYTE (lim);
 890     }
 891
 892   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 893                       (!NILP (current_buffer->case_fold_search)
 894                        ? current_buffer->case_canon_table
 895                        : Qnil),
 896                       (!NILP (current_buffer->case_fold_search)
 897                        ? current_buffer->case_eqv_table
 898                        : Qnil),
 899                       posix);
 900   if (np <= 0)
 901     {
 902       if (NILP (noerror))
 903         return signal_failure (string);
 904       if (!EQ (noerror, Qt))
 905         {
 906           if (lim < BEGV || lim > ZV)
 907             abort ();
 908           SET_PT_BOTH (lim, lim_byte);
 909           return Qnil;
 910 #if 0 /* This would be clean, but maybe programs depend on
 911          a value of nil here.  */
 912           np = lim;
 913 #endif
 914         }
 915       else
 916         return Qnil;
 917     }
 918
 919   if (np < BEGV || np > ZV)
 920     abort ();
 921
 922   SET_PT (np);
 923
 924   return make_number (np);
 925 }
 926 \f
 927 /* Return 1 if REGEXP it matches just one constant string.  */
 928
 929 static int
 930 trivial_regexp_p (regexp)
 931      Lisp_Object regexp;
 932 {
 933   int len = STRING_BYTES (XSTRING (regexp));
 934   unsigned char *s = XSTRING (regexp)->data;
 935   unsigned char c;
 936   while (--len >= 0)
 937     {
 938       switch (*s++)
 939         {
 940         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 941           return 0;
 942         case '\\':
 943           if (--len < 0)
 944             return 0;
 945           switch (*s++)
 946             {
 947             case '|': case '(': case ')': case '`': case '\'': case 'b':
 948             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 949             case 'S': case '=':
 950             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 951             case '1': case '2': case '3': case '4': case '5':
 952             case '6': case '7': case '8': case '9':
 953               return 0;
 954             }
 955         }
 956     }
 957   return 1;
 958 }
 959
 960 /* Search for the n'th occurrence of STRING in the current buffer,
 961    starting at position POS and stopping at position LIM,
 962    treating STRING as a literal string if RE is false or as
 963    a regular expression if RE is true.
 964
 965    If N is positive, searching is forward and LIM must be greater than POS.
 966    If N is negative, searching is backward and LIM must be less than POS.
 967
 968    Returns -x if x occurrences remain to be found (x > 0),
 969    or else the position at the beginning of the Nth occurrence
 970    (if searching backward) or the end (if searching forward).
 971
 972    POSIX is nonzero if we want full backtracking (POSIX style)
 973    for this pattern.  0 means backtrack only enough to get a valid match.  */
 974
 975 #define TRANSLATE(out, trt, d)                  \
 976 do                                              \
 977   {                                             \
 978     if (! NILP (trt))                           \
 979       {                                         \
 980         Lisp_Object temp;                       \
 981         temp = Faref (trt, make_number (d));    \
 982         if (INTEGERP (temp))                    \
 983           out = XINT (temp);                    \
 984         else                                    \
 985           out = d;                              \
 986       }                                         \
 987     else                                        \
 988       out = d;                                  \
 989   }                                             \
 990 while (0)
 991
 992 static int
 993 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
 994                RE, trt, inverse_trt, posix)
 995      Lisp_Object string;
 996      int pos;
 997      int pos_byte;
 998      int lim;
 999      int lim_byte;
1000      int n;
1001      int RE;
1002      Lisp_Object trt;
1003      Lisp_Object inverse_trt;
1004      int posix;
1005 {
1006   int len = XSTRING (string)->size;
1007   int len_byte = STRING_BYTES (XSTRING (string));
1008   register int i;
1009
1010   if (running_asynch_code)
1011     save_search_regs ();
1012
1013   /* Null string is found at starting position.  */
1014   if (len == 0)
1015     {
1016       set_search_regs (pos, 0);
1017       return pos;
1018     }
1019
1020   /* Searching 0 times means don't move.  */
1021   if (n == 0)
1022     return pos;
1023
1024   if (RE && !trivial_regexp_p (string))
1025     {
1026       unsigned char *p1, *p2;
1027       int s1, s2;
1028       struct re_pattern_buffer *bufp;
1029
1030       bufp = compile_pattern (string, &search_regs, trt, posix,
1031                               !NILP (current_buffer->enable_multibyte_characters));
1032
1033       immediate_quit = 1;       /* Quit immediately if user types ^G,
1034                                    because letting this function finish
1035                                    can take too long. */
1036       QUIT;                     /* Do a pending quit right away,
1037                                    to avoid paradoxical behavior */
1038       /* Get pointers and sizes of the two strings
1039          that make up the visible portion of the buffer. */
1040
1041       p1 = BEGV_ADDR;
1042       s1 = GPT_BYTE - BEGV_BYTE;
1043       p2 = GAP_END_ADDR;
1044       s2 = ZV_BYTE - GPT_BYTE;
1045       if (s1 < 0)
1046         {
1047           p2 = p1;
1048           s2 = ZV_BYTE - BEGV_BYTE;
1049           s1 = 0;
1050         }
1051       if (s2 < 0)
1052         {
1053           s1 = ZV_BYTE - BEGV_BYTE;
1054           s2 = 0;
1055         }
1056       re_match_object = Qnil;
1057
1058       while (n < 0)
1059         {
1060           int val;
1061           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1062                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1063                              &search_regs,
1064                              /* Don't allow match past current point */
1065                              pos_byte - BEGV_BYTE);
1066           if (val == -2)
1067             {
1068               matcher_overflow ();
1069             }
1070           if (val >= 0)
1071             {
1072               pos_byte = search_regs.start[0] + BEGV_BYTE;
1073               for (i = 0; i < search_regs.num_regs; i++)
1074                 if (search_regs.start[i] >= 0)
1075                   {
1076                     search_regs.start[i]
1077                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1078                     search_regs.end[i]
1079                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1080                   }
1081               XSETBUFFER (last_thing_searched, current_buffer);
1082               /* Set pos to the new position. */
1083               pos = search_regs.start[0];
1084             }
1085           else
1086             {
1087               immediate_quit = 0;
1088               return (n);
1089             }
1090           n++;
1091         }
1092       while (n > 0)
1093         {
1094           int val;
1095           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1096                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1097                              &search_regs,
1098                              lim_byte - BEGV_BYTE);
1099           if (val == -2)
1100             {
1101               matcher_overflow ();
1102             }
1103           if (val >= 0)
1104             {
1105               pos_byte = search_regs.end[0] + BEGV_BYTE;
1106               for (i = 0; i < search_regs.num_regs; i++)
1107                 if (search_regs.start[i] >= 0)
1108                   {
1109                     search_regs.start[i]
1110                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1111                     search_regs.end[i]
1112                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1113                   }
1114               XSETBUFFER (last_thing_searched, current_buffer);
1115               pos = search_regs.end[0];
1116             }
1117           else
1118             {
1119               immediate_quit = 0;
1120               return (0 - n);
1121             }
1122           n--;
1123         }
1124       immediate_quit = 0;
1125       return (pos);
1126     }
1127   else                          /* non-RE case */
1128     {
1129       unsigned char *raw_pattern, *pat;
1130       int raw_pattern_size;
1131       int raw_pattern_size_byte;
1132       unsigned char *patbuf;
1133       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1134       unsigned char *base_pat = XSTRING (string)->data;
1135       int charset_base = -1;
1136       int simple = 1;
1137
1138       /* MULTIBYTE says whether the text to be searched is multibyte.
1139          We must convert PATTERN to match that, or we will not really
1140          find things right.  */
1141
1142       if (multibyte == STRING_MULTIBYTE (string))
1143         {
1144           raw_pattern = (char *) XSTRING (string)->data;
1145           raw_pattern_size = XSTRING (string)->size;
1146           raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1147         }
1148       else if (multibyte)
1149         {
1150           raw_pattern_size = XSTRING (string)->size;
1151           raw_pattern_size_byte
1152             = count_size_as_multibyte (XSTRING (string)->data,
1153                                        raw_pattern_size);
1154           raw_pattern = (char *) alloca (raw_pattern_size_byte + 1);
1155           copy_text (XSTRING (string)->data, raw_pattern,
1156                      XSTRING (string)->size, 0, 1);
1157         }
1158       else
1159         {
1160           /* Converting multibyte to single-byte.
1161
1162              ??? Perhaps this conversion should be done in a special way
1163              by subtracting nonascii-insert-offset from each non-ASCII char,
1164              so that only the multibyte chars which really correspond to
1165              the chosen single-byte character set can possibly match.  */
1166           raw_pattern_size = XSTRING (string)->size;
1167           raw_pattern_size_byte = XSTRING (string)->size;
1168           raw_pattern = (char *) alloca (raw_pattern_size + 1);
1169           copy_text (XSTRING (string)->data, raw_pattern,
1170                      STRING_BYTES (XSTRING (string)), 1, 0);
1171         }
1172
1173       /* Copy and optionally translate the pattern.  */
1174       len = raw_pattern_size;
1175       len_byte = raw_pattern_size_byte;
1176       patbuf = (unsigned char *) alloca (len_byte);
1177       pat = patbuf;
1178       base_pat = raw_pattern;
1179       if (multibyte)
1180         {
1181           while (--len >= 0)
1182             {
1183               unsigned char workbuf[4], *str;
1184               int c, translated, inverse;
1185               int in_charlen, charlen;
1186
1187               /* If we got here and the RE flag is set, it's because we're
1188                  dealing with a regexp known to be trivial, so the backslash
1189                  just quotes the next character.  */
1190               if (RE && *base_pat == '\\')
1191                 {
1192                   len--;
1193                   len_byte--;
1194                   base_pat++;
1195                 }
1196
1197               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1198               /* Translate the character, if requested.  */
1199               TRANSLATE (translated, trt, c);
1200               /* If translation changed the byte-length, go back
1201                  to the original character.  */
1202               charlen = CHAR_STRING (translated, workbuf, str);
1203               if (in_charlen != charlen)
1204                 {
1205                   translated = c;
1206                   charlen = CHAR_STRING (c, workbuf, str);
1207                 }
1208
1209               TRANSLATE (inverse, inverse_trt, c);
1210
1211               /* Did this char actually get translated?
1212                  Would any other char get translated into it?  */
1213               if (translated != c || inverse != c)
1214                 {
1215                   /* Keep track of which character set row
1216                      contains the characters that need translation.  */
1217                   int charset_base_code = c & ~0xff;
1218                   if (charset_base == -1)
1219                     charset_base = charset_base_code;
1220                   else if (charset_base != charset_base_code)
1221                     /* If two different rows appear, needing translation,
1222                        then we cannot use boyer_moore search.  */
1223                     simple = 0;
1224                     /* ??? Handa: this must do simple = 0
1225                        if c is a composite character.  */
1226                 }
1227
1228               /* Store this character into the translated pattern.  */
1229               bcopy (str, pat, charlen);
1230               pat += charlen;
1231               base_pat += in_charlen;
1232               len_byte -= in_charlen;
1233             }
1234         }
1235       else
1236         {
1237           while (--len >= 0)
1238             {
1239               int c, translated, inverse;
1240
1241               /* If we got here and the RE flag is set, it's because we're
1242                  dealing with a regexp known to be trivial, so the backslash
1243                  just quotes the next character.  */
1244               if (RE && *base_pat == '\\')
1245                 {
1246                   len--;
1247                   base_pat++;
1248                 }
1249               c = *base_pat++;
1250               TRANSLATE (translated, trt, c);
1251               TRANSLATE (inverse, inverse_trt, c);
1252
1253               /* Did this char actually get translated?
1254                  Would any other char get translated into it?  */
1255               if (translated != c || inverse != c)
1256                 {
1257                   /* Keep track of which character set row
1258                      contains the characters that need translation.  */
1259                   int charset_base_code = c & ~0xff;
1260                   if (charset_base == -1)
1261                     charset_base = charset_base_code;
1262                   else if (charset_base != charset_base_code)
1263                     /* If two different rows appear, needing translation,
1264                        then we cannot use boyer_moore search.  */
1265                     simple = 0;
1266                 }
1267               *pat++ = translated;
1268             }
1269         }
1270
1271       len_byte = pat - patbuf;
1272       len = raw_pattern_size;
1273       pat = base_pat = patbuf;
1274
1275       if (simple)
1276         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1277                             pos, pos_byte, lim, lim_byte,
1278                             charset_base);
1279       else
1280         return simple_search (n, pat, len, len_byte, trt,
1281                               pos, pos_byte, lim, lim_byte);
1282     }
1283 }
1284 \f
1285 /* Do a simple string search N times for the string PAT,
1286    whose length is LEN/LEN_BYTE,
1287    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1288    TRT is the translation table.
1289
1290    Return the character position where the match is found.
1291    Otherwise, if M matches remained to be found, return -M.
1292
1293    This kind of search works regardless of what is in PAT and
1294    regardless of what is in TRT.  It is used in cases where
1295    boyer_moore cannot work.  */
1296
1297 static int
1298 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1299      int n;
1300      unsigned char *pat;
1301      int len, len_byte;
1302      Lisp_Object trt;
1303      int pos, pos_byte;
1304      int lim, lim_byte;
1305 {
1306   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1307   int forward = n > 0;
1308
1309   if (lim > pos && multibyte)
1310     while (n > 0)
1311       {
1312         while (1)
1313           {
1314             /* Try matching at position POS.  */
1315             int this_pos = pos;
1316             int this_pos_byte = pos_byte;
1317             int this_len = len;
1318             int this_len_byte = len_byte;
1319             unsigned char *p = pat;
1320             if (pos + len > lim)
1321               goto stop;
1322
1323             while (this_len > 0)
1324               {
1325                 int charlen, buf_charlen;
1326                 int pat_ch, buf_ch;
1327
1328                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1329                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1330                                                  ZV_BYTE - this_pos_byte,
1331                                                  buf_charlen);
1332                 TRANSLATE (buf_ch, trt, buf_ch);
1333
1334                 if (buf_ch != pat_ch)
1335                   break;
1336
1337                 this_len_byte -= charlen;
1338                 this_len--;
1339                 p += charlen;
1340
1341                 this_pos_byte += buf_charlen;
1342                 this_pos++;
1343               }
1344
1345             if (this_len == 0)
1346               {
1347                 pos += len;
1348                 pos_byte += len_byte;
1349                 break;
1350               }
1351
1352             INC_BOTH (pos, pos_byte);
1353           }
1354
1355         n--;
1356       }
1357   else if (lim > pos)
1358     while (n > 0)
1359       {
1360         while (1)
1361           {
1362             /* Try matching at position POS.  */
1363             int this_pos = pos;
1364             int this_len = len;
1365             unsigned char *p = pat;
1366
1367             if (pos + len > lim)
1368               goto stop;
1369
1370             while (this_len > 0)
1371               {
1372                 int pat_ch = *p++;
1373                 int buf_ch = FETCH_BYTE (this_pos);
1374                 TRANSLATE (buf_ch, trt, buf_ch);
1375
1376                 if (buf_ch != pat_ch)
1377                   break;
1378
1379                 this_len--;
1380                 this_pos++;
1381               }
1382
1383             if (this_len == 0)
1384               {
1385                 pos += len;
1386                 break;
1387               }
1388
1389             pos++;
1390           }
1391
1392         n--;
1393       }
1394   /* Backwards search.  */
1395   else if (lim < pos && multibyte)
1396     while (n < 0)
1397       {
1398         while (1)
1399           {
1400             /* Try matching at position POS.  */
1401             int this_pos = pos - len;
1402             int this_pos_byte = pos_byte - len_byte;
1403             int this_len = len;
1404             int this_len_byte = len_byte;
1405             unsigned char *p = pat;
1406
1407             if (pos - len < lim)
1408               goto stop;
1409
1410             while (this_len > 0)
1411               {
1412                 int charlen, buf_charlen;
1413                 int pat_ch, buf_ch;
1414
1415                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1416                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1417                                                  ZV_BYTE - this_pos_byte,
1418                                                  buf_charlen);
1419                 TRANSLATE (buf_ch, trt, buf_ch);
1420
1421                 if (buf_ch != pat_ch)
1422                   break;
1423
1424                 this_len_byte -= charlen;
1425                 this_len--;
1426                 p += charlen;
1427                 this_pos_byte += buf_charlen;
1428                 this_pos++;
1429               }
1430
1431             if (this_len == 0)
1432               {
1433                 pos -= len;
1434                 pos_byte -= len_byte;
1435                 break;
1436               }
1437
1438             DEC_BOTH (pos, pos_byte);
1439           }
1440
1441         n++;
1442       }
1443   else if (lim < pos)
1444     while (n < 0)
1445       {
1446         while (1)
1447           {
1448             /* Try matching at position POS.  */
1449             int this_pos = pos - len;
1450             int this_len = len;
1451             unsigned char *p = pat;
1452
1453             if (pos - len < lim)
1454               goto stop;
1455
1456             while (this_len > 0)
1457               {
1458                 int pat_ch = *p++;
1459                 int buf_ch = FETCH_BYTE (this_pos);
1460                 TRANSLATE (buf_ch, trt, buf_ch);
1461
1462                 if (buf_ch != pat_ch)
1463                   break;
1464                 this_len--;
1465                 this_pos++;
1466               }
1467
1468             if (this_len == 0)
1469               {
1470                 pos -= len;
1471                 break;
1472               }
1473
1474             pos--;
1475           }
1476
1477         n++;
1478       }
1479
1480  stop:
1481   if (n == 0)
1482     {
1483       if (forward)
1484         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1485       else
1486         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1487
1488       return pos;
1489     }
1490   else if (n > 0)
1491     return -n;
1492   else
1493     return n;
1494 }
1495 \f
1496 /* Do Boyer-Moore search N times for the string PAT,
1497    whose length is LEN/LEN_BYTE,
1498    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1499    DIRECTION says which direction we search in.
1500    TRT and INVERSE_TRT are translation tables.
1501
1502    This kind of search works if all the characters in PAT that have
1503    nontrivial translation are the same aside from the last byte.  This
1504    makes it possible to translate just the last byte of a character,
1505    and do so after just a simple test of the context.
1506
1507    If that criterion is not satisfied, do not call this function.  */
1508
1509 static int
1510 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1511              pos, pos_byte, lim, lim_byte, charset_base)
1512      int n;
1513      unsigned char *base_pat;
1514      int len, len_byte;
1515      Lisp_Object trt;
1516      Lisp_Object inverse_trt;
1517      int pos, pos_byte;
1518      int lim, lim_byte;
1519      int charset_base;
1520 {
1521   int direction = ((n > 0) ? 1 : -1);
1522   register int dirlen;
1523   int infinity, limit, k, stride_for_teases;
1524   register int *BM_tab;
1525   int *BM_tab_base;
1526   register unsigned char *cursor, *p_limit;
1527   register int i, j;
1528   unsigned char *pat;
1529   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1530
1531   unsigned char simple_translate[0400];
1532   int translate_prev_byte;
1533   int translate_anteprev_byte;
1534
1535 #ifdef C_ALLOCA
1536   int BM_tab_space[0400];
1537   BM_tab = &BM_tab_space[0];
1538 #else
1539   BM_tab = (int *) alloca (0400 * sizeof (int));
1540 #endif
1541   /* The general approach is that we are going to maintain that we know */
1542   /* the first (closest to the present position, in whatever direction */
1543   /* we're searching) character that could possibly be the last */
1544   /* (furthest from present position) character of a valid match.  We */
1545   /* advance the state of our knowledge by looking at that character */
1546   /* and seeing whether it indeed matches the last character of the */
1547   /* pattern.  If it does, we take a closer look.  If it does not, we */
1548   /* move our pointer (to putative last characters) as far as is */
1549   /* logically possible.  This amount of movement, which I call a */
1550   /* stride, will be the length of the pattern if the actual character */
1551   /* appears nowhere in the pattern, otherwise it will be the distance */
1552   /* from the last occurrence of that character to the end of the */
1553   /* pattern. */
1554   /* As a coding trick, an enormous stride is coded into the table for */
1555   /* characters that match the last character.  This allows use of only */
1556   /* a single test, a test for having gone past the end of the */
1557   /* permissible match region, to test for both possible matches (when */
1558   /* the stride goes past the end immediately) and failure to */
1559   /* match (where you get nudged past the end one stride at a time). */
1560
1561   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1562   /* is determined only by the last character of the putative match. */
1563   /* If that character does not match, we will stride the proper */
1564   /* distance to propose a match that superimposes it on the last */
1565   /* instance of a character that matches it (per trt), or misses */
1566   /* it entirely if there is none. */
1567
1568   dirlen = len_byte * direction;
1569   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1570   if (direction < 0)
1571     pat = (base_pat += len_byte - 1);
1572   else
1573     pat = base_pat;
1574   BM_tab_base = BM_tab;
1575   BM_tab += 0400;
1576   j = dirlen;           /* to get it in a register */
1577   /* A character that does not appear in the pattern induces a */
1578   /* stride equal to the pattern length. */
1579   while (BM_tab_base != BM_tab)
1580     {
1581       *--BM_tab = j;
1582       *--BM_tab = j;
1583       *--BM_tab = j;
1584       *--BM_tab = j;
1585     }
1586
1587   /* We use this for translation, instead of TRT itself.
1588      We fill this in to handle the characters that actually
1589      occur in the pattern.  Others don't matter anyway!  */
1590   bzero (simple_translate, sizeof simple_translate);
1591   for (i = 0; i < 0400; i++)
1592     simple_translate[i] = i;
1593
1594   i = 0;
1595   while (i != infinity)
1596     {
1597       unsigned char *ptr = pat + i;
1598       i += direction;
1599       if (i == dirlen)
1600         i = infinity;
1601       if (! NILP (trt))
1602         {
1603           int ch;
1604           int untranslated;
1605           int this_translated = 1;
1606
1607           if (multibyte
1608               && (ptr + 1 == pat + len_byte || CHAR_HEAD_P (ptr[1])))
1609             {
1610               unsigned char *charstart = ptr;
1611               while (! CHAR_HEAD_P (*charstart))
1612                 charstart--;
1613               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1614               if (charset_base == (untranslated & ~0xff))
1615                 {
1616                   TRANSLATE (ch, trt, untranslated);
1617                   if (! CHAR_HEAD_P (*ptr))
1618                     {
1619                       translate_prev_byte = ptr[-1];
1620                       if (! CHAR_HEAD_P (translate_prev_byte))
1621                         translate_anteprev_byte = ptr[-2];
1622                     }
1623                 }
1624               else
1625                 {
1626                   this_translated = 0;
1627                   ch = *ptr;
1628                 }
1629             }
1630           else if (!multibyte)
1631             TRANSLATE (ch, trt, *ptr);
1632           else
1633             {
1634               ch = *ptr;
1635               this_translated = 0;
1636             }
1637
1638           if (ch > 0400)
1639             j = ((unsigned char) ch) | 0200;
1640           else
1641             j = (unsigned char) ch;
1642
1643           if (i == infinity)
1644             stride_for_teases = BM_tab[j];
1645
1646           BM_tab[j] = dirlen - i;
1647           /* A translation table is accompanied by its inverse -- see */
1648           /* comment following downcase_table for details */
1649           if (this_translated)
1650             {
1651               int starting_ch = ch;
1652               int starting_j = j;
1653               while (1)
1654                 {
1655                   TRANSLATE (ch, inverse_trt, ch);
1656                   if (ch > 0400)
1657                     j = ((unsigned char) ch) | 0200;
1658                   else
1659                     j = (unsigned char) ch;
1660
1661                   /* For all the characters that map into CH,
1662                      set up simple_translate to map the last byte
1663                      into STARTING_J.  */
1664                   simple_translate[j] = starting_j;
1665                   if (ch == starting_ch)
1666                     break;
1667                   BM_tab[j] = dirlen - i;
1668                 }
1669             }
1670         }
1671       else
1672         {
1673           j = *ptr;
1674
1675           if (i == infinity)
1676             stride_for_teases = BM_tab[j];
1677           BM_tab[j] = dirlen - i;
1678         }
1679       /* stride_for_teases tells how much to stride if we get a */
1680       /* match on the far character but are subsequently */
1681       /* disappointed, by recording what the stride would have been */
1682       /* for that character if the last character had been */
1683       /* different. */
1684     }
1685   infinity = dirlen - infinity;
1686   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1687   /* loop invariant - POS_BYTE points at where last char (first
1688      char if reverse) of pattern would align in a possible match.  */
1689   while (n != 0)
1690     {
1691       int tail_end;
1692       unsigned char *tail_end_ptr;
1693
1694       /* It's been reported that some (broken) compiler thinks that
1695          Boolean expressions in an arithmetic context are unsigned.
1696          Using an explicit ?1:0 prevents this.  */
1697       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1698           < 0)
1699         return (n * (0 - direction));
1700       /* First we do the part we can by pointers (maybe nothing) */
1701       QUIT;
1702       pat = base_pat;
1703       limit = pos_byte - dirlen + direction;
1704       limit = ((direction > 0)
1705                ? BUFFER_CEILING_OF (limit)
1706                : BUFFER_FLOOR_OF (limit));
1707       /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1708          can take on without hitting edge of buffer or the gap.  */
1709       limit = ((direction > 0)
1710                ? min (lim_byte - 1, min (limit, pos_byte + 20000))
1711                : max (lim_byte, max (limit, pos_byte - 20000)));
1712       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1713       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1714
1715       if ((limit - pos_byte) * direction > 20)
1716         {
1717           unsigned char *p2;
1718
1719           p_limit = BYTE_POS_ADDR (limit);
1720           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1721           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1722           while (1)             /* use one cursor setting as long as i can */
1723             {
1724               if (direction > 0) /* worth duplicating */
1725                 {
1726                   /* Use signed comparison if appropriate
1727                      to make cursor+infinity sure to be > p_limit.
1728                      Assuming that the buffer lies in a range of addresses
1729                      that are all "positive" (as ints) or all "negative",
1730                      either kind of comparison will work as long
1731                      as we don't step by infinity.  So pick the kind
1732                      that works when we do step by infinity.  */
1733                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1734                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1735                       cursor += BM_tab[*cursor];
1736                   else
1737                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1738                       cursor += BM_tab[*cursor];
1739                 }
1740               else
1741                 {
1742                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1743                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1744                       cursor += BM_tab[*cursor];
1745                   else
1746                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1747                       cursor += BM_tab[*cursor];
1748                 }
1749 /* If you are here, cursor is beyond the end of the searched region. */
1750 /* This can happen if you match on the far character of the pattern, */
1751 /* because the "stride" of that character is infinity, a number able */
1752 /* to throw you well beyond the end of the search.  It can also */
1753 /* happen if you fail to match within the permitted region and would */
1754 /* otherwise try a character beyond that region */
1755               if ((cursor - p_limit) * direction <= len_byte)
1756                 break;  /* a small overrun is genuine */
1757               cursor -= infinity; /* large overrun = hit */
1758               i = dirlen - direction;
1759               if (! NILP (trt))
1760                 {
1761                   while ((i -= direction) + direction != 0)
1762                     {
1763                       int ch;
1764                       cursor -= direction;
1765                       /* Translate only the last byte of a character.  */
1766                       if (! multibyte
1767                           || ((cursor == tail_end_ptr
1768                                || CHAR_HEAD_P (cursor[1]))
1769                               && (CHAR_HEAD_P (cursor[0])
1770                                   || (translate_prev_byte == cursor[-1]
1771                                       && (CHAR_HEAD_P (translate_prev_byte)
1772                                           || translate_anteprev_byte == cursor[-2])))))
1773                         ch = simple_translate[*cursor];
1774                       else
1775                         ch = *cursor;
1776                       if (pat[i] != ch)
1777                         break;
1778                     }
1779                 }
1780               else
1781                 {
1782                   while ((i -= direction) + direction != 0)
1783                     {
1784                       cursor -= direction;
1785                       if (pat[i] != *cursor)
1786                         break;
1787                     }
1788                 }
1789               cursor += dirlen - i - direction; /* fix cursor */
1790               if (i + direction == 0)
1791                 {
1792                   int position;
1793
1794                   cursor -= direction;
1795
1796                   position = pos_byte + cursor - p2 + ((direction > 0)
1797                                                        ? 1 - len_byte : 0);
1798                   set_search_regs (position, len_byte);
1799
1800                   if ((n -= direction) != 0)
1801                     cursor += dirlen; /* to resume search */
1802                   else
1803                     return ((direction > 0)
1804                             ? search_regs.end[0] : search_regs.start[0]);
1805                 }
1806               else
1807                 cursor += stride_for_teases; /* <sigh> we lose -  */
1808             }
1809           pos_byte += cursor - p2;
1810         }
1811       else
1812         /* Now we'll pick up a clump that has to be done the hard */
1813         /* way because it covers a discontinuity */
1814         {
1815           limit = ((direction > 0)
1816                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1817                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1818           limit = ((direction > 0)
1819                    ? min (limit + len_byte, lim_byte - 1)
1820                    : max (limit - len_byte, lim_byte));
1821           /* LIMIT is now the last value POS_BYTE can have
1822              and still be valid for a possible match.  */
1823           while (1)
1824             {
1825               /* This loop can be coded for space rather than */
1826               /* speed because it will usually run only once. */
1827               /* (the reach is at most len + 21, and typically */
1828               /* does not exceed len) */
1829               while ((limit - pos_byte) * direction >= 0)
1830                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1831               /* now run the same tests to distinguish going off the */
1832               /* end, a match or a phony match. */
1833               if ((pos_byte - limit) * direction <= len_byte)
1834                 break;  /* ran off the end */
1835               /* Found what might be a match.
1836                  Set POS_BYTE back to last (first if reverse) pos.  */
1837               pos_byte -= infinity;
1838               i = dirlen - direction;
1839               while ((i -= direction) + direction != 0)
1840                 {
1841                   int ch;
1842                   unsigned char *ptr;
1843                   pos_byte -= direction;
1844                   ptr = BYTE_POS_ADDR (pos_byte);
1845                   /* Translate only the last byte of a character.  */
1846                   if (! multibyte
1847                       || ((ptr == tail_end_ptr
1848                            || CHAR_HEAD_P (ptr[1]))
1849                           && (CHAR_HEAD_P (ptr[0])
1850                               || (translate_prev_byte == ptr[-1]
1851                                   && (CHAR_HEAD_P (translate_prev_byte)
1852                                       || translate_anteprev_byte == ptr[-2])))))
1853                     ch = simple_translate[*ptr];
1854                   else
1855                     ch = *ptr;
1856                   if (pat[i] != ch)
1857                     break;
1858                 }
1859               /* Above loop has moved POS_BYTE part or all the way
1860                  back to the first pos (last pos if reverse).
1861                  Set it once again at the last (first if reverse) char.  */
1862               pos_byte += dirlen - i- direction;
1863               if (i + direction == 0)
1864                 {
1865                   int position;
1866                   pos_byte -= direction;
1867
1868                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1869
1870                   set_search_regs (position, len_byte);
1871
1872                   if ((n -= direction) != 0)
1873                     pos_byte += dirlen; /* to resume search */
1874                   else
1875                     return ((direction > 0)
1876                             ? search_regs.end[0] : search_regs.start[0]);
1877                 }
1878               else
1879                 pos_byte += stride_for_teases;
1880             }
1881           }
1882       /* We have done one clump.  Can we continue? */
1883       if ((lim_byte - pos_byte) * direction < 0)
1884         return ((0 - n) * direction);
1885     }
1886   return BYTE_TO_CHAR (pos_byte);
1887 }
1888
1889 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1890    for a match just found in the current buffer.  */
1891
1892 static void
1893 set_search_regs (beg_byte, nbytes)
1894      int beg_byte, nbytes;
1895 {
1896   /* Make sure we have registers in which to store
1897      the match position.  */
1898   if (search_regs.num_regs == 0)
1899     {
1900       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1901       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1902       search_regs.num_regs = 2;
1903     }
1904
1905   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1906   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1907   XSETBUFFER (last_thing_searched, current_buffer);
1908 }
1909 \f
1910 /* Given a string of words separated by word delimiters,
1911   compute a regexp that matches those exact words
1912   separated by arbitrary punctuation.  */
1913
1914 static Lisp_Object
1915 wordify (string)
1916      Lisp_Object string;
1917 {
1918   register unsigned char *p, *o;
1919   register int i, i_byte, len, punct_count = 0, word_count = 0;
1920   Lisp_Object val;
1921   int prev_c = 0;
1922   int adjust;
1923
1924   CHECK_STRING (string, 0);
1925   p = XSTRING (string)->data;
1926   len = XSTRING (string)->size;
1927
1928   for (i = 0, i_byte = 0; i < len; )
1929     {
1930       int c;
1931
1932       if (STRING_MULTIBYTE (string))
1933         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1934       else
1935         c = XSTRING (string)->data[i++];
1936
1937       if (SYNTAX (c) != Sword)
1938         {
1939           punct_count++;
1940           if (i > 0 && SYNTAX (prev_c) == Sword)
1941             word_count++;
1942         }
1943
1944       prev_c = c;
1945     }
1946
1947   if (SYNTAX (prev_c) == Sword)
1948     word_count++;
1949   if (!word_count)
1950     return build_string ("");
1951
1952   adjust = - punct_count + 5 * (word_count - 1) + 4;
1953   val = make_uninit_multibyte_string (len + adjust,
1954                                       STRING_BYTES (XSTRING (string)) + adjust);
1955
1956   o = XSTRING (val)->data;
1957   *o++ = '\\';
1958   *o++ = 'b';
1959
1960   for (i = 0; i < STRING_BYTES (XSTRING (val)); i++)
1961     if (SYNTAX (p[i]) == Sword)
1962       *o++ = p[i];
1963     else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1964       {
1965         *o++ = '\\';
1966         *o++ = 'W';
1967         *o++ = '\\';
1968         *o++ = 'W';
1969         *o++ = '*';
1970       }
1971
1972   *o++ = '\\';
1973   *o++ = 'b';
1974
1975   return val;
1976 }
1977 \f
1978 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1979   "MSearch backward: ",
1980   "Search backward from point for STRING.\n\
1981 Set point to the beginning of the occurrence found, and return point.\n\
1982 An optional second argument bounds the search; it is a buffer position.\n\
1983 The match found must not extend before that position.\n\
1984 Optional third argument, if t, means if fail just return nil (no error).\n\
1985  If not nil and not t, position at limit of search and return nil.\n\
1986 Optional fourth argument is repeat count--search for successive occurrences.\n\
1987 See also the functions `match-beginning', `match-end' and `replace-match'.")
1988   (string, bound, noerror, count)
1989      Lisp_Object string, bound, noerror, count;
1990 {
1991   return search_command (string, bound, noerror, count, -1, 0, 0);
1992 }
1993
1994 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
1995   "Search forward from point for STRING.\n\
1996 Set point to the end of the occurrence found, and return point.\n\
1997 An optional second argument bounds the search; it is a buffer position.\n\
1998 The match found must not extend after that position.  nil is equivalent\n\
1999   to (point-max).\n\
2000 Optional third argument, if t, means if fail just return nil (no error).\n\
2001   If not nil and not t, move to limit of search and return nil.\n\
2002 Optional fourth argument is repeat count--search for successive occurrences.\n\
2003 See also the functions `match-beginning', `match-end' and `replace-match'.")
2004   (string, bound, noerror, count)
2005      Lisp_Object string, bound, noerror, count;
2006 {
2007   return search_command (string, bound, noerror, count, 1, 0, 0);
2008 }
2009
2010 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2011   "sWord search backward: ",
2012   "Search backward from point for STRING, ignoring differences in punctuation.\n\
2013 Set point to the beginning of the occurrence found, and return point.\n\
2014 An optional second argument bounds the search; it is a buffer position.\n\
2015 The match found must not extend before that position.\n\
2016 Optional third argument, if t, means if fail just return nil (no error).\n\
2017   If not nil and not t, move to limit of search and return nil.\n\
2018 Optional fourth argument is repeat count--search for successive occurrences.")
2019   (string, bound, noerror, count)
2020      Lisp_Object string, bound, noerror, count;
2021 {
2022   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2023 }
2024
2025 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2026   "sWord search: ",
2027   "Search forward from point for STRING, ignoring differences in punctuation.\n\
2028 Set point to the end of the occurrence found, and return point.\n\
2029 An optional second argument bounds the search; it is a buffer position.\n\
2030 The match found must not extend after that position.\n\
2031 Optional third argument, if t, means if fail just return nil (no error).\n\
2032   If not nil and not t, move to limit of search and return nil.\n\
2033 Optional fourth argument is repeat count--search for successive occurrences.")
2034   (string, bound, noerror, count)
2035      Lisp_Object string, bound, noerror, count;
2036 {
2037   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2038 }
2039
2040 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2041   "sRE search backward: ",
2042   "Search backward from point for match for regular expression REGEXP.\n\
2043 Set point to the beginning of the match, and return point.\n\
2044 The match found is the one starting last in the buffer\n\
2045 and yet ending before the origin of the search.\n\
2046 An optional second argument bounds the search; it is a buffer position.\n\
2047 The match found must start at or after that position.\n\
2048 Optional third argument, if t, means if fail just return nil (no error).\n\
2049   If not nil and not t, move to limit of search and return nil.\n\
2050 Optional fourth argument is repeat count--search for successive occurrences.\n\
2051 See also the functions `match-beginning', `match-end' and `replace-match'.")
2052   (regexp, bound, noerror, count)
2053      Lisp_Object regexp, bound, noerror, count;
2054 {
2055   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2056 }
2057
2058 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2059   "sRE search: ",
2060   "Search forward from point for regular expression REGEXP.\n\
2061 Set point to the end of the occurrence found, and return point.\n\
2062 An optional second argument bounds the search; it is a buffer position.\n\
2063 The match found must not extend after that position.\n\
2064 Optional third argument, if t, means if fail just return nil (no error).\n\
2065   If not nil and not t, move to limit of search and return nil.\n\
2066 Optional fourth argument is repeat count--search for successive occurrences.\n\
2067 See also the functions `match-beginning', `match-end' and `replace-match'.")
2068   (regexp, bound, noerror, count)
2069      Lisp_Object regexp, bound, noerror, count;
2070 {
2071   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2072 }
2073
2074 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2075   "sPosix search backward: ",
2076   "Search backward from point for match for regular expression REGEXP.\n\
2077 Find the longest match in accord with Posix regular expression rules.\n\
2078 Set point to the beginning of the match, and return point.\n\
2079 The match found is the one starting last in the buffer\n\
2080 and yet ending before the origin of the search.\n\
2081 An optional second argument bounds the search; it is a buffer position.\n\
2082 The match found must start at or after that position.\n\
2083 Optional third argument, if t, means if fail just return nil (no error).\n\
2084   If not nil and not t, move to limit of search and return nil.\n\
2085 Optional fourth argument is repeat count--search for successive occurrences.\n\
2086 See also the functions `match-beginning', `match-end' and `replace-match'.")
2087   (regexp, bound, noerror, count)
2088      Lisp_Object regexp, bound, noerror, count;
2089 {
2090   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2091 }
2092
2093 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2094   "sPosix search: ",
2095   "Search forward from point for regular expression REGEXP.\n\
2096 Find the longest match in accord with Posix regular expression rules.\n\
2097 Set point to the end of the occurrence found, and return point.\n\
2098 An optional second argument bounds the search; it is a buffer position.\n\
2099 The match found must not extend after that position.\n\
2100 Optional third argument, if t, means if fail just return nil (no error).\n\
2101   If not nil and not t, move to limit of search and return nil.\n\
2102 Optional fourth argument is repeat count--search for successive occurrences.\n\
2103 See also the functions `match-beginning', `match-end' and `replace-match'.")
2104   (regexp, bound, noerror, count)
2105      Lisp_Object regexp, bound, noerror, count;
2106 {
2107   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2108 }
2109 \f
2110 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2111   "Replace text matched by last search with NEWTEXT.\n\
2112 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2113 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2114 based on the replaced text.\n\
2115 If the replaced text has only capital letters\n\
2116 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2117 If the replaced text has at least one word starting with a capital letter,\n\
2118 then capitalize each word in NEWTEXT.\n\n\
2119 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2120 Otherwise treat `\\' as special:\n\
2121   `\\&' in NEWTEXT means substitute original matched text.\n\
2122   `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2123        If Nth parens didn't match, substitute nothing.\n\
2124   `\\\\' means insert one `\\'.\n\
2125 FIXEDCASE and LITERAL are optional arguments.\n\
2126 Leaves point at end of replacement text.\n\
2127 \n\
2128 The optional fourth argument STRING can be a string to modify.\n\
2129 In that case, this function creates and returns a new string\n\
2130 which is made by replacing the part of STRING that was matched.\n\
2131 \n\
2132 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2133 It says to replace just that subexpression instead of the whole match.\n\
2134 This is useful only after a regular expression search or match\n\
2135 since only regular expressions have distinguished subexpressions.")
2136   (newtext, fixedcase, literal, string, subexp)
2137      Lisp_Object newtext, fixedcase, literal, string, subexp;
2138 {
2139   enum { nochange, all_caps, cap_initial } case_action;
2140   register int pos, last;
2141   int some_multiletter_word;
2142   int some_lowercase;
2143   int some_uppercase;
2144   int some_nonuppercase_initial;
2145   register int c, prevc;
2146   int inslen;
2147   int sub;
2148   int opoint, newpoint;
2149
2150   CHECK_STRING (newtext, 0);
2151
2152   if (! NILP (string))
2153     CHECK_STRING (string, 4);
2154
2155   case_action = nochange;       /* We tried an initialization */
2156                                 /* but some C compilers blew it */
2157
2158   if (search_regs.num_regs <= 0)
2159     error ("replace-match called before any match found");
2160
2161   if (NILP (subexp))
2162     sub = 0;
2163   else
2164     {
2165       CHECK_NUMBER (subexp, 3);
2166       sub = XINT (subexp);
2167       if (sub < 0 || sub >= search_regs.num_regs)
2168         args_out_of_range (subexp, make_number (search_regs.num_regs));
2169     }
2170
2171   if (NILP (string))
2172     {
2173       if (search_regs.start[sub] < BEGV
2174           || search_regs.start[sub] > search_regs.end[sub]
2175           || search_regs.end[sub] > ZV)
2176         args_out_of_range (make_number (search_regs.start[sub]),
2177                            make_number (search_regs.end[sub]));
2178     }
2179   else
2180     {
2181       if (search_regs.start[sub] < 0
2182           || search_regs.start[sub] > search_regs.end[sub]
2183           || search_regs.end[sub] > XSTRING (string)->size)
2184         args_out_of_range (make_number (search_regs.start[sub]),
2185                            make_number (search_regs.end[sub]));
2186     }
2187
2188   if (NILP (fixedcase))
2189     {
2190       int beg;
2191       /* Decide how to casify by examining the matched text. */
2192
2193       if (NILP (string))
2194         last = CHAR_TO_BYTE (search_regs.end[sub]);
2195       else
2196         last = search_regs.end[sub];
2197
2198       if (NILP (string))
2199         beg = CHAR_TO_BYTE (search_regs.start[sub]);
2200       else
2201         beg = search_regs.start[sub];
2202
2203       prevc = '\n';
2204       case_action = all_caps;
2205
2206       /* some_multiletter_word is set nonzero if any original word
2207          is more than one letter long. */
2208       some_multiletter_word = 0;
2209       some_lowercase = 0;
2210       some_nonuppercase_initial = 0;
2211       some_uppercase = 0;
2212
2213       for (pos = beg; pos < last; pos++)
2214         {
2215           if (NILP (string))
2216             c = FETCH_BYTE (pos);
2217           else
2218             c = XSTRING (string)->data[pos];
2219
2220           if (LOWERCASEP (c))
2221             {
2222               /* Cannot be all caps if any original char is lower case */
2223
2224               some_lowercase = 1;
2225               if (SYNTAX (prevc) != Sword)
2226                 some_nonuppercase_initial = 1;
2227               else
2228                 some_multiletter_word = 1;
2229             }
2230           else if (!NOCASEP (c))
2231             {
2232               some_uppercase = 1;
2233               if (SYNTAX (prevc) != Sword)
2234                 ;
2235               else
2236                 some_multiletter_word = 1;
2237             }
2238           else
2239             {
2240               /* If the initial is a caseless word constituent,
2241                  treat that like a lowercase initial.  */
2242               if (SYNTAX (prevc) != Sword)
2243                 some_nonuppercase_initial = 1;
2244             }
2245
2246           prevc = c;
2247         }
2248
2249       /* Convert to all caps if the old text is all caps
2250          and has at least one multiletter word.  */
2251       if (! some_lowercase && some_multiletter_word)
2252         case_action = all_caps;
2253       /* Capitalize each word, if the old text has all capitalized words.  */
2254       else if (!some_nonuppercase_initial && some_multiletter_word)
2255         case_action = cap_initial;
2256       else if (!some_nonuppercase_initial && some_uppercase)
2257         /* Should x -> yz, operating on X, give Yz or YZ?
2258            We'll assume the latter.  */
2259         case_action = all_caps;
2260       else
2261         case_action = nochange;
2262     }
2263
2264   /* Do replacement in a string.  */
2265   if (!NILP (string))
2266     {
2267       Lisp_Object before, after;
2268
2269       before = Fsubstring (string, make_number (0),
2270                            make_number (search_regs.start[sub]));
2271       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2272
2273       /* Substitute parts of the match into NEWTEXT
2274          if desired.  */
2275       if (NILP (literal))
2276         {
2277           int lastpos = -1;
2278           int lastpos_byte = -1;
2279           /* We build up the substituted string in ACCUM.  */
2280           Lisp_Object accum;
2281           Lisp_Object middle;
2282           int pos_byte;
2283
2284           accum = Qnil;
2285
2286           for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
2287             {
2288               int substart = -1;
2289               int subend;
2290               int delbackslash = 0;
2291
2292               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2293
2294               if (c == '\\')
2295                 {
2296                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2297                   if (c == '&')
2298                     {
2299                       substart = search_regs.start[sub];
2300                       subend = search_regs.end[sub];
2301                     }
2302                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2303                     {
2304                       if (search_regs.start[c - '0'] >= 0)
2305                         {
2306                           substart = search_regs.start[c - '0'];
2307                           subend = search_regs.end[c - '0'];
2308                         }
2309                     }
2310                   else if (c == '\\')
2311                     delbackslash = 1;
2312                   else
2313                     error ("Invalid use of `\\' in replacement text");
2314                 }
2315               if (substart >= 0)
2316                 {
2317                   if (pos - 1 != lastpos + 1)
2318                     middle = substring_both (newtext, lastpos + 1,
2319                                              lastpos_byte + 1,
2320                                              pos - 1, pos_byte - 1);
2321                   else
2322                     middle = Qnil;
2323                   accum = concat3 (accum, middle,
2324                                    Fsubstring (string,
2325                                                make_number (substart),
2326                                                make_number (subend)));
2327                   lastpos = pos;
2328                   lastpos_byte = pos_byte;
2329                 }
2330               else if (delbackslash)
2331                 {
2332                   middle = substring_both (newtext, lastpos + 1,
2333                                            lastpos_byte + 1,
2334                                            pos, pos_byte);
2335
2336                   accum = concat2 (accum, middle);
2337                   lastpos = pos;
2338                   lastpos_byte = pos_byte;
2339                 }
2340             }
2341
2342           if (pos != lastpos + 1)
2343             middle = substring_both (newtext, lastpos + 1,
2344                                      lastpos_byte + 1,
2345                                      pos, pos_byte);
2346           else
2347             middle = Qnil;
2348
2349           newtext = concat2 (accum, middle);
2350         }
2351
2352       /* Do case substitution in NEWTEXT if desired.  */
2353       if (case_action == all_caps)
2354         newtext = Fupcase (newtext);
2355       else if (case_action == cap_initial)
2356         newtext = Fupcase_initials (newtext);
2357
2358       return concat3 (before, newtext, after);
2359     }
2360
2361   /* Record point, the move (quietly) to the start of the match.  */
2362   if (PT > search_regs.start[sub])
2363     opoint = PT - ZV;
2364   else
2365     opoint = PT;
2366
2367   TEMP_SET_PT (search_regs.start[sub]);
2368
2369   /* We insert the replacement text before the old text, and then
2370      delete the original text.  This means that markers at the
2371      beginning or end of the original will float to the corresponding
2372      position in the replacement.  */
2373   if (!NILP (literal))
2374     Finsert_and_inherit (1, &newtext);
2375   else
2376     {
2377       struct gcpro gcpro1;
2378       GCPRO1 (newtext);
2379
2380       for (pos = 0; pos < XSTRING (newtext)->size; pos++)
2381         {
2382           int offset = PT - search_regs.start[sub];
2383
2384           c = XSTRING (newtext)->data[pos];
2385           if (c == '\\')
2386             {
2387               c = XSTRING (newtext)->data[++pos];
2388               if (c == '&')
2389                 Finsert_buffer_substring
2390                   (Fcurrent_buffer (),
2391                    make_number (search_regs.start[sub] + offset),
2392                    make_number (search_regs.end[sub] + offset));
2393               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2394                 {
2395                   if (search_regs.start[c - '0'] >= 1)
2396                     Finsert_buffer_substring
2397                       (Fcurrent_buffer (),
2398                        make_number (search_regs.start[c - '0'] + offset),
2399                        make_number (search_regs.end[c - '0'] + offset));
2400                 }
2401               else if (c == '\\')
2402                 insert_char (c);
2403               else
2404                 error ("Invalid use of `\\' in replacement text");
2405             }
2406           else
2407             insert_char (c);
2408         }
2409       UNGCPRO;
2410     }
2411
2412   inslen = PT - (search_regs.start[sub]);
2413   del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2414
2415   if (case_action == all_caps)
2416     Fupcase_region (make_number (PT - inslen), make_number (PT));
2417   else if (case_action == cap_initial)
2418     Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2419
2420   newpoint = PT;
2421
2422   /* Put point back where it was in the text.  */
2423   if (opoint <= 0)
2424     TEMP_SET_PT (opoint + ZV);
2425   else
2426     TEMP_SET_PT (opoint);
2427
2428   /* Now move point "officially" to the start of the inserted replacement.  */
2429   move_if_not_intangible (newpoint);
2430
2431   return Qnil;
2432 }
2433 \f
2434 static Lisp_Object
2435 match_limit (num, beginningp)
2436      Lisp_Object num;
2437      int beginningp;
2438 {
2439   register int n;
2440
2441   CHECK_NUMBER (num, 0);
2442   n = XINT (num);
2443   if (n < 0 || n >= search_regs.num_regs)
2444     args_out_of_range (num, make_number (search_regs.num_regs));
2445   if (search_regs.num_regs <= 0
2446       || search_regs.start[n] < 0)
2447     return Qnil;
2448   return (make_number ((beginningp) ? search_regs.start[n]
2449                                     : search_regs.end[n]));
2450 }
2451
2452 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2453   "Return position of start of text matched by last search.\n\
2454 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2455   regexp.\n\
2456 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2457   SUBEXP pairs.\n\
2458 Zero means the entire text matched by the whole regexp or whole string.")
2459   (subexp)
2460      Lisp_Object subexp;
2461 {
2462   return match_limit (subexp, 1);
2463 }
2464
2465 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2466   "Return position of end of text matched by last search.\n\
2467 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2468   regexp.\n\
2469 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2470   SUBEXP pairs.\n\
2471 Zero means the entire text matched by the whole regexp or whole string.")
2472   (subexp)
2473      Lisp_Object subexp;
2474 {
2475   return match_limit (subexp, 0);
2476 }
2477
2478 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2479   "Return a list containing all info on what the last search matched.\n\
2480 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2481 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2482 if the last match was on a buffer; integers or nil if a string was matched.\n\
2483 Use `store-match-data' to reinstate the data in this list.\n\
2484 \n\
2485 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2486 \(rather than markers) to represent buffer positions.\n\
2487 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough\n\
2488 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2489   (integers, reuse)
2490      Lisp_Object integers, reuse;
2491 {
2492   Lisp_Object tail, prev;
2493   Lisp_Object *data;
2494   int i, len;
2495
2496   if (NILP (last_thing_searched))
2497     return Qnil;
2498
2499   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2500                                  * sizeof (Lisp_Object));
2501
2502   len = -1;
2503   for (i = 0; i < search_regs.num_regs; i++)
2504     {
2505       int start = search_regs.start[i];
2506       if (start >= 0)
2507         {
2508           if (EQ (last_thing_searched, Qt)
2509               || ! NILP (integers))
2510             {
2511               XSETFASTINT (data[2 * i], start);
2512               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2513             }
2514           else if (BUFFERP (last_thing_searched))
2515             {
2516               data[2 * i] = Fmake_marker ();
2517               Fset_marker (data[2 * i],
2518                            make_number (start),
2519                            last_thing_searched);
2520               data[2 * i + 1] = Fmake_marker ();
2521               Fset_marker (data[2 * i + 1],
2522                            make_number (search_regs.end[i]),
2523                            last_thing_searched);
2524             }
2525           else
2526             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2527             abort ();
2528
2529           len = i;
2530         }
2531       else
2532         data[2 * i] = data [2 * i + 1] = Qnil;
2533     }
2534
2535   /* If REUSE is not usable, cons up the values and return them.  */
2536   if (! CONSP (reuse))
2537     return Flist (2 * len + 2, data);
2538
2539   /* If REUSE is a list, store as many value elements as will fit
2540      into the elements of REUSE.  */
2541   for (i = 0, tail = reuse; CONSP (tail);
2542        i++, tail = XCONS (tail)->cdr)
2543     {
2544       if (i < 2 * len + 2)
2545         XCONS (tail)->car = data[i];
2546       else
2547         XCONS (tail)->car = Qnil;
2548       prev = tail;
2549     }
2550
2551   /* If we couldn't fit all value elements into REUSE,
2552      cons up the rest of them and add them to the end of REUSE.  */
2553   if (i < 2 * len + 2)
2554     XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2555
2556   return reuse;
2557 }
2558
2559
2560 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2561   "Set internal data on last search match from elements of LIST.\n\
2562 LIST should have been created by calling `match-data' previously.")
2563   (list)
2564      register Lisp_Object list;
2565 {
2566   register int i;
2567   register Lisp_Object marker;
2568
2569   if (running_asynch_code)
2570     save_search_regs ();
2571
2572   if (!CONSP (list) && !NILP (list))
2573     list = wrong_type_argument (Qconsp, list);
2574
2575   /* Unless we find a marker with a buffer in LIST, assume that this
2576      match data came from a string.  */
2577   last_thing_searched = Qt;
2578
2579   /* Allocate registers if they don't already exist.  */
2580   {
2581     int length = XFASTINT (Flength (list)) / 2;
2582
2583     if (length > search_regs.num_regs)
2584       {
2585         if (search_regs.num_regs == 0)
2586           {
2587             search_regs.start
2588               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2589             search_regs.end
2590               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2591           }
2592         else
2593           {
2594             search_regs.start
2595               = (regoff_t *) xrealloc (search_regs.start,
2596                                        length * sizeof (regoff_t));
2597             search_regs.end
2598               = (regoff_t *) xrealloc (search_regs.end,
2599                                        length * sizeof (regoff_t));
2600           }
2601
2602         search_regs.num_regs = length;
2603       }
2604   }
2605
2606   for (i = 0; i < search_regs.num_regs; i++)
2607     {
2608       marker = Fcar (list);
2609       if (NILP (marker))
2610         {
2611           search_regs.start[i] = -1;
2612           list = Fcdr (list);
2613         }
2614       else
2615         {
2616           if (MARKERP (marker))
2617             {
2618               if (XMARKER (marker)->buffer == 0)
2619                 XSETFASTINT (marker, 0);
2620               else
2621                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2622             }
2623
2624           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2625           search_regs.start[i] = XINT (marker);
2626           list = Fcdr (list);
2627
2628           marker = Fcar (list);
2629           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2630             XSETFASTINT (marker, 0);
2631
2632           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2633           search_regs.end[i] = XINT (marker);
2634         }
2635       list = Fcdr (list);
2636     }
2637
2638   return Qnil;
2639 }
2640
2641 /* If non-zero the match data have been saved in saved_search_regs
2642    during the execution of a sentinel or filter. */
2643 static int search_regs_saved;
2644 static struct re_registers saved_search_regs;
2645
2646 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2647    if asynchronous code (filter or sentinel) is running. */
2648 static void
2649 save_search_regs ()
2650 {
2651   if (!search_regs_saved)
2652     {
2653       saved_search_regs.num_regs = search_regs.num_regs;
2654       saved_search_regs.start = search_regs.start;
2655       saved_search_regs.end = search_regs.end;
2656       search_regs.num_regs = 0;
2657       search_regs.start = 0;
2658       search_regs.end = 0;
2659
2660       search_regs_saved = 1;
2661     }
2662 }
2663
2664 /* Called upon exit from filters and sentinels. */
2665 void
2666 restore_match_data ()
2667 {
2668   if (search_regs_saved)
2669     {
2670       if (search_regs.num_regs > 0)
2671         {
2672           xfree (search_regs.start);
2673           xfree (search_regs.end);
2674         }
2675       search_regs.num_regs = saved_search_regs.num_regs;
2676       search_regs.start = saved_search_regs.start;
2677       search_regs.end = saved_search_regs.end;
2678
2679       search_regs_saved = 0;
2680     }
2681 }
2682
2683 /* Quote a string to inactivate reg-expr chars */
2684
2685 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2686   "Return a regexp string which matches exactly STRING and nothing else.")
2687   (string)
2688      Lisp_Object string;
2689 {
2690   register unsigned char *in, *out, *end;
2691   register unsigned char *temp;
2692   int backslashes_added = 0;
2693
2694   CHECK_STRING (string, 0);
2695
2696   temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2697
2698   /* Now copy the data into the new string, inserting escapes. */
2699
2700   in = XSTRING (string)->data;
2701   end = in + STRING_BYTES (XSTRING (string));
2702   out = temp;
2703
2704   for (; in != end; in++)
2705     {
2706       if (*in == '[' || *in == ']'
2707           || *in == '*' || *in == '.' || *in == '\\'
2708           || *in == '?' || *in == '+'
2709           || *in == '^' || *in == '$')
2710         *out++ = '\\', backslashes_added++;
2711       *out++ = *in;
2712     }
2713
2714   return make_specified_string (temp,
2715                                 XSTRING (string)->size + backslashes_added,
2716                                 out - temp,
2717                                 STRING_MULTIBYTE (string));
2718 }
2719 \f
2720 syms_of_search ()
2721 {
2722   register int i;
2723
2724   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2725     {
2726       searchbufs[i].buf.allocated = 100;
2727       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2728       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2729       searchbufs[i].regexp = Qnil;
2730       staticpro (&searchbufs[i].regexp);
2731       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2732     }
2733   searchbuf_head = &searchbufs[0];
2734
2735   Qsearch_failed = intern ("search-failed");
2736   staticpro (&Qsearch_failed);
2737   Qinvalid_regexp = intern ("invalid-regexp");
2738   staticpro (&Qinvalid_regexp);
2739
2740   Fput (Qsearch_failed, Qerror_conditions,
2741         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2742   Fput (Qsearch_failed, Qerror_message,
2743         build_string ("Search failed"));
2744
2745   Fput (Qinvalid_regexp, Qerror_conditions,
2746         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2747   Fput (Qinvalid_regexp, Qerror_message,
2748         build_string ("Invalid regexp"));
2749
2750   last_thing_searched = Qnil;
2751   staticpro (&last_thing_searched);
2752
2753   defsubr (&Slooking_at);
2754   defsubr (&Sposix_looking_at);
2755   defsubr (&Sstring_match);
2756   defsubr (&Sposix_string_match);
2757   defsubr (&Ssearch_forward);
2758   defsubr (&Ssearch_backward);
2759   defsubr (&Sword_search_forward);
2760   defsubr (&Sword_search_backward);
2761   defsubr (&Sre_search_forward);
2762   defsubr (&Sre_search_backward);
2763   defsubr (&Sposix_search_forward);
2764   defsubr (&Sposix_search_backward);
2765   defsubr (&Sreplace_match);
2766   defsubr (&Smatch_beginning);
2767   defsubr (&Smatch_end);
2768   defsubr (&Smatch_data);
2769   defsubr (&Sset_match_data);
2770   defsubr (&Sregexp_quote);
2771 }