code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #ifdef STDC_HEADERS
  24 #include <stdlib.h>
  25 #endif
  26 #include "lisp.h"
  27 #include "syntax.h"
  28 #include "category.h"
  29 #include "buffer.h"
  30 #include "charset.h"
  31 #include "region-cache.h"
  32 #include "commands.h"
  33 #include "blockinput.h"
  34 #include "intervals.h"
  35
  36 #include <sys/types.h>
  37 #include "regex.h"
  38
  39 #define min(a, b) ((a) < (b) ? (a) : (b))
  40 #define max(a, b) ((a) > (b) ? (a) : (b))
  41
  42 #define REGEXP_CACHE_SIZE 20
  43
  44 /* If the regexp is non-nil, then the buffer contains the compiled form
  45    of that regexp, suitable for searching.  */
  46 struct regexp_cache
  47 {
  48   struct regexp_cache *next;
  49   Lisp_Object regexp;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 static void set_search_regs ();
  92 static void save_search_regs ();
  93 static int simple_search ();
  94 static int boyer_moore ();
  95 static int search_buffer ();
  96
  97 static void
  98 matcher_overflow ()
  99 {
 100   error ("Stack overflow in regexp matcher");
 101 }
 102
 103 #ifdef __STDC__
 104 #define CONST const
 105 #else
 106 #define CONST
 107 #endif
 108
 109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 110    PATTERN is the pattern to compile.
 111    CP is the place to put the result.
 112    TRANSLATE is a translation table for ignoring case, or nil for none.
 113    REGP is the structure that says where to store the "register"
 114    values that will result from matching this pattern.
 115    If it is 0, we should compile the pattern not to record any
 116    subexpression bounds.
 117    POSIX is nonzero if we want full backtracking (POSIX style)
 118    for this pattern.  0 means backtrack only enough to get a valid match.
 119    MULTIBYTE is nonzero if we want to handle multibyte characters in
 120    PATTERN.  0 means all multibyte characters are recognized just as
 121    sequences of binary data.  */
 122
 123 static void
 124 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 125      struct regexp_cache *cp;
 126      Lisp_Object pattern;
 127      Lisp_Object translate;
 128      struct re_registers *regp;
 129      int posix;
 130      int multibyte;
 131 {
 132   unsigned char *raw_pattern;
 133   int raw_pattern_size;
 134   char *val;
 135   reg_syntax_t old;
 136
 137   /* MULTIBYTE says whether the text to be searched is multibyte.
 138      We must convert PATTERN to match that, or we will not really
 139      find things right.  */
 140
 141   if (multibyte == STRING_MULTIBYTE (pattern))
 142     {
 143       raw_pattern = (unsigned char *) XSTRING (pattern)->data;
 144       raw_pattern_size = STRING_BYTES (XSTRING (pattern));
 145     }
 146   else if (multibyte)
 147     {
 148       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 149                                                   XSTRING (pattern)->size);
 150       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 151       copy_text (XSTRING (pattern)->data, raw_pattern,
 152                  XSTRING (pattern)->size, 0, 1);
 153     }
 154   else
 155     {
 156       /* Converting multibyte to single-byte.
 157
 158          ??? Perhaps this conversion should be done in a special way
 159          by subtracting nonascii-insert-offset from each non-ASCII char,
 160          so that only the multibyte chars which really correspond to
 161          the chosen single-byte character set can possibly match.  */
 162       raw_pattern_size = XSTRING (pattern)->size;
 163       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 164       copy_text (XSTRING (pattern)->data, raw_pattern,
 165                  STRING_BYTES (XSTRING (pattern)), 1, 0);
 166     }
 167
 168   cp->regexp = Qnil;
 169   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 170   cp->posix = posix;
 171   cp->buf.multibyte = multibyte;
 172   BLOCK_INPUT;
 173   old = re_set_syntax (RE_SYNTAX_EMACS
 174                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 175   val = (char *) re_compile_pattern ((char *)raw_pattern,
 176                                      raw_pattern_size, &cp->buf);
 177   re_set_syntax (old);
 178   UNBLOCK_INPUT;
 179   if (val)
 180     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 181
 182   cp->regexp = Fcopy_sequence (pattern);
 183 }
 184
 185 /* Compile a regexp if necessary, but first check to see if there's one in
 186    the cache.
 187    PATTERN is the pattern to compile.
 188    TRANSLATE is a translation table for ignoring case, or nil for none.
 189    REGP is the structure that says where to store the "register"
 190    values that will result from matching this pattern.
 191    If it is 0, we should compile the pattern not to record any
 192    subexpression bounds.
 193    POSIX is nonzero if we want full backtracking (POSIX style)
 194    for this pattern.  0 means backtrack only enough to get a valid match.  */
 195
 196 struct re_pattern_buffer *
 197 compile_pattern (pattern, regp, translate, posix, multibyte)
 198      Lisp_Object pattern;
 199      struct re_registers *regp;
 200      Lisp_Object translate;
 201      int posix, multibyte;
 202 {
 203   struct regexp_cache *cp, **cpp;
 204
 205   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 206     {
 207       cp = *cpp;
 208       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 209           && !NILP (Fstring_equal (cp->regexp, pattern))
 210           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 211           && cp->posix == posix
 212           && cp->buf.multibyte == multibyte)
 213         break;
 214
 215       /* If we're at the end of the cache, compile into the last cell.  */
 216       if (cp->next == 0)
 217         {
 218           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 219           break;
 220         }
 221     }
 222
 223   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 224      either because we found it in the cache or because we just compiled it.
 225      Move it to the front of the queue to mark it as most recently used.  */
 226   *cpp = cp->next;
 227   cp->next = searchbuf_head;
 228   searchbuf_head = cp;
 229
 230   /* Advise the searching functions about the space we have allocated
 231      for register data.  */
 232   if (regp)
 233     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 234
 235   return &cp->buf;
 236 }
 237
 238 /* Error condition used for failing searches */
 239 Lisp_Object Qsearch_failed;
 240
 241 Lisp_Object
 242 signal_failure (arg)
 243      Lisp_Object arg;
 244 {
 245   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 246   return Qnil;
 247 }
 248 \f
 249 static Lisp_Object
 250 looking_at_1 (string, posix)
 251      Lisp_Object string;
 252      int posix;
 253 {
 254   Lisp_Object val;
 255   unsigned char *p1, *p2;
 256   int s1, s2;
 257   register int i;
 258   struct re_pattern_buffer *bufp;
 259
 260   if (running_asynch_code)
 261     save_search_regs ();
 262
 263   CHECK_STRING (string, 0);
 264   bufp = compile_pattern (string, &search_regs,
 265                           (!NILP (current_buffer->case_fold_search)
 266                            ? DOWNCASE_TABLE : Qnil),
 267                           posix,
 268                           !NILP (current_buffer->enable_multibyte_characters));
 269
 270   immediate_quit = 1;
 271   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 272
 273   /* Get pointers and sizes of the two strings
 274      that make up the visible portion of the buffer. */
 275
 276   p1 = BEGV_ADDR;
 277   s1 = GPT_BYTE - BEGV_BYTE;
 278   p2 = GAP_END_ADDR;
 279   s2 = ZV_BYTE - GPT_BYTE;
 280   if (s1 < 0)
 281     {
 282       p2 = p1;
 283       s2 = ZV_BYTE - BEGV_BYTE;
 284       s1 = 0;
 285     }
 286   if (s2 < 0)
 287     {
 288       s1 = ZV_BYTE - BEGV_BYTE;
 289       s2 = 0;
 290     }
 291
 292   re_match_object = Qnil;
 293
 294   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 295                   PT_BYTE - BEGV_BYTE, &search_regs,
 296                   ZV_BYTE - BEGV_BYTE);
 297   if (i == -2)
 298     matcher_overflow ();
 299
 300   val = (0 <= i ? Qt : Qnil);
 301   if (i >= 0)
 302     for (i = 0; i < search_regs.num_regs; i++)
 303       if (search_regs.start[i] >= 0)
 304         {
 305           search_regs.start[i]
 306             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 307           search_regs.end[i]
 308             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 309         }
 310   XSETBUFFER (last_thing_searched, current_buffer);
 311   immediate_quit = 0;
 312   return val;
 313 }
 314
 315 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 316   "Return t if text after point matches regular expression REGEXP.\n\
 317 This function modifies the match data that `match-beginning',\n\
 318 `match-end' and `match-data' access; save and restore the match\n\
 319 data if you want to preserve them.")
 320   (regexp)
 321      Lisp_Object regexp;
 322 {
 323   return looking_at_1 (regexp, 0);
 324 }
 325
 326 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 327   "Return t if text after point matches regular expression REGEXP.\n\
 328 Find the longest match, in accord with Posix regular expression rules.\n\
 329 This function modifies the match data that `match-beginning',\n\
 330 `match-end' and `match-data' access; save and restore the match\n\
 331 data if you want to preserve them.")
 332   (regexp)
 333      Lisp_Object regexp;
 334 {
 335   return looking_at_1 (regexp, 1);
 336 }
 337 \f
 338 static Lisp_Object
 339 string_match_1 (regexp, string, start, posix)
 340      Lisp_Object regexp, string, start;
 341      int posix;
 342 {
 343   int val;
 344   struct re_pattern_buffer *bufp;
 345   int pos, pos_byte;
 346   int i;
 347
 348   if (running_asynch_code)
 349     save_search_regs ();
 350
 351   CHECK_STRING (regexp, 0);
 352   CHECK_STRING (string, 1);
 353
 354   if (NILP (start))
 355     pos = 0, pos_byte = 0;
 356   else
 357     {
 358       int len = XSTRING (string)->size;
 359
 360       CHECK_NUMBER (start, 2);
 361       pos = XINT (start);
 362       if (pos < 0 && -pos <= len)
 363         pos = len + pos;
 364       else if (0 > pos || pos > len)
 365         args_out_of_range (string, start);
 366       pos_byte = string_char_to_byte (string, pos);
 367     }
 368
 369   bufp = compile_pattern (regexp, &search_regs,
 370                           (!NILP (current_buffer->case_fold_search)
 371                            ? DOWNCASE_TABLE : Qnil),
 372                           posix,
 373                           STRING_MULTIBYTE (string));
 374   immediate_quit = 1;
 375   re_match_object = string;
 376
 377   val = re_search (bufp, (char *) XSTRING (string)->data,
 378                    STRING_BYTES (XSTRING (string)), pos_byte,
 379                    STRING_BYTES (XSTRING (string)) - pos_byte,
 380                    &search_regs);
 381   immediate_quit = 0;
 382   last_thing_searched = Qt;
 383   if (val == -2)
 384     matcher_overflow ();
 385   if (val < 0) return Qnil;
 386
 387   for (i = 0; i < search_regs.num_regs; i++)
 388     if (search_regs.start[i] >= 0)
 389       {
 390         search_regs.start[i]
 391           = string_byte_to_char (string, search_regs.start[i]);
 392         search_regs.end[i]
 393           = string_byte_to_char (string, search_regs.end[i]);
 394       }
 395
 396   return make_number (string_byte_to_char (string, val));
 397 }
 398
 399 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 400   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 401 If third arg START is non-nil, start search at that index in STRING.\n\
 402 For index of first char beyond the match, do (match-end 0).\n\
 403 `match-end' and `match-beginning' also give indices of substrings\n\
 404 matched by parenthesis constructs in the pattern.")
 405   (regexp, string, start)
 406      Lisp_Object regexp, string, start;
 407 {
 408   return string_match_1 (regexp, string, start, 0);
 409 }
 410
 411 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 412   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 413 Find the longest match, in accord with Posix regular expression rules.\n\
 414 If third arg START is non-nil, start search at that index in STRING.\n\
 415 For index of first char beyond the match, do (match-end 0).\n\
 416 `match-end' and `match-beginning' also give indices of substrings\n\
 417 matched by parenthesis constructs in the pattern.")
 418   (regexp, string, start)
 419      Lisp_Object regexp, string, start;
 420 {
 421   return string_match_1 (regexp, string, start, 1);
 422 }
 423
 424 /* Match REGEXP against STRING, searching all of STRING,
 425    and return the index of the match, or negative on failure.
 426    This does not clobber the match data.  */
 427
 428 int
 429 fast_string_match (regexp, string)
 430      Lisp_Object regexp, string;
 431 {
 432   int val;
 433   struct re_pattern_buffer *bufp;
 434
 435   bufp = compile_pattern (regexp, 0, Qnil,
 436                           0, STRING_MULTIBYTE (string));
 437   immediate_quit = 1;
 438   re_match_object = string;
 439
 440   val = re_search (bufp, (char *) XSTRING (string)->data,
 441                    STRING_BYTES (XSTRING (string)), 0,
 442                    STRING_BYTES (XSTRING (string)), 0);
 443   immediate_quit = 0;
 444   return val;
 445 }
 446
 447 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 448    and return the index of the match, or negative on failure.
 449    This does not clobber the match data.
 450    We assume that STRING contains single-byte characters.  */
 451
 452 extern Lisp_Object Vascii_downcase_table;
 453
 454 int
 455 fast_c_string_match_ignore_case (regexp, string)
 456      Lisp_Object regexp;
 457      char *string;
 458 {
 459   int val;
 460   struct re_pattern_buffer *bufp;
 461   int len = strlen (string);
 462
 463   regexp = string_make_unibyte (regexp);
 464   re_match_object = Qt;
 465   bufp = compile_pattern (regexp, 0,
 466                           Vascii_downcase_table, 0,
 467                           0);
 468   immediate_quit = 1;
 469   val = re_search (bufp, string, len, 0, len, 0);
 470   immediate_quit = 0;
 471   return val;
 472 }
 473 \f
 474 /* The newline cache: remembering which sections of text have no newlines.  */
 475
 476 /* If the user has requested newline caching, make sure it's on.
 477    Otherwise, make sure it's off.
 478    This is our cheezy way of associating an action with the change of
 479    state of a buffer-local variable.  */
 480 static void
 481 newline_cache_on_off (buf)
 482      struct buffer *buf;
 483 {
 484   if (NILP (buf->cache_long_line_scans))
 485     {
 486       /* It should be off.  */
 487       if (buf->newline_cache)
 488         {
 489           free_region_cache (buf->newline_cache);
 490           buf->newline_cache = 0;
 491         }
 492     }
 493   else
 494     {
 495       /* It should be on.  */
 496       if (buf->newline_cache == 0)
 497         buf->newline_cache = new_region_cache ();
 498     }
 499 }
 500
 501 \f
 502 /* Search for COUNT instances of the character TARGET between START and END.
 503
 504    If COUNT is positive, search forwards; END must be >= START.
 505    If COUNT is negative, search backwards for the -COUNTth instance;
 506       END must be <= START.
 507    If COUNT is zero, do anything you please; run rogue, for all I care.
 508
 509    If END is zero, use BEGV or ZV instead, as appropriate for the
 510    direction indicated by COUNT.
 511
 512    If we find COUNT instances, set *SHORTAGE to zero, and return the
 513    position after the COUNTth match.  Note that for reverse motion
 514    this is not the same as the usual convention for Emacs motion commands.
 515
 516    If we don't find COUNT instances before reaching END, set *SHORTAGE
 517    to the number of TARGETs left unfound, and return END.
 518
 519    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 520    except when inside redisplay.  */
 521
 522 int
 523 scan_buffer (target, start, end, count, shortage, allow_quit)
 524      register int target;
 525      int start, end;
 526      int count;
 527      int *shortage;
 528      int allow_quit;
 529 {
 530   struct region_cache *newline_cache;
 531   int direction;
 532
 533   if (count > 0)
 534     {
 535       direction = 1;
 536       if (! end) end = ZV;
 537     }
 538   else
 539     {
 540       direction = -1;
 541       if (! end) end = BEGV;
 542     }
 543
 544   newline_cache_on_off (current_buffer);
 545   newline_cache = current_buffer->newline_cache;
 546
 547   if (shortage != 0)
 548     *shortage = 0;
 549
 550   immediate_quit = allow_quit;
 551
 552   if (count > 0)
 553     while (start != end)
 554       {
 555         /* Our innermost scanning loop is very simple; it doesn't know
 556            about gaps, buffer ends, or the newline cache.  ceiling is
 557            the position of the last character before the next such
 558            obstacle --- the last character the dumb search loop should
 559            examine.  */
 560         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 561         int start_byte = CHAR_TO_BYTE (start);
 562         int tem;
 563
 564         /* If we're looking for a newline, consult the newline cache
 565            to see where we can avoid some scanning.  */
 566         if (target == '\n' && newline_cache)
 567           {
 568             int next_change;
 569             immediate_quit = 0;
 570             while (region_cache_forward
 571                    (current_buffer, newline_cache, start_byte, &next_change))
 572               start_byte = next_change;
 573             immediate_quit = allow_quit;
 574
 575             /* START should never be after END.  */
 576             if (start_byte > ceiling_byte)
 577               start_byte = ceiling_byte;
 578
 579             /* Now the text after start is an unknown region, and
 580                next_change is the position of the next known region. */
 581             ceiling_byte = min (next_change - 1, ceiling_byte);
 582           }
 583
 584         /* The dumb loop can only scan text stored in contiguous
 585            bytes. BUFFER_CEILING_OF returns the last character
 586            position that is contiguous, so the ceiling is the
 587            position after that.  */
 588         tem = BUFFER_CEILING_OF (start_byte);
 589         ceiling_byte = min (tem, ceiling_byte);
 590
 591         {
 592           /* The termination address of the dumb loop.  */
 593           register unsigned char *ceiling_addr
 594             = BYTE_POS_ADDR (ceiling_byte) + 1;
 595           register unsigned char *cursor
 596             = BYTE_POS_ADDR (start_byte);
 597           unsigned char *base = cursor;
 598
 599           while (cursor < ceiling_addr)
 600             {
 601               unsigned char *scan_start = cursor;
 602
 603               /* The dumb loop.  */
 604               while (*cursor != target && ++cursor < ceiling_addr)
 605                 ;
 606
 607               /* If we're looking for newlines, cache the fact that
 608                  the region from start to cursor is free of them. */
 609               if (target == '\n' && newline_cache)
 610                 know_region_cache (current_buffer, newline_cache,
 611                                    start_byte + scan_start - base,
 612                                    start_byte + cursor - base);
 613
 614               /* Did we find the target character?  */
 615               if (cursor < ceiling_addr)
 616                 {
 617                   if (--count == 0)
 618                     {
 619                       immediate_quit = 0;
 620                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 621                     }
 622                   cursor++;
 623                 }
 624             }
 625
 626           start = BYTE_TO_CHAR (start_byte + cursor - base);
 627         }
 628       }
 629   else
 630     while (start > end)
 631       {
 632         /* The last character to check before the next obstacle.  */
 633         int ceiling_byte = CHAR_TO_BYTE (end);
 634         int start_byte = CHAR_TO_BYTE (start);
 635         int tem;
 636
 637         /* Consult the newline cache, if appropriate.  */
 638         if (target == '\n' && newline_cache)
 639           {
 640             int next_change;
 641             immediate_quit = 0;
 642             while (region_cache_backward
 643                    (current_buffer, newline_cache, start_byte, &next_change))
 644               start_byte = next_change;
 645             immediate_quit = allow_quit;
 646
 647             /* Start should never be at or before end.  */
 648             if (start_byte <= ceiling_byte)
 649               start_byte = ceiling_byte + 1;
 650
 651             /* Now the text before start is an unknown region, and
 652                next_change is the position of the next known region. */
 653             ceiling_byte = max (next_change, ceiling_byte);
 654           }
 655
 656         /* Stop scanning before the gap.  */
 657         tem = BUFFER_FLOOR_OF (start_byte - 1);
 658         ceiling_byte = max (tem, ceiling_byte);
 659
 660         {
 661           /* The termination address of the dumb loop.  */
 662           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 663           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 664           unsigned char *base = cursor;
 665
 666           while (cursor >= ceiling_addr)
 667             {
 668               unsigned char *scan_start = cursor;
 669
 670               while (*cursor != target && --cursor >= ceiling_addr)
 671                 ;
 672
 673               /* If we're looking for newlines, cache the fact that
 674                  the region from after the cursor to start is free of them.  */
 675               if (target == '\n' && newline_cache)
 676                 know_region_cache (current_buffer, newline_cache,
 677                                    start_byte + cursor - base,
 678                                    start_byte + scan_start - base);
 679
 680               /* Did we find the target character?  */
 681               if (cursor >= ceiling_addr)
 682                 {
 683                   if (++count >= 0)
 684                     {
 685                       immediate_quit = 0;
 686                       return BYTE_TO_CHAR (start_byte + cursor - base);
 687                     }
 688                   cursor--;
 689                 }
 690             }
 691
 692           start = BYTE_TO_CHAR (start_byte + cursor - base);
 693         }
 694       }
 695
 696   immediate_quit = 0;
 697   if (shortage != 0)
 698     *shortage = count * direction;
 699   return start;
 700 }
 701 \f
 702 /* Search for COUNT instances of a line boundary, which means either a
 703    newline or (if selective display enabled) a carriage return.
 704    Start at START.  If COUNT is negative, search backwards.
 705
 706    We report the resulting position by calling TEMP_SET_PT_BOTH.
 707
 708    If we find COUNT instances. we position after (always after,
 709    even if scanning backwards) the COUNTth match, and return 0.
 710
 711    If we don't find COUNT instances before reaching the end of the
 712    buffer (or the beginning, if scanning backwards), we return
 713    the number of line boundaries left unfound, and position at
 714    the limit we bumped up against.
 715
 716    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 717    except in special cases.  */
 718
 719 int
 720 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 721      int start, start_byte;
 722      int limit, limit_byte;
 723      register int count;
 724      int allow_quit;
 725 {
 726   int direction = ((count > 0) ? 1 : -1);
 727
 728   register unsigned char *cursor;
 729   unsigned char *base;
 730
 731   register int ceiling;
 732   register unsigned char *ceiling_addr;
 733
 734   int old_immediate_quit = immediate_quit;
 735
 736   /* If we are not in selective display mode,
 737      check only for newlines.  */
 738   int selective_display = (!NILP (current_buffer->selective_display)
 739                            && !INTEGERP (current_buffer->selective_display));
 740
 741   /* The code that follows is like scan_buffer
 742      but checks for either newline or carriage return.  */
 743
 744   if (allow_quit)
 745     immediate_quit++;
 746
 747   start_byte = CHAR_TO_BYTE (start);
 748
 749   if (count > 0)
 750     {
 751       while (start_byte < limit_byte)
 752         {
 753           ceiling =  BUFFER_CEILING_OF (start_byte);
 754           ceiling = min (limit_byte - 1, ceiling);
 755           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 756           base = (cursor = BYTE_POS_ADDR (start_byte));
 757           while (1)
 758             {
 759               while (*cursor != '\n' && ++cursor != ceiling_addr)
 760                 ;
 761
 762               if (cursor != ceiling_addr)
 763                 {
 764                   if (--count == 0)
 765                     {
 766                       immediate_quit = old_immediate_quit;
 767                       start_byte = start_byte + cursor - base + 1;
 768                       start = BYTE_TO_CHAR (start_byte);
 769                       TEMP_SET_PT_BOTH (start, start_byte);
 770                       return 0;
 771                     }
 772                   else
 773                     if (++cursor == ceiling_addr)
 774                       break;
 775                 }
 776               else
 777                 break;
 778             }
 779           start_byte += cursor - base;
 780         }
 781     }
 782   else
 783     {
 784       while (start_byte > limit_byte)
 785         {
 786           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 787           ceiling = max (limit_byte, ceiling);
 788           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 789           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 790           while (1)
 791             {
 792               while (--cursor != ceiling_addr && *cursor != '\n')
 793                 ;
 794
 795               if (cursor != ceiling_addr)
 796                 {
 797                   if (++count == 0)
 798                     {
 799                       immediate_quit = old_immediate_quit;
 800                       /* Return the position AFTER the match we found.  */
 801                       start_byte = start_byte + cursor - base + 1;
 802                       start = BYTE_TO_CHAR (start_byte);
 803                       TEMP_SET_PT_BOTH (start, start_byte);
 804                       return 0;
 805                     }
 806                 }
 807               else
 808                 break;
 809             }
 810           /* Here we add 1 to compensate for the last decrement
 811              of CURSOR, which took it past the valid range.  */
 812           start_byte += cursor - base + 1;
 813         }
 814     }
 815
 816   TEMP_SET_PT_BOTH (limit, limit_byte);
 817   immediate_quit = old_immediate_quit;
 818
 819   return count * direction;
 820 }
 821
 822 int
 823 find_next_newline_no_quit (from, cnt)
 824      register int from, cnt;
 825 {
 826   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 827 }
 828
 829 /* Like find_next_newline, but returns position before the newline,
 830    not after, and only search up to TO.  This isn't just
 831    find_next_newline (...)-1, because you might hit TO.  */
 832
 833 int
 834 find_before_next_newline (from, to, cnt)
 835      int from, to, cnt;
 836 {
 837   int shortage;
 838   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 839
 840   if (shortage == 0)
 841     pos--;
 842
 843   return pos;
 844 }
 845 \f
 846 /* Subroutines of Lisp buffer search functions. */
 847
 848 static Lisp_Object
 849 search_command (string, bound, noerror, count, direction, RE, posix)
 850      Lisp_Object string, bound, noerror, count;
 851      int direction;
 852      int RE;
 853      int posix;
 854 {
 855   register int np;
 856   int lim, lim_byte;
 857   int n = direction;
 858
 859   if (!NILP (count))
 860     {
 861       CHECK_NUMBER (count, 3);
 862       n *= XINT (count);
 863     }
 864
 865   CHECK_STRING (string, 0);
 866   if (NILP (bound))
 867     {
 868       if (n > 0)
 869         lim = ZV, lim_byte = ZV_BYTE;
 870       else
 871         lim = BEGV, lim_byte = BEGV_BYTE;
 872     }
 873   else
 874     {
 875       CHECK_NUMBER_COERCE_MARKER (bound, 1);
 876       lim = XINT (bound);
 877       if (n > 0 ? lim < PT : lim > PT)
 878         error ("Invalid search bound (wrong side of point)");
 879       if (lim > ZV)
 880         lim = ZV, lim_byte = ZV_BYTE;
 881       else if (lim < BEGV)
 882         lim = BEGV, lim_byte = BEGV_BYTE;
 883       else
 884         lim_byte = CHAR_TO_BYTE (lim);
 885     }
 886
 887   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 888                       (!NILP (current_buffer->case_fold_search)
 889                        ? current_buffer->case_canon_table
 890                        : Qnil),
 891                       (!NILP (current_buffer->case_fold_search)
 892                        ? current_buffer->case_eqv_table
 893                        : Qnil),
 894                       posix);
 895   if (np <= 0)
 896     {
 897       if (NILP (noerror))
 898         return signal_failure (string);
 899       if (!EQ (noerror, Qt))
 900         {
 901           if (lim < BEGV || lim > ZV)
 902             abort ();
 903           SET_PT_BOTH (lim, lim_byte);
 904           return Qnil;
 905 #if 0 /* This would be clean, but maybe programs depend on
 906          a value of nil here.  */
 907           np = lim;
 908 #endif
 909         }
 910       else
 911         return Qnil;
 912     }
 913
 914   if (np < BEGV || np > ZV)
 915     abort ();
 916
 917   SET_PT (np);
 918
 919   return make_number (np);
 920 }
 921 \f
 922 /* Return 1 if REGEXP it matches just one constant string.  */
 923
 924 static int
 925 trivial_regexp_p (regexp)
 926      Lisp_Object regexp;
 927 {
 928   int len = STRING_BYTES (XSTRING (regexp));
 929   unsigned char *s = XSTRING (regexp)->data;
 930   unsigned char c;
 931   while (--len >= 0)
 932     {
 933       switch (*s++)
 934         {
 935         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 936           return 0;
 937         case '\\':
 938           if (--len < 0)
 939             return 0;
 940           switch (*s++)
 941             {
 942             case '|': case '(': case ')': case '`': case '\'': case 'b':
 943             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 944             case 'S': case '=':
 945             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 946             case '1': case '2': case '3': case '4': case '5':
 947             case '6': case '7': case '8': case '9':
 948               return 0;
 949             }
 950         }
 951     }
 952   return 1;
 953 }
 954
 955 /* Search for the n'th occurrence of STRING in the current buffer,
 956    starting at position POS and stopping at position LIM,
 957    treating STRING as a literal string if RE is false or as
 958    a regular expression if RE is true.
 959
 960    If N is positive, searching is forward and LIM must be greater than POS.
 961    If N is negative, searching is backward and LIM must be less than POS.
 962
 963    Returns -x if x occurrences remain to be found (x > 0),
 964    or else the position at the beginning of the Nth occurrence
 965    (if searching backward) or the end (if searching forward).
 966
 967    POSIX is nonzero if we want full backtracking (POSIX style)
 968    for this pattern.  0 means backtrack only enough to get a valid match.  */
 969
 970 #define TRANSLATE(out, trt, d)                  \
 971 do                                              \
 972   {                                             \
 973     if (! NILP (trt))                           \
 974       {                                         \
 975         Lisp_Object temp;                       \
 976         temp = Faref (trt, make_number (d));    \
 977         if (INTEGERP (temp))                    \
 978           out = XINT (temp);                    \
 979         else                                    \
 980           out = d;                              \
 981       }                                         \
 982     else                                        \
 983       out = d;                                  \
 984   }                                             \
 985 while (0)
 986
 987 static int
 988 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
 989                RE, trt, inverse_trt, posix)
 990      Lisp_Object string;
 991      int pos;
 992      int pos_byte;
 993      int lim;
 994      int lim_byte;
 995      int n;
 996      int RE;
 997      Lisp_Object trt;
 998      Lisp_Object inverse_trt;
 999      int posix;
1000 {
1001   int len = XSTRING (string)->size;
1002   int len_byte = STRING_BYTES (XSTRING (string));
1003   register int i;
1004
1005   if (running_asynch_code)
1006     save_search_regs ();
1007
1008   /* Null string is found at starting position.  */
1009   if (len == 0)
1010     {
1011       set_search_regs (pos, 0);
1012       return pos;
1013     }
1014
1015   /* Searching 0 times means don't move.  */
1016   if (n == 0)
1017     return pos;
1018
1019   if (RE && !trivial_regexp_p (string))
1020     {
1021       unsigned char *p1, *p2;
1022       int s1, s2;
1023       struct re_pattern_buffer *bufp;
1024
1025       bufp = compile_pattern (string, &search_regs, trt, posix,
1026                               !NILP (current_buffer->enable_multibyte_characters));
1027
1028       immediate_quit = 1;       /* Quit immediately if user types ^G,
1029                                    because letting this function finish
1030                                    can take too long. */
1031       QUIT;                     /* Do a pending quit right away,
1032                                    to avoid paradoxical behavior */
1033       /* Get pointers and sizes of the two strings
1034          that make up the visible portion of the buffer. */
1035
1036       p1 = BEGV_ADDR;
1037       s1 = GPT_BYTE - BEGV_BYTE;
1038       p2 = GAP_END_ADDR;
1039       s2 = ZV_BYTE - GPT_BYTE;
1040       if (s1 < 0)
1041         {
1042           p2 = p1;
1043           s2 = ZV_BYTE - BEGV_BYTE;
1044           s1 = 0;
1045         }
1046       if (s2 < 0)
1047         {
1048           s1 = ZV_BYTE - BEGV_BYTE;
1049           s2 = 0;
1050         }
1051       re_match_object = Qnil;
1052
1053       while (n < 0)
1054         {
1055           int val;
1056           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1057                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1058                              &search_regs,
1059                              /* Don't allow match past current point */
1060                              pos_byte - BEGV_BYTE);
1061           if (val == -2)
1062             {
1063               matcher_overflow ();
1064             }
1065           if (val >= 0)
1066             {
1067               pos_byte = search_regs.start[0] + BEGV_BYTE;
1068               for (i = 0; i < search_regs.num_regs; i++)
1069                 if (search_regs.start[i] >= 0)
1070                   {
1071                     search_regs.start[i]
1072                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1073                     search_regs.end[i]
1074                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1075                   }
1076               XSETBUFFER (last_thing_searched, current_buffer);
1077               /* Set pos to the new position. */
1078               pos = search_regs.start[0];
1079             }
1080           else
1081             {
1082               immediate_quit = 0;
1083               return (n);
1084             }
1085           n++;
1086         }
1087       while (n > 0)
1088         {
1089           int val;
1090           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1091                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1092                              &search_regs,
1093                              lim_byte - BEGV_BYTE);
1094           if (val == -2)
1095             {
1096               matcher_overflow ();
1097             }
1098           if (val >= 0)
1099             {
1100               pos_byte = search_regs.end[0] + BEGV_BYTE;
1101               for (i = 0; i < search_regs.num_regs; i++)
1102                 if (search_regs.start[i] >= 0)
1103                   {
1104                     search_regs.start[i]
1105                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1106                     search_regs.end[i]
1107                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1108                   }
1109               XSETBUFFER (last_thing_searched, current_buffer);
1110               pos = search_regs.end[0];
1111             }
1112           else
1113             {
1114               immediate_quit = 0;
1115               return (0 - n);
1116             }
1117           n--;
1118         }
1119       immediate_quit = 0;
1120       return (pos);
1121     }
1122   else                          /* non-RE case */
1123     {
1124       unsigned char *raw_pattern, *pat;
1125       int raw_pattern_size;
1126       int raw_pattern_size_byte;
1127       unsigned char *patbuf;
1128       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1129       unsigned char *base_pat = XSTRING (string)->data;
1130       int charset_base = -1;
1131       int simple = 1;
1132
1133       /* MULTIBYTE says whether the text to be searched is multibyte.
1134          We must convert PATTERN to match that, or we will not really
1135          find things right.  */
1136
1137       if (multibyte == STRING_MULTIBYTE (string))
1138         {
1139           raw_pattern = (unsigned char *) XSTRING (string)->data;
1140           raw_pattern_size = XSTRING (string)->size;
1141           raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1142         }
1143       else if (multibyte)
1144         {
1145           raw_pattern_size = XSTRING (string)->size;
1146           raw_pattern_size_byte
1147             = count_size_as_multibyte (XSTRING (string)->data,
1148                                        raw_pattern_size);
1149           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1150           copy_text (XSTRING (string)->data, raw_pattern,
1151                      XSTRING (string)->size, 0, 1);
1152         }
1153       else
1154         {
1155           /* Converting multibyte to single-byte.
1156
1157              ??? Perhaps this conversion should be done in a special way
1158              by subtracting nonascii-insert-offset from each non-ASCII char,
1159              so that only the multibyte chars which really correspond to
1160              the chosen single-byte character set can possibly match.  */
1161           raw_pattern_size = XSTRING (string)->size;
1162           raw_pattern_size_byte = XSTRING (string)->size;
1163           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1164           copy_text (XSTRING (string)->data, raw_pattern,
1165                      STRING_BYTES (XSTRING (string)), 1, 0);
1166         }
1167
1168       /* Copy and optionally translate the pattern.  */
1169       len = raw_pattern_size;
1170       len_byte = raw_pattern_size_byte;
1171       patbuf = (unsigned char *) alloca (len_byte);
1172       pat = patbuf;
1173       base_pat = raw_pattern;
1174       if (multibyte)
1175         {
1176           while (--len >= 0)
1177             {
1178               unsigned char workbuf[4], *str;
1179               int c, translated, inverse;
1180               int in_charlen, charlen;
1181
1182               /* If we got here and the RE flag is set, it's because we're
1183                  dealing with a regexp known to be trivial, so the backslash
1184                  just quotes the next character.  */
1185               if (RE && *base_pat == '\\')
1186                 {
1187                   len--;
1188                   len_byte--;
1189                   base_pat++;
1190                 }
1191
1192               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1193               /* Translate the character, if requested.  */
1194               TRANSLATE (translated, trt, c);
1195               /* If translation changed the byte-length, go back
1196                  to the original character.  */
1197               charlen = CHAR_STRING (translated, workbuf, str);
1198               if (in_charlen != charlen)
1199                 {
1200                   translated = c;
1201                   charlen = CHAR_STRING (c, workbuf, str);
1202                 }
1203
1204               TRANSLATE (inverse, inverse_trt, c);
1205
1206               /* Did this char actually get translated?
1207                  Would any other char get translated into it?  */
1208               if (translated != c || inverse != c)
1209                 {
1210                   /* Keep track of which character set row
1211                      contains the characters that need translation.  */
1212                   int charset_base_code = c & ~0xff;
1213                   if (charset_base == -1)
1214                     charset_base = charset_base_code;
1215                   else if (charset_base != charset_base_code)
1216                     /* If two different rows appear, needing translation,
1217                        then we cannot use boyer_moore search.  */
1218                     simple = 0;
1219                     /* ??? Handa: this must do simple = 0
1220                        if c is a composite character.  */
1221                 }
1222
1223               /* Store this character into the translated pattern.  */
1224               bcopy (str, pat, charlen);
1225               pat += charlen;
1226               base_pat += in_charlen;
1227               len_byte -= in_charlen;
1228             }
1229         }
1230       else
1231         {
1232           while (--len >= 0)
1233             {
1234               int c, translated, inverse;
1235
1236               /* If we got here and the RE flag is set, it's because we're
1237                  dealing with a regexp known to be trivial, so the backslash
1238                  just quotes the next character.  */
1239               if (RE && *base_pat == '\\')
1240                 {
1241                   len--;
1242                   base_pat++;
1243                 }
1244               c = *base_pat++;
1245               TRANSLATE (translated, trt, c);
1246               TRANSLATE (inverse, inverse_trt, c);
1247
1248               /* Did this char actually get translated?
1249                  Would any other char get translated into it?  */
1250               if (translated != c || inverse != c)
1251                 {
1252                   /* Keep track of which character set row
1253                      contains the characters that need translation.  */
1254                   int charset_base_code = c & ~0xff;
1255                   if (charset_base == -1)
1256                     charset_base = charset_base_code;
1257                   else if (charset_base != charset_base_code)
1258                     /* If two different rows appear, needing translation,
1259                        then we cannot use boyer_moore search.  */
1260                     simple = 0;
1261                 }
1262               *pat++ = translated;
1263             }
1264         }
1265
1266       len_byte = pat - patbuf;
1267       len = raw_pattern_size;
1268       pat = base_pat = patbuf;
1269
1270       if (simple)
1271         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1272                             pos, pos_byte, lim, lim_byte,
1273                             charset_base);
1274       else
1275         return simple_search (n, pat, len, len_byte, trt,
1276                               pos, pos_byte, lim, lim_byte);
1277     }
1278 }
1279 \f
1280 /* Do a simple string search N times for the string PAT,
1281    whose length is LEN/LEN_BYTE,
1282    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1283    TRT is the translation table.
1284
1285    Return the character position where the match is found.
1286    Otherwise, if M matches remained to be found, return -M.
1287
1288    This kind of search works regardless of what is in PAT and
1289    regardless of what is in TRT.  It is used in cases where
1290    boyer_moore cannot work.  */
1291
1292 static int
1293 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1294      int n;
1295      unsigned char *pat;
1296      int len, len_byte;
1297      Lisp_Object trt;
1298      int pos, pos_byte;
1299      int lim, lim_byte;
1300 {
1301   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1302   int forward = n > 0;
1303
1304   if (lim > pos && multibyte)
1305     while (n > 0)
1306       {
1307         while (1)
1308           {
1309             /* Try matching at position POS.  */
1310             int this_pos = pos;
1311             int this_pos_byte = pos_byte;
1312             int this_len = len;
1313             int this_len_byte = len_byte;
1314             unsigned char *p = pat;
1315             if (pos + len > lim)
1316               goto stop;
1317
1318             while (this_len > 0)
1319               {
1320                 int charlen, buf_charlen;
1321                 int pat_ch, buf_ch;
1322
1323                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1324                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1325                                                  ZV_BYTE - this_pos_byte,
1326                                                  buf_charlen);
1327                 TRANSLATE (buf_ch, trt, buf_ch);
1328
1329                 if (buf_ch != pat_ch)
1330                   break;
1331
1332                 this_len_byte -= charlen;
1333                 this_len--;
1334                 p += charlen;
1335
1336                 this_pos_byte += buf_charlen;
1337                 this_pos++;
1338               }
1339
1340             if (this_len == 0)
1341               {
1342                 pos += len;
1343                 pos_byte += len_byte;
1344                 break;
1345               }
1346
1347             INC_BOTH (pos, pos_byte);
1348           }
1349
1350         n--;
1351       }
1352   else if (lim > pos)
1353     while (n > 0)
1354       {
1355         while (1)
1356           {
1357             /* Try matching at position POS.  */
1358             int this_pos = pos;
1359             int this_len = len;
1360             unsigned char *p = pat;
1361
1362             if (pos + len > lim)
1363               goto stop;
1364
1365             while (this_len > 0)
1366               {
1367                 int pat_ch = *p++;
1368                 int buf_ch = FETCH_BYTE (this_pos);
1369                 TRANSLATE (buf_ch, trt, buf_ch);
1370
1371                 if (buf_ch != pat_ch)
1372                   break;
1373
1374                 this_len--;
1375                 this_pos++;
1376               }
1377
1378             if (this_len == 0)
1379               {
1380                 pos += len;
1381                 break;
1382               }
1383
1384             pos++;
1385           }
1386
1387         n--;
1388       }
1389   /* Backwards search.  */
1390   else if (lim < pos && multibyte)
1391     while (n < 0)
1392       {
1393         while (1)
1394           {
1395             /* Try matching at position POS.  */
1396             int this_pos = pos - len;
1397             int this_pos_byte = pos_byte - len_byte;
1398             int this_len = len;
1399             int this_len_byte = len_byte;
1400             unsigned char *p = pat;
1401
1402             if (pos - len < lim)
1403               goto stop;
1404
1405             while (this_len > 0)
1406               {
1407                 int charlen, buf_charlen;
1408                 int pat_ch, buf_ch;
1409
1410                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1411                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1412                                                  ZV_BYTE - this_pos_byte,
1413                                                  buf_charlen);
1414                 TRANSLATE (buf_ch, trt, buf_ch);
1415
1416                 if (buf_ch != pat_ch)
1417                   break;
1418
1419                 this_len_byte -= charlen;
1420                 this_len--;
1421                 p += charlen;
1422                 this_pos_byte += buf_charlen;
1423                 this_pos++;
1424               }
1425
1426             if (this_len == 0)
1427               {
1428                 pos -= len;
1429                 pos_byte -= len_byte;
1430                 break;
1431               }
1432
1433             DEC_BOTH (pos, pos_byte);
1434           }
1435
1436         n++;
1437       }
1438   else if (lim < pos)
1439     while (n < 0)
1440       {
1441         while (1)
1442           {
1443             /* Try matching at position POS.  */
1444             int this_pos = pos - len;
1445             int this_len = len;
1446             unsigned char *p = pat;
1447
1448             if (pos - len < lim)
1449               goto stop;
1450
1451             while (this_len > 0)
1452               {
1453                 int pat_ch = *p++;
1454                 int buf_ch = FETCH_BYTE (this_pos);
1455                 TRANSLATE (buf_ch, trt, buf_ch);
1456
1457                 if (buf_ch != pat_ch)
1458                   break;
1459                 this_len--;
1460                 this_pos++;
1461               }
1462
1463             if (this_len == 0)
1464               {
1465                 pos -= len;
1466                 break;
1467               }
1468
1469             pos--;
1470           }
1471
1472         n++;
1473       }
1474
1475  stop:
1476   if (n == 0)
1477     {
1478       if (forward)
1479         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1480       else
1481         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1482
1483       return pos;
1484     }
1485   else if (n > 0)
1486     return -n;
1487   else
1488     return n;
1489 }
1490 \f
1491 /* Do Boyer-Moore search N times for the string PAT,
1492    whose length is LEN/LEN_BYTE,
1493    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1494    DIRECTION says which direction we search in.
1495    TRT and INVERSE_TRT are translation tables.
1496
1497    This kind of search works if all the characters in PAT that have
1498    nontrivial translation are the same aside from the last byte.  This
1499    makes it possible to translate just the last byte of a character,
1500    and do so after just a simple test of the context.
1501
1502    If that criterion is not satisfied, do not call this function.  */
1503
1504 static int
1505 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1506              pos, pos_byte, lim, lim_byte, charset_base)
1507      int n;
1508      unsigned char *base_pat;
1509      int len, len_byte;
1510      Lisp_Object trt;
1511      Lisp_Object inverse_trt;
1512      int pos, pos_byte;
1513      int lim, lim_byte;
1514      int charset_base;
1515 {
1516   int direction = ((n > 0) ? 1 : -1);
1517   register int dirlen;
1518   int infinity, limit, k, stride_for_teases;
1519   register int *BM_tab;
1520   int *BM_tab_base;
1521   register unsigned char *cursor, *p_limit;
1522   register int i, j;
1523   unsigned char *pat, *pat_end;
1524   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1525
1526   unsigned char simple_translate[0400];
1527   int translate_prev_byte;
1528   int translate_anteprev_byte;
1529
1530 #ifdef C_ALLOCA
1531   int BM_tab_space[0400];
1532   BM_tab = &BM_tab_space[0];
1533 #else
1534   BM_tab = (int *) alloca (0400 * sizeof (int));
1535 #endif
1536   /* The general approach is that we are going to maintain that we know */
1537   /* the first (closest to the present position, in whatever direction */
1538   /* we're searching) character that could possibly be the last */
1539   /* (furthest from present position) character of a valid match.  We */
1540   /* advance the state of our knowledge by looking at that character */
1541   /* and seeing whether it indeed matches the last character of the */
1542   /* pattern.  If it does, we take a closer look.  If it does not, we */
1543   /* move our pointer (to putative last characters) as far as is */
1544   /* logically possible.  This amount of movement, which I call a */
1545   /* stride, will be the length of the pattern if the actual character */
1546   /* appears nowhere in the pattern, otherwise it will be the distance */
1547   /* from the last occurrence of that character to the end of the */
1548   /* pattern. */
1549   /* As a coding trick, an enormous stride is coded into the table for */
1550   /* characters that match the last character.  This allows use of only */
1551   /* a single test, a test for having gone past the end of the */
1552   /* permissible match region, to test for both possible matches (when */
1553   /* the stride goes past the end immediately) and failure to */
1554   /* match (where you get nudged past the end one stride at a time). */
1555
1556   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1557   /* is determined only by the last character of the putative match. */
1558   /* If that character does not match, we will stride the proper */
1559   /* distance to propose a match that superimposes it on the last */
1560   /* instance of a character that matches it (per trt), or misses */
1561   /* it entirely if there is none. */
1562
1563   dirlen = len_byte * direction;
1564   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1565
1566   /* Record position after the end of the pattern.  */
1567   pat_end = base_pat + len_byte;
1568   /* BASE_PAT points to a character that we start scanning from.
1569      It is the first character in a forward search,
1570      the last character in a backward search.  */
1571   if (direction < 0)
1572     base_pat = pat_end - 1;
1573
1574   BM_tab_base = BM_tab;
1575   BM_tab += 0400;
1576   j = dirlen;           /* to get it in a register */
1577   /* A character that does not appear in the pattern induces a */
1578   /* stride equal to the pattern length. */
1579   while (BM_tab_base != BM_tab)
1580     {
1581       *--BM_tab = j;
1582       *--BM_tab = j;
1583       *--BM_tab = j;
1584       *--BM_tab = j;
1585     }
1586
1587   /* We use this for translation, instead of TRT itself.
1588      We fill this in to handle the characters that actually
1589      occur in the pattern.  Others don't matter anyway!  */
1590   bzero (simple_translate, sizeof simple_translate);
1591   for (i = 0; i < 0400; i++)
1592     simple_translate[i] = i;
1593
1594   i = 0;
1595   while (i != infinity)
1596     {
1597       unsigned char *ptr = base_pat + i;
1598       i += direction;
1599       if (i == dirlen)
1600         i = infinity;
1601       if (! NILP (trt))
1602         {
1603           int ch;
1604           int untranslated;
1605           int this_translated = 1;
1606
1607           if (multibyte
1608               /* Is *PTR the last byte of a character?  */
1609               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1610             {
1611               unsigned char *charstart = ptr;
1612               while (! CHAR_HEAD_P (*charstart))
1613                 charstart--;
1614               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1615               if (charset_base == (untranslated & ~0xff))
1616                 {
1617                   TRANSLATE (ch, trt, untranslated);
1618                   if (! CHAR_HEAD_P (*ptr))
1619                     {
1620                       translate_prev_byte = ptr[-1];
1621                       if (! CHAR_HEAD_P (translate_prev_byte))
1622                         translate_anteprev_byte = ptr[-2];
1623                     }
1624                 }
1625               else
1626                 {
1627                   this_translated = 0;
1628                   ch = *ptr;
1629                 }
1630             }
1631           else if (!multibyte)
1632             TRANSLATE (ch, trt, *ptr);
1633           else
1634             {
1635               ch = *ptr;
1636               this_translated = 0;
1637             }
1638
1639           if (ch > 0400)
1640             j = ((unsigned char) ch) | 0200;
1641           else
1642             j = (unsigned char) ch;
1643
1644           if (i == infinity)
1645             stride_for_teases = BM_tab[j];
1646
1647           BM_tab[j] = dirlen - i;
1648           /* A translation table is accompanied by its inverse -- see */
1649           /* comment following downcase_table for details */
1650           if (this_translated)
1651             {
1652               int starting_ch = ch;
1653               int starting_j = j;
1654               while (1)
1655                 {
1656                   TRANSLATE (ch, inverse_trt, ch);
1657                   if (ch > 0400)
1658                     j = ((unsigned char) ch) | 0200;
1659                   else
1660                     j = (unsigned char) ch;
1661
1662                   /* For all the characters that map into CH,
1663                      set up simple_translate to map the last byte
1664                      into STARTING_J.  */
1665                   simple_translate[j] = starting_j;
1666                   if (ch == starting_ch)
1667                     break;
1668                   BM_tab[j] = dirlen - i;
1669                 }
1670             }
1671         }
1672       else
1673         {
1674           j = *ptr;
1675
1676           if (i == infinity)
1677             stride_for_teases = BM_tab[j];
1678           BM_tab[j] = dirlen - i;
1679         }
1680       /* stride_for_teases tells how much to stride if we get a */
1681       /* match on the far character but are subsequently */
1682       /* disappointed, by recording what the stride would have been */
1683       /* for that character if the last character had been */
1684       /* different. */
1685     }
1686   infinity = dirlen - infinity;
1687   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1688   /* loop invariant - POS_BYTE points at where last char (first
1689      char if reverse) of pattern would align in a possible match.  */
1690   while (n != 0)
1691     {
1692       int tail_end;
1693       unsigned char *tail_end_ptr;
1694
1695       /* It's been reported that some (broken) compiler thinks that
1696          Boolean expressions in an arithmetic context are unsigned.
1697          Using an explicit ?1:0 prevents this.  */
1698       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1699           < 0)
1700         return (n * (0 - direction));
1701       /* First we do the part we can by pointers (maybe nothing) */
1702       QUIT;
1703       pat = base_pat;
1704       limit = pos_byte - dirlen + direction;
1705       if (direction > 0)
1706         {
1707           limit = BUFFER_CEILING_OF (limit);
1708           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1709              can take on without hitting edge of buffer or the gap.  */
1710           limit = min (limit, pos_byte + 20000);
1711           limit = min (limit, lim_byte - 1);
1712         }
1713       else
1714         {
1715           limit = BUFFER_FLOOR_OF (limit);
1716           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1717              can take on without hitting edge of buffer or the gap.  */
1718           limit = max (limit, pos_byte - 20000);
1719           limit = max (limit, lim_byte);
1720         }
1721       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1722       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1723
1724       if ((limit - pos_byte) * direction > 20)
1725         {
1726           unsigned char *p2;
1727
1728           p_limit = BYTE_POS_ADDR (limit);
1729           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1730           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1731           while (1)             /* use one cursor setting as long as i can */
1732             {
1733               if (direction > 0) /* worth duplicating */
1734                 {
1735                   /* Use signed comparison if appropriate
1736                      to make cursor+infinity sure to be > p_limit.
1737                      Assuming that the buffer lies in a range of addresses
1738                      that are all "positive" (as ints) or all "negative",
1739                      either kind of comparison will work as long
1740                      as we don't step by infinity.  So pick the kind
1741                      that works when we do step by infinity.  */
1742                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1743                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1744                       cursor += BM_tab[*cursor];
1745                   else
1746                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1747                       cursor += BM_tab[*cursor];
1748                 }
1749               else
1750                 {
1751                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1752                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1753                       cursor += BM_tab[*cursor];
1754                   else
1755                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1756                       cursor += BM_tab[*cursor];
1757                 }
1758 /* If you are here, cursor is beyond the end of the searched region. */
1759 /* This can happen if you match on the far character of the pattern, */
1760 /* because the "stride" of that character is infinity, a number able */
1761 /* to throw you well beyond the end of the search.  It can also */
1762 /* happen if you fail to match within the permitted region and would */
1763 /* otherwise try a character beyond that region */
1764               if ((cursor - p_limit) * direction <= len_byte)
1765                 break;  /* a small overrun is genuine */
1766               cursor -= infinity; /* large overrun = hit */
1767               i = dirlen - direction;
1768               if (! NILP (trt))
1769                 {
1770                   while ((i -= direction) + direction != 0)
1771                     {
1772                       int ch;
1773                       cursor -= direction;
1774                       /* Translate only the last byte of a character.  */
1775                       if (! multibyte
1776                           || ((cursor == tail_end_ptr
1777                                || CHAR_HEAD_P (cursor[1]))
1778                               && (CHAR_HEAD_P (cursor[0])
1779                                   || (translate_prev_byte == cursor[-1]
1780                                       && (CHAR_HEAD_P (translate_prev_byte)
1781                                           || translate_anteprev_byte == cursor[-2])))))
1782                         ch = simple_translate[*cursor];
1783                       else
1784                         ch = *cursor;
1785                       if (pat[i] != ch)
1786                         break;
1787                     }
1788                 }
1789               else
1790                 {
1791                   while ((i -= direction) + direction != 0)
1792                     {
1793                       cursor -= direction;
1794                       if (pat[i] != *cursor)
1795                         break;
1796                     }
1797                 }
1798               cursor += dirlen - i - direction; /* fix cursor */
1799               if (i + direction == 0)
1800                 {
1801                   int position;
1802
1803                   cursor -= direction;
1804
1805                   position = pos_byte + cursor - p2 + ((direction > 0)
1806                                                        ? 1 - len_byte : 0);
1807                   set_search_regs (position, len_byte);
1808
1809                   if ((n -= direction) != 0)
1810                     cursor += dirlen; /* to resume search */
1811                   else
1812                     return ((direction > 0)
1813                             ? search_regs.end[0] : search_regs.start[0]);
1814                 }
1815               else
1816                 cursor += stride_for_teases; /* <sigh> we lose -  */
1817             }
1818           pos_byte += cursor - p2;
1819         }
1820       else
1821         /* Now we'll pick up a clump that has to be done the hard */
1822         /* way because it covers a discontinuity */
1823         {
1824           limit = ((direction > 0)
1825                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1826                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1827           limit = ((direction > 0)
1828                    ? min (limit + len_byte, lim_byte - 1)
1829                    : max (limit - len_byte, lim_byte));
1830           /* LIMIT is now the last value POS_BYTE can have
1831              and still be valid for a possible match.  */
1832           while (1)
1833             {
1834               /* This loop can be coded for space rather than */
1835               /* speed because it will usually run only once. */
1836               /* (the reach is at most len + 21, and typically */
1837               /* does not exceed len) */
1838               while ((limit - pos_byte) * direction >= 0)
1839                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1840               /* now run the same tests to distinguish going off the */
1841               /* end, a match or a phony match. */
1842               if ((pos_byte - limit) * direction <= len_byte)
1843                 break;  /* ran off the end */
1844               /* Found what might be a match.
1845                  Set POS_BYTE back to last (first if reverse) pos.  */
1846               pos_byte -= infinity;
1847               i = dirlen - direction;
1848               while ((i -= direction) + direction != 0)
1849                 {
1850                   int ch;
1851                   unsigned char *ptr;
1852                   pos_byte -= direction;
1853                   ptr = BYTE_POS_ADDR (pos_byte);
1854                   /* Translate only the last byte of a character.  */
1855                   if (! multibyte
1856                       || ((ptr == tail_end_ptr
1857                            || CHAR_HEAD_P (ptr[1]))
1858                           && (CHAR_HEAD_P (ptr[0])
1859                               || (translate_prev_byte == ptr[-1]
1860                                   && (CHAR_HEAD_P (translate_prev_byte)
1861                                       || translate_anteprev_byte == ptr[-2])))))
1862                     ch = simple_translate[*ptr];
1863                   else
1864                     ch = *ptr;
1865                   if (pat[i] != ch)
1866                     break;
1867                 }
1868               /* Above loop has moved POS_BYTE part or all the way
1869                  back to the first pos (last pos if reverse).
1870                  Set it once again at the last (first if reverse) char.  */
1871               pos_byte += dirlen - i- direction;
1872               if (i + direction == 0)
1873                 {
1874                   int position;
1875                   pos_byte -= direction;
1876
1877                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1878
1879                   set_search_regs (position, len_byte);
1880
1881                   if ((n -= direction) != 0)
1882                     pos_byte += dirlen; /* to resume search */
1883                   else
1884                     return ((direction > 0)
1885                             ? search_regs.end[0] : search_regs.start[0]);
1886                 }
1887               else
1888                 pos_byte += stride_for_teases;
1889             }
1890           }
1891       /* We have done one clump.  Can we continue? */
1892       if ((lim_byte - pos_byte) * direction < 0)
1893         return ((0 - n) * direction);
1894     }
1895   return BYTE_TO_CHAR (pos_byte);
1896 }
1897
1898 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1899    for a match just found in the current buffer.  */
1900
1901 static void
1902 set_search_regs (beg_byte, nbytes)
1903      int beg_byte, nbytes;
1904 {
1905   /* Make sure we have registers in which to store
1906      the match position.  */
1907   if (search_regs.num_regs == 0)
1908     {
1909       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1910       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1911       search_regs.num_regs = 2;
1912     }
1913
1914   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1915   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1916   XSETBUFFER (last_thing_searched, current_buffer);
1917 }
1918 \f
1919 /* Given a string of words separated by word delimiters,
1920   compute a regexp that matches those exact words
1921   separated by arbitrary punctuation.  */
1922
1923 static Lisp_Object
1924 wordify (string)
1925      Lisp_Object string;
1926 {
1927   register unsigned char *p, *o;
1928   register int i, i_byte, len, punct_count = 0, word_count = 0;
1929   Lisp_Object val;
1930   int prev_c = 0;
1931   int adjust;
1932
1933   CHECK_STRING (string, 0);
1934   p = XSTRING (string)->data;
1935   len = XSTRING (string)->size;
1936
1937   for (i = 0, i_byte = 0; i < len; )
1938     {
1939       int c;
1940
1941       if (STRING_MULTIBYTE (string))
1942         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1943       else
1944         c = XSTRING (string)->data[i++];
1945
1946       if (SYNTAX (c) != Sword)
1947         {
1948           punct_count++;
1949           if (i > 0 && SYNTAX (prev_c) == Sword)
1950             word_count++;
1951         }
1952
1953       prev_c = c;
1954     }
1955
1956   if (SYNTAX (prev_c) == Sword)
1957     word_count++;
1958   if (!word_count)
1959     return build_string ("");
1960
1961   adjust = - punct_count + 5 * (word_count - 1) + 4;
1962   val = make_uninit_multibyte_string (len + adjust,
1963                                       STRING_BYTES (XSTRING (string)) + adjust);
1964
1965   o = XSTRING (val)->data;
1966   *o++ = '\\';
1967   *o++ = 'b';
1968   prev_c = 0;
1969
1970   for (i = 0, i_byte = 0; i < len; )
1971     {
1972       int c;
1973       int i_byte_orig = i_byte;
1974
1975       if (STRING_MULTIBYTE (string))
1976         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1977       else
1978         c = XSTRING (string)->data[i++];
1979
1980       if (SYNTAX (c) == Sword)
1981         {
1982           bcopy (&XSTRING (string)->data[i_byte_orig], o,
1983                  i_byte - i_byte_orig);
1984           o += i_byte - i_byte_orig;
1985         }
1986       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
1987         {
1988           *o++ = '\\';
1989           *o++ = 'W';
1990           *o++ = '\\';
1991           *o++ = 'W';
1992           *o++ = '*';
1993         }
1994
1995       prev_c = c;
1996     }
1997
1998   *o++ = '\\';
1999   *o++ = 'b';
2000
2001   return val;
2002 }
2003 \f
2004 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2005   "MSearch backward: ",
2006   "Search backward from point for STRING.\n\
2007 Set point to the beginning of the occurrence found, and return point.\n\
2008 An optional second argument bounds the search; it is a buffer position.\n\
2009 The match found must not extend before that position.\n\
2010 Optional third argument, if t, means if fail just return nil (no error).\n\
2011  If not nil and not t, position at limit of search and return nil.\n\
2012 Optional fourth argument is repeat count--search for successive occurrences.\n\
2013 See also the functions `match-beginning', `match-end' and `replace-match'.")
2014   (string, bound, noerror, count)
2015      Lisp_Object string, bound, noerror, count;
2016 {
2017   return search_command (string, bound, noerror, count, -1, 0, 0);
2018 }
2019
2020 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2021   "Search forward from point for STRING.\n\
2022 Set point to the end of the occurrence found, and return point.\n\
2023 An optional second argument bounds the search; it is a buffer position.\n\
2024 The match found must not extend after that position.  nil is equivalent\n\
2025   to (point-max).\n\
2026 Optional third argument, if t, means if fail just return nil (no error).\n\
2027   If not nil and not t, move to limit of search and return nil.\n\
2028 Optional fourth argument is repeat count--search for successive occurrences.\n\
2029 See also the functions `match-beginning', `match-end' and `replace-match'.")
2030   (string, bound, noerror, count)
2031      Lisp_Object string, bound, noerror, count;
2032 {
2033   return search_command (string, bound, noerror, count, 1, 0, 0);
2034 }
2035
2036 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2037   "sWord search backward: ",
2038   "Search backward from point for STRING, ignoring differences in punctuation.\n\
2039 Set point to the beginning of the occurrence found, and return point.\n\
2040 An optional second argument bounds the search; it is a buffer position.\n\
2041 The match found must not extend before that position.\n\
2042 Optional third argument, if t, means if fail just return nil (no error).\n\
2043   If not nil and not t, move to limit of search and return nil.\n\
2044 Optional fourth argument is repeat count--search for successive occurrences.")
2045   (string, bound, noerror, count)
2046      Lisp_Object string, bound, noerror, count;
2047 {
2048   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2049 }
2050
2051 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2052   "sWord search: ",
2053   "Search forward from point for STRING, ignoring differences in punctuation.\n\
2054 Set point to the end of the occurrence found, and return point.\n\
2055 An optional second argument bounds the search; it is a buffer position.\n\
2056 The match found must not extend after that position.\n\
2057 Optional third argument, if t, means if fail just return nil (no error).\n\
2058   If not nil and not t, move to limit of search and return nil.\n\
2059 Optional fourth argument is repeat count--search for successive occurrences.")
2060   (string, bound, noerror, count)
2061      Lisp_Object string, bound, noerror, count;
2062 {
2063   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2064 }
2065
2066 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2067   "sRE search backward: ",
2068   "Search backward from point for match for regular expression REGEXP.\n\
2069 Set point to the beginning of the match, and return point.\n\
2070 The match found is the one starting last in the buffer\n\
2071 and yet ending before the origin of the search.\n\
2072 An optional second argument bounds the search; it is a buffer position.\n\
2073 The match found must start at or after that position.\n\
2074 Optional third argument, if t, means if fail just return nil (no error).\n\
2075   If not nil and not t, move to limit of search and return nil.\n\
2076 Optional fourth argument is repeat count--search for successive occurrences.\n\
2077 See also the functions `match-beginning', `match-end' and `replace-match'.")
2078   (regexp, bound, noerror, count)
2079      Lisp_Object regexp, bound, noerror, count;
2080 {
2081   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2082 }
2083
2084 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2085   "sRE search: ",
2086   "Search forward from point for regular expression REGEXP.\n\
2087 Set point to the end of the occurrence found, and return point.\n\
2088 An optional second argument bounds the search; it is a buffer position.\n\
2089 The match found must not extend after that position.\n\
2090 Optional third argument, if t, means if fail just return nil (no error).\n\
2091   If not nil and not t, move to limit of search and return nil.\n\
2092 Optional fourth argument is repeat count--search for successive occurrences.\n\
2093 See also the functions `match-beginning', `match-end' and `replace-match'.")
2094   (regexp, bound, noerror, count)
2095      Lisp_Object regexp, bound, noerror, count;
2096 {
2097   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2098 }
2099
2100 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2101   "sPosix search backward: ",
2102   "Search backward from point for match for regular expression REGEXP.\n\
2103 Find the longest match in accord with Posix regular expression rules.\n\
2104 Set point to the beginning of the match, and return point.\n\
2105 The match found is the one starting last in the buffer\n\
2106 and yet ending before the origin of the search.\n\
2107 An optional second argument bounds the search; it is a buffer position.\n\
2108 The match found must start at or after that position.\n\
2109 Optional third argument, if t, means if fail just return nil (no error).\n\
2110   If not nil and not t, move to limit of search and return nil.\n\
2111 Optional fourth argument is repeat count--search for successive occurrences.\n\
2112 See also the functions `match-beginning', `match-end' and `replace-match'.")
2113   (regexp, bound, noerror, count)
2114      Lisp_Object regexp, bound, noerror, count;
2115 {
2116   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2117 }
2118
2119 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2120   "sPosix search: ",
2121   "Search forward from point for regular expression REGEXP.\n\
2122 Find the longest match in accord with Posix regular expression rules.\n\
2123 Set point to the end of the occurrence found, and return point.\n\
2124 An optional second argument bounds the search; it is a buffer position.\n\
2125 The match found must not extend after that position.\n\
2126 Optional third argument, if t, means if fail just return nil (no error).\n\
2127   If not nil and not t, move to limit of search and return nil.\n\
2128 Optional fourth argument is repeat count--search for successive occurrences.\n\
2129 See also the functions `match-beginning', `match-end' and `replace-match'.")
2130   (regexp, bound, noerror, count)
2131      Lisp_Object regexp, bound, noerror, count;
2132 {
2133   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2134 }
2135 \f
2136 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2137   "Replace text matched by last search with NEWTEXT.\n\
2138 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2139 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2140 based on the replaced text.\n\
2141 If the replaced text has only capital letters\n\
2142 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2143 If the replaced text has at least one word starting with a capital letter,\n\
2144 then capitalize each word in NEWTEXT.\n\n\
2145 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2146 Otherwise treat `\\' as special:\n\
2147   `\\&' in NEWTEXT means substitute original matched text.\n\
2148   `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2149        If Nth parens didn't match, substitute nothing.\n\
2150   `\\\\' means insert one `\\'.\n\
2151 FIXEDCASE and LITERAL are optional arguments.\n\
2152 Leaves point at end of replacement text.\n\
2153 \n\
2154 The optional fourth argument STRING can be a string to modify.\n\
2155 In that case, this function creates and returns a new string\n\
2156 which is made by replacing the part of STRING that was matched.\n\
2157 \n\
2158 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2159 It says to replace just that subexpression instead of the whole match.\n\
2160 This is useful only after a regular expression search or match\n\
2161 since only regular expressions have distinguished subexpressions.")
2162   (newtext, fixedcase, literal, string, subexp)
2163      Lisp_Object newtext, fixedcase, literal, string, subexp;
2164 {
2165   enum { nochange, all_caps, cap_initial } case_action;
2166   register int pos, last;
2167   int some_multiletter_word;
2168   int some_lowercase;
2169   int some_uppercase;
2170   int some_nonuppercase_initial;
2171   register int c, prevc;
2172   int inslen;
2173   int sub;
2174   int opoint, newpoint;
2175
2176   CHECK_STRING (newtext, 0);
2177
2178   if (! NILP (string))
2179     CHECK_STRING (string, 4);
2180
2181   case_action = nochange;       /* We tried an initialization */
2182                                 /* but some C compilers blew it */
2183
2184   if (search_regs.num_regs <= 0)
2185     error ("replace-match called before any match found");
2186
2187   if (NILP (subexp))
2188     sub = 0;
2189   else
2190     {
2191       CHECK_NUMBER (subexp, 3);
2192       sub = XINT (subexp);
2193       if (sub < 0 || sub >= search_regs.num_regs)
2194         args_out_of_range (subexp, make_number (search_regs.num_regs));
2195     }
2196
2197   if (NILP (string))
2198     {
2199       if (search_regs.start[sub] < BEGV
2200           || search_regs.start[sub] > search_regs.end[sub]
2201           || search_regs.end[sub] > ZV)
2202         args_out_of_range (make_number (search_regs.start[sub]),
2203                            make_number (search_regs.end[sub]));
2204     }
2205   else
2206     {
2207       if (search_regs.start[sub] < 0
2208           || search_regs.start[sub] > search_regs.end[sub]
2209           || search_regs.end[sub] > XSTRING (string)->size)
2210         args_out_of_range (make_number (search_regs.start[sub]),
2211                            make_number (search_regs.end[sub]));
2212     }
2213
2214   if (NILP (fixedcase))
2215     {
2216       int beg;
2217       /* Decide how to casify by examining the matched text. */
2218
2219       if (NILP (string))
2220         last = CHAR_TO_BYTE (search_regs.end[sub]);
2221       else
2222         last = search_regs.end[sub];
2223
2224       if (NILP (string))
2225         beg = CHAR_TO_BYTE (search_regs.start[sub]);
2226       else
2227         beg = search_regs.start[sub];
2228
2229       prevc = '\n';
2230       case_action = all_caps;
2231
2232       /* some_multiletter_word is set nonzero if any original word
2233          is more than one letter long. */
2234       some_multiletter_word = 0;
2235       some_lowercase = 0;
2236       some_nonuppercase_initial = 0;
2237       some_uppercase = 0;
2238
2239       for (pos = beg; pos < last; pos++)
2240         {
2241           if (NILP (string))
2242             c = FETCH_BYTE (pos);
2243           else
2244             c = XSTRING (string)->data[pos];
2245
2246           if (LOWERCASEP (c))
2247             {
2248               /* Cannot be all caps if any original char is lower case */
2249
2250               some_lowercase = 1;
2251               if (SYNTAX (prevc) != Sword)
2252                 some_nonuppercase_initial = 1;
2253               else
2254                 some_multiletter_word = 1;
2255             }
2256           else if (!NOCASEP (c))
2257             {
2258               some_uppercase = 1;
2259               if (SYNTAX (prevc) != Sword)
2260                 ;
2261               else
2262                 some_multiletter_word = 1;
2263             }
2264           else
2265             {
2266               /* If the initial is a caseless word constituent,
2267                  treat that like a lowercase initial.  */
2268               if (SYNTAX (prevc) != Sword)
2269                 some_nonuppercase_initial = 1;
2270             }
2271
2272           prevc = c;
2273         }
2274
2275       /* Convert to all caps if the old text is all caps
2276          and has at least one multiletter word.  */
2277       if (! some_lowercase && some_multiletter_word)
2278         case_action = all_caps;
2279       /* Capitalize each word, if the old text has all capitalized words.  */
2280       else if (!some_nonuppercase_initial && some_multiletter_word)
2281         case_action = cap_initial;
2282       else if (!some_nonuppercase_initial && some_uppercase)
2283         /* Should x -> yz, operating on X, give Yz or YZ?
2284            We'll assume the latter.  */
2285         case_action = all_caps;
2286       else
2287         case_action = nochange;
2288     }
2289
2290   /* Do replacement in a string.  */
2291   if (!NILP (string))
2292     {
2293       Lisp_Object before, after;
2294
2295       before = Fsubstring (string, make_number (0),
2296                            make_number (search_regs.start[sub]));
2297       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2298
2299       /* Substitute parts of the match into NEWTEXT
2300          if desired.  */
2301       if (NILP (literal))
2302         {
2303           int lastpos = 0;
2304           int lastpos_byte = 0;
2305           /* We build up the substituted string in ACCUM.  */
2306           Lisp_Object accum;
2307           Lisp_Object middle;
2308           int pos_byte;
2309
2310           accum = Qnil;
2311
2312           for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
2313             {
2314               int substart = -1;
2315               int subend;
2316               int delbackslash = 0;
2317
2318               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2319
2320               if (c == '\\')
2321                 {
2322                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2323                   if (c == '&')
2324                     {
2325                       substart = search_regs.start[sub];
2326                       subend = search_regs.end[sub];
2327                     }
2328                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2329                     {
2330                       if (search_regs.start[c - '0'] >= 0)
2331                         {
2332                           substart = search_regs.start[c - '0'];
2333                           subend = search_regs.end[c - '0'];
2334                         }
2335                     }
2336                   else if (c == '\\')
2337                     delbackslash = 1;
2338                   else
2339                     error ("Invalid use of `\\' in replacement text");
2340                 }
2341               if (substart >= 0)
2342                 {
2343                   if (pos - 2 != lastpos)
2344                     middle = substring_both (newtext, lastpos,
2345                                              lastpos_byte,
2346                                              pos - 2, pos_byte - 2);
2347                   else
2348                     middle = Qnil;
2349                   accum = concat3 (accum, middle,
2350                                    Fsubstring (string,
2351                                                make_number (substart),
2352                                                make_number (subend)));
2353                   lastpos = pos;
2354                   lastpos_byte = pos_byte;
2355                 }
2356               else if (delbackslash)
2357                 {
2358                   middle = substring_both (newtext, lastpos,
2359                                            lastpos_byte,
2360                                            pos - 1, pos_byte - 1);
2361
2362                   accum = concat2 (accum, middle);
2363                   lastpos = pos;
2364                   lastpos_byte = pos_byte;
2365                 }
2366             }
2367
2368           if (pos != lastpos)
2369             middle = substring_both (newtext, lastpos,
2370                                      lastpos_byte,
2371                                      pos, pos_byte);
2372           else
2373             middle = Qnil;
2374
2375           newtext = concat2 (accum, middle);
2376         }
2377
2378       /* Do case substitution in NEWTEXT if desired.  */
2379       if (case_action == all_caps)
2380         newtext = Fupcase (newtext);
2381       else if (case_action == cap_initial)
2382         newtext = Fupcase_initials (newtext);
2383
2384       return concat3 (before, newtext, after);
2385     }
2386
2387   /* Record point, the move (quietly) to the start of the match.  */
2388   if (PT > search_regs.start[sub])
2389     opoint = PT - ZV;
2390   else
2391     opoint = PT;
2392
2393   TEMP_SET_PT (search_regs.start[sub]);
2394
2395   /* We insert the replacement text before the old text, and then
2396      delete the original text.  This means that markers at the
2397      beginning or end of the original will float to the corresponding
2398      position in the replacement.  */
2399   if (!NILP (literal))
2400     Finsert_and_inherit (1, &newtext);
2401   else
2402     {
2403       struct gcpro gcpro1;
2404       GCPRO1 (newtext);
2405
2406       for (pos = 0; pos < XSTRING (newtext)->size; pos++)
2407         {
2408           int offset = PT - search_regs.start[sub];
2409
2410           c = XSTRING (newtext)->data[pos];
2411           if (c == '\\')
2412             {
2413               c = XSTRING (newtext)->data[++pos];
2414               if (c == '&')
2415                 Finsert_buffer_substring
2416                   (Fcurrent_buffer (),
2417                    make_number (search_regs.start[sub] + offset),
2418                    make_number (search_regs.end[sub] + offset));
2419               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2420                 {
2421                   if (search_regs.start[c - '0'] >= 1)
2422                     Finsert_buffer_substring
2423                       (Fcurrent_buffer (),
2424                        make_number (search_regs.start[c - '0'] + offset),
2425                        make_number (search_regs.end[c - '0'] + offset));
2426                 }
2427               else if (c == '\\')
2428                 insert_char (c);
2429               else
2430                 error ("Invalid use of `\\' in replacement text");
2431             }
2432           else
2433             insert_char (c);
2434         }
2435       UNGCPRO;
2436     }
2437
2438   inslen = PT - (search_regs.start[sub]);
2439   del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2440
2441   if (case_action == all_caps)
2442     Fupcase_region (make_number (PT - inslen), make_number (PT));
2443   else if (case_action == cap_initial)
2444     Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2445
2446   newpoint = PT;
2447
2448   /* Put point back where it was in the text.  */
2449   if (opoint <= 0)
2450     TEMP_SET_PT (opoint + ZV);
2451   else
2452     TEMP_SET_PT (opoint);
2453
2454   /* Now move point "officially" to the start of the inserted replacement.  */
2455   move_if_not_intangible (newpoint);
2456
2457   return Qnil;
2458 }
2459 \f
2460 static Lisp_Object
2461 match_limit (num, beginningp)
2462      Lisp_Object num;
2463      int beginningp;
2464 {
2465   register int n;
2466
2467   CHECK_NUMBER (num, 0);
2468   n = XINT (num);
2469   if (n < 0 || n >= search_regs.num_regs)
2470     args_out_of_range (num, make_number (search_regs.num_regs));
2471   if (search_regs.num_regs <= 0
2472       || search_regs.start[n] < 0)
2473     return Qnil;
2474   return (make_number ((beginningp) ? search_regs.start[n]
2475                                     : search_regs.end[n]));
2476 }
2477
2478 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2479   "Return position of start of text matched by last search.\n\
2480 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2481   regexp.\n\
2482 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2483   SUBEXP pairs.\n\
2484 Zero means the entire text matched by the whole regexp or whole string.")
2485   (subexp)
2486      Lisp_Object subexp;
2487 {
2488   return match_limit (subexp, 1);
2489 }
2490
2491 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2492   "Return position of end of text matched by last search.\n\
2493 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2494   regexp.\n\
2495 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2496   SUBEXP pairs.\n\
2497 Zero means the entire text matched by the whole regexp or whole string.")
2498   (subexp)
2499      Lisp_Object subexp;
2500 {
2501   return match_limit (subexp, 0);
2502 }
2503
2504 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2505   "Return a list containing all info on what the last search matched.\n\
2506 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2507 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2508 if the last match was on a buffer; integers or nil if a string was matched.\n\
2509 Use `store-match-data' to reinstate the data in this list.\n\
2510 \n\
2511 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2512 \(rather than markers) to represent buffer positions.\n\
2513 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough\n\
2514 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2515   (integers, reuse)
2516      Lisp_Object integers, reuse;
2517 {
2518   Lisp_Object tail, prev;
2519   Lisp_Object *data;
2520   int i, len;
2521
2522   if (NILP (last_thing_searched))
2523     return Qnil;
2524
2525   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2526                                  * sizeof (Lisp_Object));
2527
2528   len = -1;
2529   for (i = 0; i < search_regs.num_regs; i++)
2530     {
2531       int start = search_regs.start[i];
2532       if (start >= 0)
2533         {
2534           if (EQ (last_thing_searched, Qt)
2535               || ! NILP (integers))
2536             {
2537               XSETFASTINT (data[2 * i], start);
2538               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2539             }
2540           else if (BUFFERP (last_thing_searched))
2541             {
2542               data[2 * i] = Fmake_marker ();
2543               Fset_marker (data[2 * i],
2544                            make_number (start),
2545                            last_thing_searched);
2546               data[2 * i + 1] = Fmake_marker ();
2547               Fset_marker (data[2 * i + 1],
2548                            make_number (search_regs.end[i]),
2549                            last_thing_searched);
2550             }
2551           else
2552             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2553             abort ();
2554
2555           len = i;
2556         }
2557       else
2558         data[2 * i] = data [2 * i + 1] = Qnil;
2559     }
2560
2561   /* If REUSE is not usable, cons up the values and return them.  */
2562   if (! CONSP (reuse))
2563     return Flist (2 * len + 2, data);
2564
2565   /* If REUSE is a list, store as many value elements as will fit
2566      into the elements of REUSE.  */
2567   for (i = 0, tail = reuse; CONSP (tail);
2568        i++, tail = XCONS (tail)->cdr)
2569     {
2570       if (i < 2 * len + 2)
2571         XCONS (tail)->car = data[i];
2572       else
2573         XCONS (tail)->car = Qnil;
2574       prev = tail;
2575     }
2576
2577   /* If we couldn't fit all value elements into REUSE,
2578      cons up the rest of them and add them to the end of REUSE.  */
2579   if (i < 2 * len + 2)
2580     XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2581
2582   return reuse;
2583 }
2584
2585
2586 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2587   "Set internal data on last search match from elements of LIST.\n\
2588 LIST should have been created by calling `match-data' previously.")
2589   (list)
2590      register Lisp_Object list;
2591 {
2592   register int i;
2593   register Lisp_Object marker;
2594
2595   if (running_asynch_code)
2596     save_search_regs ();
2597
2598   if (!CONSP (list) && !NILP (list))
2599     list = wrong_type_argument (Qconsp, list);
2600
2601   /* Unless we find a marker with a buffer in LIST, assume that this
2602      match data came from a string.  */
2603   last_thing_searched = Qt;
2604
2605   /* Allocate registers if they don't already exist.  */
2606   {
2607     int length = XFASTINT (Flength (list)) / 2;
2608
2609     if (length > search_regs.num_regs)
2610       {
2611         if (search_regs.num_regs == 0)
2612           {
2613             search_regs.start
2614               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2615             search_regs.end
2616               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2617           }
2618         else
2619           {
2620             search_regs.start
2621               = (regoff_t *) xrealloc (search_regs.start,
2622                                        length * sizeof (regoff_t));
2623             search_regs.end
2624               = (regoff_t *) xrealloc (search_regs.end,
2625                                        length * sizeof (regoff_t));
2626           }
2627
2628         search_regs.num_regs = length;
2629       }
2630   }
2631
2632   for (i = 0; i < search_regs.num_regs; i++)
2633     {
2634       marker = Fcar (list);
2635       if (NILP (marker))
2636         {
2637           search_regs.start[i] = -1;
2638           list = Fcdr (list);
2639         }
2640       else
2641         {
2642           if (MARKERP (marker))
2643             {
2644               if (XMARKER (marker)->buffer == 0)
2645                 XSETFASTINT (marker, 0);
2646               else
2647                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2648             }
2649
2650           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2651           search_regs.start[i] = XINT (marker);
2652           list = Fcdr (list);
2653
2654           marker = Fcar (list);
2655           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2656             XSETFASTINT (marker, 0);
2657
2658           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2659           search_regs.end[i] = XINT (marker);
2660         }
2661       list = Fcdr (list);
2662     }
2663
2664   return Qnil;
2665 }
2666
2667 /* If non-zero the match data have been saved in saved_search_regs
2668    during the execution of a sentinel or filter. */
2669 static int search_regs_saved;
2670 static struct re_registers saved_search_regs;
2671
2672 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2673    if asynchronous code (filter or sentinel) is running. */
2674 static void
2675 save_search_regs ()
2676 {
2677   if (!search_regs_saved)
2678     {
2679       saved_search_regs.num_regs = search_regs.num_regs;
2680       saved_search_regs.start = search_regs.start;
2681       saved_search_regs.end = search_regs.end;
2682       search_regs.num_regs = 0;
2683       search_regs.start = 0;
2684       search_regs.end = 0;
2685
2686       search_regs_saved = 1;
2687     }
2688 }
2689
2690 /* Called upon exit from filters and sentinels. */
2691 void
2692 restore_match_data ()
2693 {
2694   if (search_regs_saved)
2695     {
2696       if (search_regs.num_regs > 0)
2697         {
2698           xfree (search_regs.start);
2699           xfree (search_regs.end);
2700         }
2701       search_regs.num_regs = saved_search_regs.num_regs;
2702       search_regs.start = saved_search_regs.start;
2703       search_regs.end = saved_search_regs.end;
2704
2705       search_regs_saved = 0;
2706     }
2707 }
2708
2709 /* Quote a string to inactivate reg-expr chars */
2710
2711 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2712   "Return a regexp string which matches exactly STRING and nothing else.")
2713   (string)
2714      Lisp_Object string;
2715 {
2716   register unsigned char *in, *out, *end;
2717   register unsigned char *temp;
2718   int backslashes_added = 0;
2719
2720   CHECK_STRING (string, 0);
2721
2722   temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2723
2724   /* Now copy the data into the new string, inserting escapes. */
2725
2726   in = XSTRING (string)->data;
2727   end = in + STRING_BYTES (XSTRING (string));
2728   out = temp;
2729
2730   for (; in != end; in++)
2731     {
2732       if (*in == '[' || *in == ']'
2733           || *in == '*' || *in == '.' || *in == '\\'
2734           || *in == '?' || *in == '+'
2735           || *in == '^' || *in == '$')
2736         *out++ = '\\', backslashes_added++;
2737       *out++ = *in;
2738     }
2739
2740   return make_specified_string (temp,
2741                                 XSTRING (string)->size + backslashes_added,
2742                                 out - temp,
2743                                 STRING_MULTIBYTE (string));
2744 }
2745 \f
2746 void
2747 syms_of_search ()
2748 {
2749   register int i;
2750
2751   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2752     {
2753       searchbufs[i].buf.allocated = 100;
2754       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2755       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2756       searchbufs[i].regexp = Qnil;
2757       staticpro (&searchbufs[i].regexp);
2758       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2759     }
2760   searchbuf_head = &searchbufs[0];
2761
2762   Qsearch_failed = intern ("search-failed");
2763   staticpro (&Qsearch_failed);
2764   Qinvalid_regexp = intern ("invalid-regexp");
2765   staticpro (&Qinvalid_regexp);
2766
2767   Fput (Qsearch_failed, Qerror_conditions,
2768         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2769   Fput (Qsearch_failed, Qerror_message,
2770         build_string ("Search failed"));
2771
2772   Fput (Qinvalid_regexp, Qerror_conditions,
2773         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2774   Fput (Qinvalid_regexp, Qerror_message,
2775         build_string ("Invalid regexp"));
2776
2777   last_thing_searched = Qnil;
2778   staticpro (&last_thing_searched);
2779
2780   defsubr (&Slooking_at);
2781   defsubr (&Sposix_looking_at);
2782   defsubr (&Sstring_match);
2783   defsubr (&Sposix_string_match);
2784   defsubr (&Ssearch_forward);
2785   defsubr (&Ssearch_backward);
2786   defsubr (&Sword_search_forward);
2787   defsubr (&Sword_search_backward);
2788   defsubr (&Sre_search_forward);
2789   defsubr (&Sre_search_backward);
2790   defsubr (&Sposix_search_forward);
2791   defsubr (&Sposix_search_backward);
2792   defsubr (&Sreplace_match);
2793   defsubr (&Smatch_beginning);
2794   defsubr (&Smatch_end);
2795   defsubr (&Smatch_data);
2796   defsubr (&Sset_match_data);
2797   defsubr (&Sregexp_quote);
2798 }