code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #ifdef STDC_HEADERS
  24 #include <stdlib.h>
  25 #endif
  26 #include "lisp.h"
  27 #include "syntax.h"
  28 #include "category.h"
  29 #include "buffer.h"
  30 #include "charset.h"
  31 #include "region-cache.h"
  32 #include "commands.h"
  33 #include "blockinput.h"
  34 #include "intervals.h"
  35
  36 #include <sys/types.h>
  37 #include "regex.h"
  38
  39 #define min(a, b) ((a) < (b) ? (a) : (b))
  40 #define max(a, b) ((a) > (b) ? (a) : (b))
  41
  42 #define REGEXP_CACHE_SIZE 20
  43
  44 /* If the regexp is non-nil, then the buffer contains the compiled form
  45    of that regexp, suitable for searching.  */
  46 struct regexp_cache
  47 {
  48   struct regexp_cache *next;
  49   Lisp_Object regexp;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 static void set_search_regs ();
  92 static void save_search_regs ();
  93 static int simple_search ();
  94 static int boyer_moore ();
  95 static int search_buffer ();
  96
  97 static void
  98 matcher_overflow ()
  99 {
 100   error ("Stack overflow in regexp matcher");
 101 }
 102
 103 #ifdef __STDC__
 104 #define CONST const
 105 #else
 106 #define CONST
 107 #endif
 108
 109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 110    PATTERN is the pattern to compile.
 111    CP is the place to put the result.
 112    TRANSLATE is a translation table for ignoring case, or nil for none.
 113    REGP is the structure that says where to store the "register"
 114    values that will result from matching this pattern.
 115    If it is 0, we should compile the pattern not to record any
 116    subexpression bounds.
 117    POSIX is nonzero if we want full backtracking (POSIX style)
 118    for this pattern.  0 means backtrack only enough to get a valid match.
 119    MULTIBYTE is nonzero if we want to handle multibyte characters in
 120    PATTERN.  0 means all multibyte characters are recognized just as
 121    sequences of binary data.  */
 122
 123 static void
 124 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 125      struct regexp_cache *cp;
 126      Lisp_Object pattern;
 127      Lisp_Object translate;
 128      struct re_registers *regp;
 129      int posix;
 130      int multibyte;
 131 {
 132   unsigned char *raw_pattern;
 133   int raw_pattern_size;
 134   char *val;
 135   reg_syntax_t old;
 136
 137   /* MULTIBYTE says whether the text to be searched is multibyte.
 138      We must convert PATTERN to match that, or we will not really
 139      find things right.  */
 140
 141   if (multibyte == STRING_MULTIBYTE (pattern))
 142     {
 143       raw_pattern = (unsigned char *) XSTRING (pattern)->data;
 144       raw_pattern_size = STRING_BYTES (XSTRING (pattern));
 145     }
 146   else if (multibyte)
 147     {
 148       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 149                                                   XSTRING (pattern)->size);
 150       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 151       copy_text (XSTRING (pattern)->data, raw_pattern,
 152                  XSTRING (pattern)->size, 0, 1);
 153     }
 154   else
 155     {
 156       /* Converting multibyte to single-byte.
 157
 158          ??? Perhaps this conversion should be done in a special way
 159          by subtracting nonascii-insert-offset from each non-ASCII char,
 160          so that only the multibyte chars which really correspond to
 161          the chosen single-byte character set can possibly match.  */
 162       raw_pattern_size = XSTRING (pattern)->size;
 163       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 164       copy_text (XSTRING (pattern)->data, raw_pattern,
 165                  STRING_BYTES (XSTRING (pattern)), 1, 0);
 166     }
 167
 168   cp->regexp = Qnil;
 169   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 170   cp->posix = posix;
 171   cp->buf.multibyte = multibyte;
 172   BLOCK_INPUT;
 173   old = re_set_syntax (RE_SYNTAX_EMACS
 174                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 175   val = (char *) re_compile_pattern ((char *)raw_pattern,
 176                                      raw_pattern_size, &cp->buf);
 177   re_set_syntax (old);
 178   UNBLOCK_INPUT;
 179   if (val)
 180     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 181
 182   cp->regexp = Fcopy_sequence (pattern);
 183 }
 184
 185 /* Compile a regexp if necessary, but first check to see if there's one in
 186    the cache.
 187    PATTERN is the pattern to compile.
 188    TRANSLATE is a translation table for ignoring case, or nil for none.
 189    REGP is the structure that says where to store the "register"
 190    values that will result from matching this pattern.
 191    If it is 0, we should compile the pattern not to record any
 192    subexpression bounds.
 193    POSIX is nonzero if we want full backtracking (POSIX style)
 194    for this pattern.  0 means backtrack only enough to get a valid match.  */
 195
 196 struct re_pattern_buffer *
 197 compile_pattern (pattern, regp, translate, posix, multibyte)
 198      Lisp_Object pattern;
 199      struct re_registers *regp;
 200      Lisp_Object translate;
 201      int posix, multibyte;
 202 {
 203   struct regexp_cache *cp, **cpp;
 204
 205   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 206     {
 207       cp = *cpp;
 208       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 209           && !NILP (Fstring_equal (cp->regexp, pattern))
 210           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 211           && cp->posix == posix
 212           && cp->buf.multibyte == multibyte)
 213         break;
 214
 215       /* If we're at the end of the cache, compile into the last cell.  */
 216       if (cp->next == 0)
 217         {
 218           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 219           break;
 220         }
 221     }
 222
 223   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 224      either because we found it in the cache or because we just compiled it.
 225      Move it to the front of the queue to mark it as most recently used.  */
 226   *cpp = cp->next;
 227   cp->next = searchbuf_head;
 228   searchbuf_head = cp;
 229
 230   /* Advise the searching functions about the space we have allocated
 231      for register data.  */
 232   if (regp)
 233     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 234
 235   return &cp->buf;
 236 }
 237
 238 /* Error condition used for failing searches */
 239 Lisp_Object Qsearch_failed;
 240
 241 Lisp_Object
 242 signal_failure (arg)
 243      Lisp_Object arg;
 244 {
 245   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 246   return Qnil;
 247 }
 248 \f
 249 static Lisp_Object
 250 looking_at_1 (string, posix)
 251      Lisp_Object string;
 252      int posix;
 253 {
 254   Lisp_Object val;
 255   unsigned char *p1, *p2;
 256   int s1, s2;
 257   register int i;
 258   struct re_pattern_buffer *bufp;
 259
 260   if (running_asynch_code)
 261     save_search_regs ();
 262
 263   CHECK_STRING (string, 0);
 264   bufp = compile_pattern (string, &search_regs,
 265                           (!NILP (current_buffer->case_fold_search)
 266                            ? DOWNCASE_TABLE : Qnil),
 267                           posix,
 268                           !NILP (current_buffer->enable_multibyte_characters));
 269
 270   immediate_quit = 1;
 271   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 272
 273   /* Get pointers and sizes of the two strings
 274      that make up the visible portion of the buffer. */
 275
 276   p1 = BEGV_ADDR;
 277   s1 = GPT_BYTE - BEGV_BYTE;
 278   p2 = GAP_END_ADDR;
 279   s2 = ZV_BYTE - GPT_BYTE;
 280   if (s1 < 0)
 281     {
 282       p2 = p1;
 283       s2 = ZV_BYTE - BEGV_BYTE;
 284       s1 = 0;
 285     }
 286   if (s2 < 0)
 287     {
 288       s1 = ZV_BYTE - BEGV_BYTE;
 289       s2 = 0;
 290     }
 291
 292   re_match_object = Qnil;
 293
 294   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 295                   PT_BYTE - BEGV_BYTE, &search_regs,
 296                   ZV_BYTE - BEGV_BYTE);
 297   if (i == -2)
 298     matcher_overflow ();
 299
 300   val = (0 <= i ? Qt : Qnil);
 301   if (i >= 0)
 302     for (i = 0; i < search_regs.num_regs; i++)
 303       if (search_regs.start[i] >= 0)
 304         {
 305           search_regs.start[i]
 306             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 307           search_regs.end[i]
 308             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 309         }
 310   XSETBUFFER (last_thing_searched, current_buffer);
 311   immediate_quit = 0;
 312   return val;
 313 }
 314
 315 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 316   "Return t if text after point matches regular expression REGEXP.\n\
 317 This function modifies the match data that `match-beginning',\n\
 318 `match-end' and `match-data' access; save and restore the match\n\
 319 data if you want to preserve them.")
 320   (regexp)
 321      Lisp_Object regexp;
 322 {
 323   return looking_at_1 (regexp, 0);
 324 }
 325
 326 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 327   "Return t if text after point matches regular expression REGEXP.\n\
 328 Find the longest match, in accord with Posix regular expression rules.\n\
 329 This function modifies the match data that `match-beginning',\n\
 330 `match-end' and `match-data' access; save and restore the match\n\
 331 data if you want to preserve them.")
 332   (regexp)
 333      Lisp_Object regexp;
 334 {
 335   return looking_at_1 (regexp, 1);
 336 }
 337 \f
 338 static Lisp_Object
 339 string_match_1 (regexp, string, start, posix)
 340      Lisp_Object regexp, string, start;
 341      int posix;
 342 {
 343   int val;
 344   struct re_pattern_buffer *bufp;
 345   int pos, pos_byte;
 346   int i;
 347
 348   if (running_asynch_code)
 349     save_search_regs ();
 350
 351   CHECK_STRING (regexp, 0);
 352   CHECK_STRING (string, 1);
 353
 354   if (NILP (start))
 355     pos = 0, pos_byte = 0;
 356   else
 357     {
 358       int len = XSTRING (string)->size;
 359
 360       CHECK_NUMBER (start, 2);
 361       pos = XINT (start);
 362       if (pos < 0 && -pos <= len)
 363         pos = len + pos;
 364       else if (0 > pos || pos > len)
 365         args_out_of_range (string, start);
 366       pos_byte = string_char_to_byte (string, pos);
 367     }
 368
 369   bufp = compile_pattern (regexp, &search_regs,
 370                           (!NILP (current_buffer->case_fold_search)
 371                            ? DOWNCASE_TABLE : Qnil),
 372                           posix,
 373                           STRING_MULTIBYTE (string));
 374   immediate_quit = 1;
 375   re_match_object = string;
 376
 377   val = re_search (bufp, (char *) XSTRING (string)->data,
 378                    STRING_BYTES (XSTRING (string)), pos_byte,
 379                    STRING_BYTES (XSTRING (string)) - pos_byte,
 380                    &search_regs);
 381   immediate_quit = 0;
 382   last_thing_searched = Qt;
 383   if (val == -2)
 384     matcher_overflow ();
 385   if (val < 0) return Qnil;
 386
 387   for (i = 0; i < search_regs.num_regs; i++)
 388     if (search_regs.start[i] >= 0)
 389       {
 390         search_regs.start[i]
 391           = string_byte_to_char (string, search_regs.start[i]);
 392         search_regs.end[i]
 393           = string_byte_to_char (string, search_regs.end[i]);
 394       }
 395
 396   return make_number (string_byte_to_char (string, val));
 397 }
 398
 399 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 400   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 401 If third arg START is non-nil, start search at that index in STRING.\n\
 402 For index of first char beyond the match, do (match-end 0).\n\
 403 `match-end' and `match-beginning' also give indices of substrings\n\
 404 matched by parenthesis constructs in the pattern.")
 405   (regexp, string, start)
 406      Lisp_Object regexp, string, start;
 407 {
 408   return string_match_1 (regexp, string, start, 0);
 409 }
 410
 411 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 412   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 413 Find the longest match, in accord with Posix regular expression rules.\n\
 414 If third arg START is non-nil, start search at that index in STRING.\n\
 415 For index of first char beyond the match, do (match-end 0).\n\
 416 `match-end' and `match-beginning' also give indices of substrings\n\
 417 matched by parenthesis constructs in the pattern.")
 418   (regexp, string, start)
 419      Lisp_Object regexp, string, start;
 420 {
 421   return string_match_1 (regexp, string, start, 1);
 422 }
 423
 424 /* Match REGEXP against STRING, searching all of STRING,
 425    and return the index of the match, or negative on failure.
 426    This does not clobber the match data.  */
 427
 428 int
 429 fast_string_match (regexp, string)
 430      Lisp_Object regexp, string;
 431 {
 432   int val;
 433   struct re_pattern_buffer *bufp;
 434
 435   bufp = compile_pattern (regexp, 0, Qnil,
 436                           0, STRING_MULTIBYTE (string));
 437   immediate_quit = 1;
 438   re_match_object = string;
 439
 440   val = re_search (bufp, (char *) XSTRING (string)->data,
 441                    STRING_BYTES (XSTRING (string)), 0,
 442                    STRING_BYTES (XSTRING (string)), 0);
 443   immediate_quit = 0;
 444   return val;
 445 }
 446
 447 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 448    and return the index of the match, or negative on failure.
 449    This does not clobber the match data.
 450    We assume that STRING contains single-byte characters.  */
 451
 452 extern Lisp_Object Vascii_downcase_table;
 453
 454 int
 455 fast_c_string_match_ignore_case (regexp, string)
 456      Lisp_Object regexp;
 457      char *string;
 458 {
 459   int val;
 460   struct re_pattern_buffer *bufp;
 461   int len = strlen (string);
 462
 463   regexp = string_make_unibyte (regexp);
 464   re_match_object = Qt;
 465   bufp = compile_pattern (regexp, 0,
 466                           Vascii_downcase_table, 0,
 467                           0);
 468   immediate_quit = 1;
 469   val = re_search (bufp, string, len, 0, len, 0);
 470   immediate_quit = 0;
 471   return val;
 472 }
 473 \f
 474 /* The newline cache: remembering which sections of text have no newlines.  */
 475
 476 /* If the user has requested newline caching, make sure it's on.
 477    Otherwise, make sure it's off.
 478    This is our cheezy way of associating an action with the change of
 479    state of a buffer-local variable.  */
 480 static void
 481 newline_cache_on_off (buf)
 482      struct buffer *buf;
 483 {
 484   if (NILP (buf->cache_long_line_scans))
 485     {
 486       /* It should be off.  */
 487       if (buf->newline_cache)
 488         {
 489           free_region_cache (buf->newline_cache);
 490           buf->newline_cache = 0;
 491         }
 492     }
 493   else
 494     {
 495       /* It should be on.  */
 496       if (buf->newline_cache == 0)
 497         buf->newline_cache = new_region_cache ();
 498     }
 499 }
 500
 501 \f
 502 /* Search for COUNT instances of the character TARGET between START and END.
 503
 504    If COUNT is positive, search forwards; END must be >= START.
 505    If COUNT is negative, search backwards for the -COUNTth instance;
 506       END must be <= START.
 507    If COUNT is zero, do anything you please; run rogue, for all I care.
 508
 509    If END is zero, use BEGV or ZV instead, as appropriate for the
 510    direction indicated by COUNT.
 511
 512    If we find COUNT instances, set *SHORTAGE to zero, and return the
 513    position after the COUNTth match.  Note that for reverse motion
 514    this is not the same as the usual convention for Emacs motion commands.
 515
 516    If we don't find COUNT instances before reaching END, set *SHORTAGE
 517    to the number of TARGETs left unfound, and return END.
 518
 519    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 520    except when inside redisplay.  */
 521
 522 int
 523 scan_buffer (target, start, end, count, shortage, allow_quit)
 524      register int target;
 525      int start, end;
 526      int count;
 527      int *shortage;
 528      int allow_quit;
 529 {
 530   struct region_cache *newline_cache;
 531   int direction;
 532
 533   if (count > 0)
 534     {
 535       direction = 1;
 536       if (! end) end = ZV;
 537     }
 538   else
 539     {
 540       direction = -1;
 541       if (! end) end = BEGV;
 542     }
 543
 544   newline_cache_on_off (current_buffer);
 545   newline_cache = current_buffer->newline_cache;
 546
 547   if (shortage != 0)
 548     *shortage = 0;
 549
 550   immediate_quit = allow_quit;
 551
 552   if (count > 0)
 553     while (start != end)
 554       {
 555         /* Our innermost scanning loop is very simple; it doesn't know
 556            about gaps, buffer ends, or the newline cache.  ceiling is
 557            the position of the last character before the next such
 558            obstacle --- the last character the dumb search loop should
 559            examine.  */
 560         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 561         int start_byte = CHAR_TO_BYTE (start);
 562         int tem;
 563
 564         /* If we're looking for a newline, consult the newline cache
 565            to see where we can avoid some scanning.  */
 566         if (target == '\n' && newline_cache)
 567           {
 568             int next_change;
 569             immediate_quit = 0;
 570             while (region_cache_forward
 571                    (current_buffer, newline_cache, start_byte, &next_change))
 572               start_byte = next_change;
 573             immediate_quit = allow_quit;
 574
 575             /* START should never be after END.  */
 576             if (start_byte > ceiling_byte)
 577               start_byte = ceiling_byte;
 578
 579             /* Now the text after start is an unknown region, and
 580                next_change is the position of the next known region. */
 581             ceiling_byte = min (next_change - 1, ceiling_byte);
 582           }
 583
 584         /* The dumb loop can only scan text stored in contiguous
 585            bytes. BUFFER_CEILING_OF returns the last character
 586            position that is contiguous, so the ceiling is the
 587            position after that.  */
 588         tem = BUFFER_CEILING_OF (start_byte);
 589         ceiling_byte = min (tem, ceiling_byte);
 590
 591         {
 592           /* The termination address of the dumb loop.  */
 593           register unsigned char *ceiling_addr
 594             = BYTE_POS_ADDR (ceiling_byte) + 1;
 595           register unsigned char *cursor
 596             = BYTE_POS_ADDR (start_byte);
 597           unsigned char *base = cursor;
 598
 599           while (cursor < ceiling_addr)
 600             {
 601               unsigned char *scan_start = cursor;
 602
 603               /* The dumb loop.  */
 604               while (*cursor != target && ++cursor < ceiling_addr)
 605                 ;
 606
 607               /* If we're looking for newlines, cache the fact that
 608                  the region from start to cursor is free of them. */
 609               if (target == '\n' && newline_cache)
 610                 know_region_cache (current_buffer, newline_cache,
 611                                    start_byte + scan_start - base,
 612                                    start_byte + cursor - base);
 613
 614               /* Did we find the target character?  */
 615               if (cursor < ceiling_addr)
 616                 {
 617                   if (--count == 0)
 618                     {
 619                       immediate_quit = 0;
 620                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 621                     }
 622                   cursor++;
 623                 }
 624             }
 625
 626           start = BYTE_TO_CHAR (start_byte + cursor - base);
 627         }
 628       }
 629   else
 630     while (start > end)
 631       {
 632         /* The last character to check before the next obstacle.  */
 633         int ceiling_byte = CHAR_TO_BYTE (end);
 634         int start_byte = CHAR_TO_BYTE (start);
 635         int tem;
 636
 637         /* Consult the newline cache, if appropriate.  */
 638         if (target == '\n' && newline_cache)
 639           {
 640             int next_change;
 641             immediate_quit = 0;
 642             while (region_cache_backward
 643                    (current_buffer, newline_cache, start_byte, &next_change))
 644               start_byte = next_change;
 645             immediate_quit = allow_quit;
 646
 647             /* Start should never be at or before end.  */
 648             if (start_byte <= ceiling_byte)
 649               start_byte = ceiling_byte + 1;
 650
 651             /* Now the text before start is an unknown region, and
 652                next_change is the position of the next known region. */
 653             ceiling_byte = max (next_change, ceiling_byte);
 654           }
 655
 656         /* Stop scanning before the gap.  */
 657         tem = BUFFER_FLOOR_OF (start_byte - 1);
 658         ceiling_byte = max (tem, ceiling_byte);
 659
 660         {
 661           /* The termination address of the dumb loop.  */
 662           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 663           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 664           unsigned char *base = cursor;
 665
 666           while (cursor >= ceiling_addr)
 667             {
 668               unsigned char *scan_start = cursor;
 669
 670               while (*cursor != target && --cursor >= ceiling_addr)
 671                 ;
 672
 673               /* If we're looking for newlines, cache the fact that
 674                  the region from after the cursor to start is free of them.  */
 675               if (target == '\n' && newline_cache)
 676                 know_region_cache (current_buffer, newline_cache,
 677                                    start_byte + cursor - base,
 678                                    start_byte + scan_start - base);
 679
 680               /* Did we find the target character?  */
 681               if (cursor >= ceiling_addr)
 682                 {
 683                   if (++count >= 0)
 684                     {
 685                       immediate_quit = 0;
 686                       return BYTE_TO_CHAR (start_byte + cursor - base);
 687                     }
 688                   cursor--;
 689                 }
 690             }
 691
 692           start = BYTE_TO_CHAR (start_byte + cursor - base);
 693         }
 694       }
 695
 696   immediate_quit = 0;
 697   if (shortage != 0)
 698     *shortage = count * direction;
 699   return start;
 700 }
 701 \f
 702 /* Search for COUNT instances of a line boundary, which means either a
 703    newline or (if selective display enabled) a carriage return.
 704    Start at START.  If COUNT is negative, search backwards.
 705
 706    We report the resulting position by calling TEMP_SET_PT_BOTH.
 707
 708    If we find COUNT instances. we position after (always after,
 709    even if scanning backwards) the COUNTth match, and return 0.
 710
 711    If we don't find COUNT instances before reaching the end of the
 712    buffer (or the beginning, if scanning backwards), we return
 713    the number of line boundaries left unfound, and position at
 714    the limit we bumped up against.
 715
 716    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 717    except in special cases.  */
 718
 719 int
 720 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 721      int start, start_byte;
 722      int limit, limit_byte;
 723      register int count;
 724      int allow_quit;
 725 {
 726   int direction = ((count > 0) ? 1 : -1);
 727
 728   register unsigned char *cursor;
 729   unsigned char *base;
 730
 731   register int ceiling;
 732   register unsigned char *ceiling_addr;
 733
 734   int old_immediate_quit = immediate_quit;
 735
 736   /* If we are not in selective display mode,
 737      check only for newlines.  */
 738   int selective_display = (!NILP (current_buffer->selective_display)
 739                            && !INTEGERP (current_buffer->selective_display));
 740
 741   /* The code that follows is like scan_buffer
 742      but checks for either newline or carriage return.  */
 743
 744   if (allow_quit)
 745     immediate_quit++;
 746
 747   start_byte = CHAR_TO_BYTE (start);
 748
 749   if (count > 0)
 750     {
 751       while (start_byte < limit_byte)
 752         {
 753           ceiling =  BUFFER_CEILING_OF (start_byte);
 754           ceiling = min (limit_byte - 1, ceiling);
 755           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 756           base = (cursor = BYTE_POS_ADDR (start_byte));
 757           while (1)
 758             {
 759               while (*cursor != '\n' && ++cursor != ceiling_addr)
 760                 ;
 761
 762               if (cursor != ceiling_addr)
 763                 {
 764                   if (--count == 0)
 765                     {
 766                       immediate_quit = old_immediate_quit;
 767                       start_byte = start_byte + cursor - base + 1;
 768                       start = BYTE_TO_CHAR (start_byte);
 769                       TEMP_SET_PT_BOTH (start, start_byte);
 770                       return 0;
 771                     }
 772                   else
 773                     if (++cursor == ceiling_addr)
 774                       break;
 775                 }
 776               else
 777                 break;
 778             }
 779           start_byte += cursor - base;
 780         }
 781     }
 782   else
 783     {
 784       while (start_byte > limit_byte)
 785         {
 786           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 787           ceiling = max (limit_byte, ceiling);
 788           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 789           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 790           while (1)
 791             {
 792               while (--cursor != ceiling_addr && *cursor != '\n')
 793                 ;
 794
 795               if (cursor != ceiling_addr)
 796                 {
 797                   if (++count == 0)
 798                     {
 799                       immediate_quit = old_immediate_quit;
 800                       /* Return the position AFTER the match we found.  */
 801                       start_byte = start_byte + cursor - base + 1;
 802                       start = BYTE_TO_CHAR (start_byte);
 803                       TEMP_SET_PT_BOTH (start, start_byte);
 804                       return 0;
 805                     }
 806                 }
 807               else
 808                 break;
 809             }
 810           /* Here we add 1 to compensate for the last decrement
 811              of CURSOR, which took it past the valid range.  */
 812           start_byte += cursor - base + 1;
 813         }
 814     }
 815
 816   TEMP_SET_PT_BOTH (limit, limit_byte);
 817   immediate_quit = old_immediate_quit;
 818
 819   return count * direction;
 820 }
 821
 822 int
 823 find_next_newline_no_quit (from, cnt)
 824      register int from, cnt;
 825 {
 826   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 827 }
 828
 829 /* Like find_next_newline, but returns position before the newline,
 830    not after, and only search up to TO.  This isn't just
 831    find_next_newline (...)-1, because you might hit TO.  */
 832
 833 int
 834 find_before_next_newline (from, to, cnt)
 835      int from, to, cnt;
 836 {
 837   int shortage;
 838   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 839
 840   if (shortage == 0)
 841     pos--;
 842
 843   return pos;
 844 }
 845 \f
 846 /* Subroutines of Lisp buffer search functions. */
 847
 848 static Lisp_Object
 849 search_command (string, bound, noerror, count, direction, RE, posix)
 850      Lisp_Object string, bound, noerror, count;
 851      int direction;
 852      int RE;
 853      int posix;
 854 {
 855   register int np;
 856   int lim, lim_byte;
 857   int n = direction;
 858
 859   if (!NILP (count))
 860     {
 861       CHECK_NUMBER (count, 3);
 862       n *= XINT (count);
 863     }
 864
 865   CHECK_STRING (string, 0);
 866   if (NILP (bound))
 867     {
 868       if (n > 0)
 869         lim = ZV, lim_byte = ZV_BYTE;
 870       else
 871         lim = BEGV, lim_byte = BEGV_BYTE;
 872     }
 873   else
 874     {
 875       CHECK_NUMBER_COERCE_MARKER (bound, 1);
 876       lim = XINT (bound);
 877       if (n > 0 ? lim < PT : lim > PT)
 878         error ("Invalid search bound (wrong side of point)");
 879       if (lim > ZV)
 880         lim = ZV, lim_byte = ZV_BYTE;
 881       else if (lim < BEGV)
 882         lim = BEGV, lim_byte = BEGV_BYTE;
 883       else
 884         lim_byte = CHAR_TO_BYTE (lim);
 885     }
 886
 887   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 888                       (!NILP (current_buffer->case_fold_search)
 889                        ? current_buffer->case_canon_table
 890                        : Qnil),
 891                       (!NILP (current_buffer->case_fold_search)
 892                        ? current_buffer->case_eqv_table
 893                        : Qnil),
 894                       posix);
 895   if (np <= 0)
 896     {
 897       if (NILP (noerror))
 898         return signal_failure (string);
 899       if (!EQ (noerror, Qt))
 900         {
 901           if (lim < BEGV || lim > ZV)
 902             abort ();
 903           SET_PT_BOTH (lim, lim_byte);
 904           return Qnil;
 905 #if 0 /* This would be clean, but maybe programs depend on
 906          a value of nil here.  */
 907           np = lim;
 908 #endif
 909         }
 910       else
 911         return Qnil;
 912     }
 913
 914   if (np < BEGV || np > ZV)
 915     abort ();
 916
 917   SET_PT (np);
 918
 919   return make_number (np);
 920 }
 921 \f
 922 /* Return 1 if REGEXP it matches just one constant string.  */
 923
 924 static int
 925 trivial_regexp_p (regexp)
 926      Lisp_Object regexp;
 927 {
 928   int len = STRING_BYTES (XSTRING (regexp));
 929   unsigned char *s = XSTRING (regexp)->data;
 930   unsigned char c;
 931   while (--len >= 0)
 932     {
 933       switch (*s++)
 934         {
 935         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 936           return 0;
 937         case '\\':
 938           if (--len < 0)
 939             return 0;
 940           switch (*s++)
 941             {
 942             case '|': case '(': case ')': case '`': case '\'': case 'b':
 943             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 944             case 'S': case '=':
 945             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 946             case '1': case '2': case '3': case '4': case '5':
 947             case '6': case '7': case '8': case '9':
 948               return 0;
 949             }
 950         }
 951     }
 952   return 1;
 953 }
 954
 955 /* Search for the n'th occurrence of STRING in the current buffer,
 956    starting at position POS and stopping at position LIM,
 957    treating STRING as a literal string if RE is false or as
 958    a regular expression if RE is true.
 959
 960    If N is positive, searching is forward and LIM must be greater than POS.
 961    If N is negative, searching is backward and LIM must be less than POS.
 962
 963    Returns -x if x occurrences remain to be found (x > 0),
 964    or else the position at the beginning of the Nth occurrence
 965    (if searching backward) or the end (if searching forward).
 966
 967    POSIX is nonzero if we want full backtracking (POSIX style)
 968    for this pattern.  0 means backtrack only enough to get a valid match.  */
 969
 970 #define TRANSLATE(out, trt, d)                  \
 971 do                                              \
 972   {                                             \
 973     if (! NILP (trt))                           \
 974       {                                         \
 975         Lisp_Object temp;                       \
 976         temp = Faref (trt, make_number (d));    \
 977         if (INTEGERP (temp))                    \
 978           out = XINT (temp);                    \
 979         else                                    \
 980           out = d;                              \
 981       }                                         \
 982     else                                        \
 983       out = d;                                  \
 984   }                                             \
 985 while (0)
 986
 987 static int
 988 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
 989                RE, trt, inverse_trt, posix)
 990      Lisp_Object string;
 991      int pos;
 992      int pos_byte;
 993      int lim;
 994      int lim_byte;
 995      int n;
 996      int RE;
 997      Lisp_Object trt;
 998      Lisp_Object inverse_trt;
 999      int posix;
1000 {
1001   int len = XSTRING (string)->size;
1002   int len_byte = STRING_BYTES (XSTRING (string));
1003   register int i;
1004
1005   if (running_asynch_code)
1006     save_search_regs ();
1007
1008   /* Searching 0 times means don't move.  */
1009   /* Null string is found at starting position.  */
1010   if (len == 0 || n == 0)
1011     {
1012       set_search_regs (pos, 0);
1013       return pos;
1014     }
1015
1016   if (RE && !trivial_regexp_p (string))
1017     {
1018       unsigned char *p1, *p2;
1019       int s1, s2;
1020       struct re_pattern_buffer *bufp;
1021
1022       bufp = compile_pattern (string, &search_regs, trt, posix,
1023                               !NILP (current_buffer->enable_multibyte_characters));
1024
1025       immediate_quit = 1;       /* Quit immediately if user types ^G,
1026                                    because letting this function finish
1027                                    can take too long. */
1028       QUIT;                     /* Do a pending quit right away,
1029                                    to avoid paradoxical behavior */
1030       /* Get pointers and sizes of the two strings
1031          that make up the visible portion of the buffer. */
1032
1033       p1 = BEGV_ADDR;
1034       s1 = GPT_BYTE - BEGV_BYTE;
1035       p2 = GAP_END_ADDR;
1036       s2 = ZV_BYTE - GPT_BYTE;
1037       if (s1 < 0)
1038         {
1039           p2 = p1;
1040           s2 = ZV_BYTE - BEGV_BYTE;
1041           s1 = 0;
1042         }
1043       if (s2 < 0)
1044         {
1045           s1 = ZV_BYTE - BEGV_BYTE;
1046           s2 = 0;
1047         }
1048       re_match_object = Qnil;
1049
1050       while (n < 0)
1051         {
1052           int val;
1053           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1054                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1055                              &search_regs,
1056                              /* Don't allow match past current point */
1057                              pos_byte - BEGV_BYTE);
1058           if (val == -2)
1059             {
1060               matcher_overflow ();
1061             }
1062           if (val >= 0)
1063             {
1064               pos_byte = search_regs.start[0] + BEGV_BYTE;
1065               for (i = 0; i < search_regs.num_regs; i++)
1066                 if (search_regs.start[i] >= 0)
1067                   {
1068                     search_regs.start[i]
1069                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1070                     search_regs.end[i]
1071                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1072                   }
1073               XSETBUFFER (last_thing_searched, current_buffer);
1074               /* Set pos to the new position. */
1075               pos = search_regs.start[0];
1076             }
1077           else
1078             {
1079               immediate_quit = 0;
1080               return (n);
1081             }
1082           n++;
1083         }
1084       while (n > 0)
1085         {
1086           int val;
1087           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1088                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1089                              &search_regs,
1090                              lim_byte - BEGV_BYTE);
1091           if (val == -2)
1092             {
1093               matcher_overflow ();
1094             }
1095           if (val >= 0)
1096             {
1097               pos_byte = search_regs.end[0] + BEGV_BYTE;
1098               for (i = 0; i < search_regs.num_regs; i++)
1099                 if (search_regs.start[i] >= 0)
1100                   {
1101                     search_regs.start[i]
1102                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1103                     search_regs.end[i]
1104                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1105                   }
1106               XSETBUFFER (last_thing_searched, current_buffer);
1107               pos = search_regs.end[0];
1108             }
1109           else
1110             {
1111               immediate_quit = 0;
1112               return (0 - n);
1113             }
1114           n--;
1115         }
1116       immediate_quit = 0;
1117       return (pos);
1118     }
1119   else                          /* non-RE case */
1120     {
1121       unsigned char *raw_pattern, *pat;
1122       int raw_pattern_size;
1123       int raw_pattern_size_byte;
1124       unsigned char *patbuf;
1125       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1126       unsigned char *base_pat = XSTRING (string)->data;
1127       int charset_base = -1;
1128       int simple = 1;
1129
1130       /* MULTIBYTE says whether the text to be searched is multibyte.
1131          We must convert PATTERN to match that, or we will not really
1132          find things right.  */
1133
1134       if (multibyte == STRING_MULTIBYTE (string))
1135         {
1136           raw_pattern = (unsigned char *) XSTRING (string)->data;
1137           raw_pattern_size = XSTRING (string)->size;
1138           raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1139         }
1140       else if (multibyte)
1141         {
1142           raw_pattern_size = XSTRING (string)->size;
1143           raw_pattern_size_byte
1144             = count_size_as_multibyte (XSTRING (string)->data,
1145                                        raw_pattern_size);
1146           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1147           copy_text (XSTRING (string)->data, raw_pattern,
1148                      XSTRING (string)->size, 0, 1);
1149         }
1150       else
1151         {
1152           /* Converting multibyte to single-byte.
1153
1154              ??? Perhaps this conversion should be done in a special way
1155              by subtracting nonascii-insert-offset from each non-ASCII char,
1156              so that only the multibyte chars which really correspond to
1157              the chosen single-byte character set can possibly match.  */
1158           raw_pattern_size = XSTRING (string)->size;
1159           raw_pattern_size_byte = XSTRING (string)->size;
1160           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1161           copy_text (XSTRING (string)->data, raw_pattern,
1162                      STRING_BYTES (XSTRING (string)), 1, 0);
1163         }
1164
1165       /* Copy and optionally translate the pattern.  */
1166       len = raw_pattern_size;
1167       len_byte = raw_pattern_size_byte;
1168       patbuf = (unsigned char *) alloca (len_byte);
1169       pat = patbuf;
1170       base_pat = raw_pattern;
1171       if (multibyte)
1172         {
1173           while (--len >= 0)
1174             {
1175               unsigned char workbuf[4], *str;
1176               int c, translated, inverse;
1177               int in_charlen, charlen;
1178
1179               /* If we got here and the RE flag is set, it's because we're
1180                  dealing with a regexp known to be trivial, so the backslash
1181                  just quotes the next character.  */
1182               if (RE && *base_pat == '\\')
1183                 {
1184                   len--;
1185                   len_byte--;
1186                   base_pat++;
1187                 }
1188
1189               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1190               /* Translate the character, if requested.  */
1191               TRANSLATE (translated, trt, c);
1192               /* If translation changed the byte-length, go back
1193                  to the original character.  */
1194               charlen = CHAR_STRING (translated, workbuf, str);
1195               if (in_charlen != charlen)
1196                 {
1197                   translated = c;
1198                   charlen = CHAR_STRING (c, workbuf, str);
1199                 }
1200
1201               TRANSLATE (inverse, inverse_trt, c);
1202
1203               /* Did this char actually get translated?
1204                  Would any other char get translated into it?  */
1205               if (translated != c || inverse != c)
1206                 {
1207                   /* Keep track of which character set row
1208                      contains the characters that need translation.  */
1209                   int charset_base_code = c & ~0xff;
1210                   if (charset_base == -1)
1211                     charset_base = charset_base_code;
1212                   else if (charset_base != charset_base_code)
1213                     /* If two different rows appear, needing translation,
1214                        then we cannot use boyer_moore search.  */
1215                     simple = 0;
1216                     /* ??? Handa: this must do simple = 0
1217                        if c is a composite character.  */
1218                 }
1219
1220               /* Store this character into the translated pattern.  */
1221               bcopy (str, pat, charlen);
1222               pat += charlen;
1223               base_pat += in_charlen;
1224               len_byte -= in_charlen;
1225             }
1226         }
1227       else
1228         {
1229           while (--len >= 0)
1230             {
1231               int c, translated, inverse;
1232
1233               /* If we got here and the RE flag is set, it's because we're
1234                  dealing with a regexp known to be trivial, so the backslash
1235                  just quotes the next character.  */
1236               if (RE && *base_pat == '\\')
1237                 {
1238                   len--;
1239                   base_pat++;
1240                 }
1241               c = *base_pat++;
1242               TRANSLATE (translated, trt, c);
1243               TRANSLATE (inverse, inverse_trt, c);
1244
1245               /* Did this char actually get translated?
1246                  Would any other char get translated into it?  */
1247               if (translated != c || inverse != c)
1248                 {
1249                   /* Keep track of which character set row
1250                      contains the characters that need translation.  */
1251                   int charset_base_code = c & ~0xff;
1252                   if (charset_base == -1)
1253                     charset_base = charset_base_code;
1254                   else if (charset_base != charset_base_code)
1255                     /* If two different rows appear, needing translation,
1256                        then we cannot use boyer_moore search.  */
1257                     simple = 0;
1258                 }
1259               *pat++ = translated;
1260             }
1261         }
1262
1263       len_byte = pat - patbuf;
1264       len = raw_pattern_size;
1265       pat = base_pat = patbuf;
1266
1267       if (simple)
1268         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1269                             pos, pos_byte, lim, lim_byte,
1270                             charset_base);
1271       else
1272         return simple_search (n, pat, len, len_byte, trt,
1273                               pos, pos_byte, lim, lim_byte);
1274     }
1275 }
1276 \f
1277 /* Do a simple string search N times for the string PAT,
1278    whose length is LEN/LEN_BYTE,
1279    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1280    TRT is the translation table.
1281
1282    Return the character position where the match is found.
1283    Otherwise, if M matches remained to be found, return -M.
1284
1285    This kind of search works regardless of what is in PAT and
1286    regardless of what is in TRT.  It is used in cases where
1287    boyer_moore cannot work.  */
1288
1289 static int
1290 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1291      int n;
1292      unsigned char *pat;
1293      int len, len_byte;
1294      Lisp_Object trt;
1295      int pos, pos_byte;
1296      int lim, lim_byte;
1297 {
1298   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1299   int forward = n > 0;
1300
1301   if (lim > pos && multibyte)
1302     while (n > 0)
1303       {
1304         while (1)
1305           {
1306             /* Try matching at position POS.  */
1307             int this_pos = pos;
1308             int this_pos_byte = pos_byte;
1309             int this_len = len;
1310             int this_len_byte = len_byte;
1311             unsigned char *p = pat;
1312             if (pos + len > lim)
1313               goto stop;
1314
1315             while (this_len > 0)
1316               {
1317                 int charlen, buf_charlen;
1318                 int pat_ch, buf_ch;
1319
1320                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1321                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1322                                                  ZV_BYTE - this_pos_byte,
1323                                                  buf_charlen);
1324                 TRANSLATE (buf_ch, trt, buf_ch);
1325
1326                 if (buf_ch != pat_ch)
1327                   break;
1328
1329                 this_len_byte -= charlen;
1330                 this_len--;
1331                 p += charlen;
1332
1333                 this_pos_byte += buf_charlen;
1334                 this_pos++;
1335               }
1336
1337             if (this_len == 0)
1338               {
1339                 pos += len;
1340                 pos_byte += len_byte;
1341                 break;
1342               }
1343
1344             INC_BOTH (pos, pos_byte);
1345           }
1346
1347         n--;
1348       }
1349   else if (lim > pos)
1350     while (n > 0)
1351       {
1352         while (1)
1353           {
1354             /* Try matching at position POS.  */
1355             int this_pos = pos;
1356             int this_len = len;
1357             unsigned char *p = pat;
1358
1359             if (pos + len > lim)
1360               goto stop;
1361
1362             while (this_len > 0)
1363               {
1364                 int pat_ch = *p++;
1365                 int buf_ch = FETCH_BYTE (this_pos);
1366                 TRANSLATE (buf_ch, trt, buf_ch);
1367
1368                 if (buf_ch != pat_ch)
1369                   break;
1370
1371                 this_len--;
1372                 this_pos++;
1373               }
1374
1375             if (this_len == 0)
1376               {
1377                 pos += len;
1378                 break;
1379               }
1380
1381             pos++;
1382           }
1383
1384         n--;
1385       }
1386   /* Backwards search.  */
1387   else if (lim < pos && multibyte)
1388     while (n < 0)
1389       {
1390         while (1)
1391           {
1392             /* Try matching at position POS.  */
1393             int this_pos = pos - len;
1394             int this_pos_byte = pos_byte - len_byte;
1395             int this_len = len;
1396             int this_len_byte = len_byte;
1397             unsigned char *p = pat;
1398
1399             if (pos - len < lim)
1400               goto stop;
1401
1402             while (this_len > 0)
1403               {
1404                 int charlen, buf_charlen;
1405                 int pat_ch, buf_ch;
1406
1407                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1408                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1409                                                  ZV_BYTE - this_pos_byte,
1410                                                  buf_charlen);
1411                 TRANSLATE (buf_ch, trt, buf_ch);
1412
1413                 if (buf_ch != pat_ch)
1414                   break;
1415
1416                 this_len_byte -= charlen;
1417                 this_len--;
1418                 p += charlen;
1419                 this_pos_byte += buf_charlen;
1420                 this_pos++;
1421               }
1422
1423             if (this_len == 0)
1424               {
1425                 pos -= len;
1426                 pos_byte -= len_byte;
1427                 break;
1428               }
1429
1430             DEC_BOTH (pos, pos_byte);
1431           }
1432
1433         n++;
1434       }
1435   else if (lim < pos)
1436     while (n < 0)
1437       {
1438         while (1)
1439           {
1440             /* Try matching at position POS.  */
1441             int this_pos = pos - len;
1442             int this_len = len;
1443             unsigned char *p = pat;
1444
1445             if (pos - len < lim)
1446               goto stop;
1447
1448             while (this_len > 0)
1449               {
1450                 int pat_ch = *p++;
1451                 int buf_ch = FETCH_BYTE (this_pos);
1452                 TRANSLATE (buf_ch, trt, buf_ch);
1453
1454                 if (buf_ch != pat_ch)
1455                   break;
1456                 this_len--;
1457                 this_pos++;
1458               }
1459
1460             if (this_len == 0)
1461               {
1462                 pos -= len;
1463                 break;
1464               }
1465
1466             pos--;
1467           }
1468
1469         n++;
1470       }
1471
1472  stop:
1473   if (n == 0)
1474     {
1475       if (forward)
1476         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1477       else
1478         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1479
1480       return pos;
1481     }
1482   else if (n > 0)
1483     return -n;
1484   else
1485     return n;
1486 }
1487 \f
1488 /* Do Boyer-Moore search N times for the string PAT,
1489    whose length is LEN/LEN_BYTE,
1490    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1491    DIRECTION says which direction we search in.
1492    TRT and INVERSE_TRT are translation tables.
1493
1494    This kind of search works if all the characters in PAT that have
1495    nontrivial translation are the same aside from the last byte.  This
1496    makes it possible to translate just the last byte of a character,
1497    and do so after just a simple test of the context.
1498
1499    If that criterion is not satisfied, do not call this function.  */
1500
1501 static int
1502 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1503              pos, pos_byte, lim, lim_byte, charset_base)
1504      int n;
1505      unsigned char *base_pat;
1506      int len, len_byte;
1507      Lisp_Object trt;
1508      Lisp_Object inverse_trt;
1509      int pos, pos_byte;
1510      int lim, lim_byte;
1511      int charset_base;
1512 {
1513   int direction = ((n > 0) ? 1 : -1);
1514   register int dirlen;
1515   int infinity, limit, k, stride_for_teases;
1516   register int *BM_tab;
1517   int *BM_tab_base;
1518   register unsigned char *cursor, *p_limit;
1519   register int i, j;
1520   unsigned char *pat, *pat_end;
1521   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1522
1523   unsigned char simple_translate[0400];
1524   int translate_prev_byte;
1525   int translate_anteprev_byte;
1526
1527 #ifdef C_ALLOCA
1528   int BM_tab_space[0400];
1529   BM_tab = &BM_tab_space[0];
1530 #else
1531   BM_tab = (int *) alloca (0400 * sizeof (int));
1532 #endif
1533   /* The general approach is that we are going to maintain that we know */
1534   /* the first (closest to the present position, in whatever direction */
1535   /* we're searching) character that could possibly be the last */
1536   /* (furthest from present position) character of a valid match.  We */
1537   /* advance the state of our knowledge by looking at that character */
1538   /* and seeing whether it indeed matches the last character of the */
1539   /* pattern.  If it does, we take a closer look.  If it does not, we */
1540   /* move our pointer (to putative last characters) as far as is */
1541   /* logically possible.  This amount of movement, which I call a */
1542   /* stride, will be the length of the pattern if the actual character */
1543   /* appears nowhere in the pattern, otherwise it will be the distance */
1544   /* from the last occurrence of that character to the end of the */
1545   /* pattern. */
1546   /* As a coding trick, an enormous stride is coded into the table for */
1547   /* characters that match the last character.  This allows use of only */
1548   /* a single test, a test for having gone past the end of the */
1549   /* permissible match region, to test for both possible matches (when */
1550   /* the stride goes past the end immediately) and failure to */
1551   /* match (where you get nudged past the end one stride at a time). */
1552
1553   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1554   /* is determined only by the last character of the putative match. */
1555   /* If that character does not match, we will stride the proper */
1556   /* distance to propose a match that superimposes it on the last */
1557   /* instance of a character that matches it (per trt), or misses */
1558   /* it entirely if there is none. */
1559
1560   dirlen = len_byte * direction;
1561   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1562
1563   /* Record position after the end of the pattern.  */
1564   pat_end = base_pat + len_byte;
1565   /* BASE_PAT points to a character that we start scanning from.
1566      It is the first character in a forward search,
1567      the last character in a backward search.  */
1568   if (direction < 0)
1569     base_pat = pat_end - 1;
1570
1571   BM_tab_base = BM_tab;
1572   BM_tab += 0400;
1573   j = dirlen;           /* to get it in a register */
1574   /* A character that does not appear in the pattern induces a */
1575   /* stride equal to the pattern length. */
1576   while (BM_tab_base != BM_tab)
1577     {
1578       *--BM_tab = j;
1579       *--BM_tab = j;
1580       *--BM_tab = j;
1581       *--BM_tab = j;
1582     }
1583
1584   /* We use this for translation, instead of TRT itself.
1585      We fill this in to handle the characters that actually
1586      occur in the pattern.  Others don't matter anyway!  */
1587   bzero (simple_translate, sizeof simple_translate);
1588   for (i = 0; i < 0400; i++)
1589     simple_translate[i] = i;
1590
1591   i = 0;
1592   while (i != infinity)
1593     {
1594       unsigned char *ptr = base_pat + i;
1595       i += direction;
1596       if (i == dirlen)
1597         i = infinity;
1598       if (! NILP (trt))
1599         {
1600           int ch;
1601           int untranslated;
1602           int this_translated = 1;
1603
1604           if (multibyte
1605               /* Is *PTR the last byte of a character?  */
1606               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1607             {
1608               unsigned char *charstart = ptr;
1609               while (! CHAR_HEAD_P (*charstart))
1610                 charstart--;
1611               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1612               if (charset_base == (untranslated & ~0xff))
1613                 {
1614                   TRANSLATE (ch, trt, untranslated);
1615                   if (! CHAR_HEAD_P (*ptr))
1616                     {
1617                       translate_prev_byte = ptr[-1];
1618                       if (! CHAR_HEAD_P (translate_prev_byte))
1619                         translate_anteprev_byte = ptr[-2];
1620                     }
1621                 }
1622               else
1623                 {
1624                   this_translated = 0;
1625                   ch = *ptr;
1626                 }
1627             }
1628           else if (!multibyte)
1629             TRANSLATE (ch, trt, *ptr);
1630           else
1631             {
1632               ch = *ptr;
1633               this_translated = 0;
1634             }
1635
1636           if (ch > 0400)
1637             j = ((unsigned char) ch) | 0200;
1638           else
1639             j = (unsigned char) ch;
1640
1641           if (i == infinity)
1642             stride_for_teases = BM_tab[j];
1643
1644           BM_tab[j] = dirlen - i;
1645           /* A translation table is accompanied by its inverse -- see */
1646           /* comment following downcase_table for details */
1647           if (this_translated)
1648             {
1649               int starting_ch = ch;
1650               int starting_j = j;
1651               while (1)
1652                 {
1653                   TRANSLATE (ch, inverse_trt, ch);
1654                   if (ch > 0400)
1655                     j = ((unsigned char) ch) | 0200;
1656                   else
1657                     j = (unsigned char) ch;
1658
1659                   /* For all the characters that map into CH,
1660                      set up simple_translate to map the last byte
1661                      into STARTING_J.  */
1662                   simple_translate[j] = starting_j;
1663                   if (ch == starting_ch)
1664                     break;
1665                   BM_tab[j] = dirlen - i;
1666                 }
1667             }
1668         }
1669       else
1670         {
1671           j = *ptr;
1672
1673           if (i == infinity)
1674             stride_for_teases = BM_tab[j];
1675           BM_tab[j] = dirlen - i;
1676         }
1677       /* stride_for_teases tells how much to stride if we get a */
1678       /* match on the far character but are subsequently */
1679       /* disappointed, by recording what the stride would have been */
1680       /* for that character if the last character had been */
1681       /* different. */
1682     }
1683   infinity = dirlen - infinity;
1684   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1685   /* loop invariant - POS_BYTE points at where last char (first
1686      char if reverse) of pattern would align in a possible match.  */
1687   while (n != 0)
1688     {
1689       int tail_end;
1690       unsigned char *tail_end_ptr;
1691
1692       /* It's been reported that some (broken) compiler thinks that
1693          Boolean expressions in an arithmetic context are unsigned.
1694          Using an explicit ?1:0 prevents this.  */
1695       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1696           < 0)
1697         return (n * (0 - direction));
1698       /* First we do the part we can by pointers (maybe nothing) */
1699       QUIT;
1700       pat = base_pat;
1701       limit = pos_byte - dirlen + direction;
1702       if (direction > 0)
1703         {
1704           limit = BUFFER_CEILING_OF (limit);
1705           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1706              can take on without hitting edge of buffer or the gap.  */
1707           limit = min (limit, pos_byte + 20000);
1708           limit = min (limit, lim_byte - 1);
1709         }
1710       else
1711         {
1712           limit = BUFFER_FLOOR_OF (limit);
1713           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1714              can take on without hitting edge of buffer or the gap.  */
1715           limit = max (limit, pos_byte - 20000);
1716           limit = max (limit, lim_byte);
1717         }
1718       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1719       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1720
1721       if ((limit - pos_byte) * direction > 20)
1722         {
1723           unsigned char *p2;
1724
1725           p_limit = BYTE_POS_ADDR (limit);
1726           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1727           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1728           while (1)             /* use one cursor setting as long as i can */
1729             {
1730               if (direction > 0) /* worth duplicating */
1731                 {
1732                   /* Use signed comparison if appropriate
1733                      to make cursor+infinity sure to be > p_limit.
1734                      Assuming that the buffer lies in a range of addresses
1735                      that are all "positive" (as ints) or all "negative",
1736                      either kind of comparison will work as long
1737                      as we don't step by infinity.  So pick the kind
1738                      that works when we do step by infinity.  */
1739                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1740                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1741                       cursor += BM_tab[*cursor];
1742                   else
1743                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1744                       cursor += BM_tab[*cursor];
1745                 }
1746               else
1747                 {
1748                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1749                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1750                       cursor += BM_tab[*cursor];
1751                   else
1752                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1753                       cursor += BM_tab[*cursor];
1754                 }
1755 /* If you are here, cursor is beyond the end of the searched region. */
1756 /* This can happen if you match on the far character of the pattern, */
1757 /* because the "stride" of that character is infinity, a number able */
1758 /* to throw you well beyond the end of the search.  It can also */
1759 /* happen if you fail to match within the permitted region and would */
1760 /* otherwise try a character beyond that region */
1761               if ((cursor - p_limit) * direction <= len_byte)
1762                 break;  /* a small overrun is genuine */
1763               cursor -= infinity; /* large overrun = hit */
1764               i = dirlen - direction;
1765               if (! NILP (trt))
1766                 {
1767                   while ((i -= direction) + direction != 0)
1768                     {
1769                       int ch;
1770                       cursor -= direction;
1771                       /* Translate only the last byte of a character.  */
1772                       if (! multibyte
1773                           || ((cursor == tail_end_ptr
1774                                || CHAR_HEAD_P (cursor[1]))
1775                               && (CHAR_HEAD_P (cursor[0])
1776                                   || (translate_prev_byte == cursor[-1]
1777                                       && (CHAR_HEAD_P (translate_prev_byte)
1778                                           || translate_anteprev_byte == cursor[-2])))))
1779                         ch = simple_translate[*cursor];
1780                       else
1781                         ch = *cursor;
1782                       if (pat[i] != ch)
1783                         break;
1784                     }
1785                 }
1786               else
1787                 {
1788                   while ((i -= direction) + direction != 0)
1789                     {
1790                       cursor -= direction;
1791                       if (pat[i] != *cursor)
1792                         break;
1793                     }
1794                 }
1795               cursor += dirlen - i - direction; /* fix cursor */
1796               if (i + direction == 0)
1797                 {
1798                   int position;
1799
1800                   cursor -= direction;
1801
1802                   position = pos_byte + cursor - p2 + ((direction > 0)
1803                                                        ? 1 - len_byte : 0);
1804                   set_search_regs (position, len_byte);
1805
1806                   if ((n -= direction) != 0)
1807                     cursor += dirlen; /* to resume search */
1808                   else
1809                     return ((direction > 0)
1810                             ? search_regs.end[0] : search_regs.start[0]);
1811                 }
1812               else
1813                 cursor += stride_for_teases; /* <sigh> we lose -  */
1814             }
1815           pos_byte += cursor - p2;
1816         }
1817       else
1818         /* Now we'll pick up a clump that has to be done the hard */
1819         /* way because it covers a discontinuity */
1820         {
1821           limit = ((direction > 0)
1822                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1823                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1824           limit = ((direction > 0)
1825                    ? min (limit + len_byte, lim_byte - 1)
1826                    : max (limit - len_byte, lim_byte));
1827           /* LIMIT is now the last value POS_BYTE can have
1828              and still be valid for a possible match.  */
1829           while (1)
1830             {
1831               /* This loop can be coded for space rather than */
1832               /* speed because it will usually run only once. */
1833               /* (the reach is at most len + 21, and typically */
1834               /* does not exceed len) */
1835               while ((limit - pos_byte) * direction >= 0)
1836                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1837               /* now run the same tests to distinguish going off the */
1838               /* end, a match or a phony match. */
1839               if ((pos_byte - limit) * direction <= len_byte)
1840                 break;  /* ran off the end */
1841               /* Found what might be a match.
1842                  Set POS_BYTE back to last (first if reverse) pos.  */
1843               pos_byte -= infinity;
1844               i = dirlen - direction;
1845               while ((i -= direction) + direction != 0)
1846                 {
1847                   int ch;
1848                   unsigned char *ptr;
1849                   pos_byte -= direction;
1850                   ptr = BYTE_POS_ADDR (pos_byte);
1851                   /* Translate only the last byte of a character.  */
1852                   if (! multibyte
1853                       || ((ptr == tail_end_ptr
1854                            || CHAR_HEAD_P (ptr[1]))
1855                           && (CHAR_HEAD_P (ptr[0])
1856                               || (translate_prev_byte == ptr[-1]
1857                                   && (CHAR_HEAD_P (translate_prev_byte)
1858                                       || translate_anteprev_byte == ptr[-2])))))
1859                     ch = simple_translate[*ptr];
1860                   else
1861                     ch = *ptr;
1862                   if (pat[i] != ch)
1863                     break;
1864                 }
1865               /* Above loop has moved POS_BYTE part or all the way
1866                  back to the first pos (last pos if reverse).
1867                  Set it once again at the last (first if reverse) char.  */
1868               pos_byte += dirlen - i- direction;
1869               if (i + direction == 0)
1870                 {
1871                   int position;
1872                   pos_byte -= direction;
1873
1874                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1875
1876                   set_search_regs (position, len_byte);
1877
1878                   if ((n -= direction) != 0)
1879                     pos_byte += dirlen; /* to resume search */
1880                   else
1881                     return ((direction > 0)
1882                             ? search_regs.end[0] : search_regs.start[0]);
1883                 }
1884               else
1885                 pos_byte += stride_for_teases;
1886             }
1887           }
1888       /* We have done one clump.  Can we continue? */
1889       if ((lim_byte - pos_byte) * direction < 0)
1890         return ((0 - n) * direction);
1891     }
1892   return BYTE_TO_CHAR (pos_byte);
1893 }
1894
1895 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1896    for the overall match just found in the current buffer.
1897    Also clear out the match data for registers 1 and up.  */
1898
1899 static void
1900 set_search_regs (beg_byte, nbytes)
1901      int beg_byte, nbytes;
1902 {
1903   int i;
1904
1905   /* Make sure we have registers in which to store
1906      the match position.  */
1907   if (search_regs.num_regs == 0)
1908     {
1909       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1910       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1911       search_regs.num_regs = 2;
1912     }
1913
1914   /* Clear out the other registers.  */
1915   for (i = 1; i < search_regs.num_regs; i++)
1916     {
1917       search_regs.start[i] = -1;
1918       search_regs.end[i] = -1;
1919     }
1920
1921   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1922   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1923   XSETBUFFER (last_thing_searched, current_buffer);
1924 }
1925 \f
1926 /* Given a string of words separated by word delimiters,
1927   compute a regexp that matches those exact words
1928   separated by arbitrary punctuation.  */
1929
1930 static Lisp_Object
1931 wordify (string)
1932      Lisp_Object string;
1933 {
1934   register unsigned char *p, *o;
1935   register int i, i_byte, len, punct_count = 0, word_count = 0;
1936   Lisp_Object val;
1937   int prev_c = 0;
1938   int adjust;
1939
1940   CHECK_STRING (string, 0);
1941   p = XSTRING (string)->data;
1942   len = XSTRING (string)->size;
1943
1944   for (i = 0, i_byte = 0; i < len; )
1945     {
1946       int c;
1947
1948       if (STRING_MULTIBYTE (string))
1949         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1950       else
1951         c = XSTRING (string)->data[i++];
1952
1953       if (SYNTAX (c) != Sword)
1954         {
1955           punct_count++;
1956           if (i > 0 && SYNTAX (prev_c) == Sword)
1957             word_count++;
1958         }
1959
1960       prev_c = c;
1961     }
1962
1963   if (SYNTAX (prev_c) == Sword)
1964     word_count++;
1965   if (!word_count)
1966     return build_string ("");
1967
1968   adjust = - punct_count + 5 * (word_count - 1) + 4;
1969   val = make_uninit_multibyte_string (len + adjust,
1970                                       STRING_BYTES (XSTRING (string)) + adjust);
1971
1972   o = XSTRING (val)->data;
1973   *o++ = '\\';
1974   *o++ = 'b';
1975   prev_c = 0;
1976
1977   for (i = 0, i_byte = 0; i < len; )
1978     {
1979       int c;
1980       int i_byte_orig = i_byte;
1981
1982       if (STRING_MULTIBYTE (string))
1983         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1984       else
1985         c = XSTRING (string)->data[i++];
1986
1987       if (SYNTAX (c) == Sword)
1988         {
1989           bcopy (&XSTRING (string)->data[i_byte_orig], o,
1990                  i_byte - i_byte_orig);
1991           o += i_byte - i_byte_orig;
1992         }
1993       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
1994         {
1995           *o++ = '\\';
1996           *o++ = 'W';
1997           *o++ = '\\';
1998           *o++ = 'W';
1999           *o++ = '*';
2000         }
2001
2002       prev_c = c;
2003     }
2004
2005   *o++ = '\\';
2006   *o++ = 'b';
2007
2008   return val;
2009 }
2010 \f
2011 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2012   "MSearch backward: ",
2013   "Search backward from point for STRING.\n\
2014 Set point to the beginning of the occurrence found, and return point.\n\
2015 An optional second argument bounds the search; it is a buffer position.\n\
2016 The match found must not extend before that position.\n\
2017 Optional third argument, if t, means if fail just return nil (no error).\n\
2018  If not nil and not t, position at limit of search and return nil.\n\
2019 Optional fourth argument is repeat count--search for successive occurrences.\n\
2020 See also the functions `match-beginning', `match-end' and `replace-match'.")
2021   (string, bound, noerror, count)
2022      Lisp_Object string, bound, noerror, count;
2023 {
2024   return search_command (string, bound, noerror, count, -1, 0, 0);
2025 }
2026
2027 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2028   "Search forward from point for STRING.\n\
2029 Set point to the end of the occurrence found, and return point.\n\
2030 An optional second argument bounds the search; it is a buffer position.\n\
2031 The match found must not extend after that position.  nil is equivalent\n\
2032   to (point-max).\n\
2033 Optional third argument, if t, means if fail just return nil (no error).\n\
2034   If not nil and not t, move to limit of search and return nil.\n\
2035 Optional fourth argument is repeat count--search for successive occurrences.\n\
2036 See also the functions `match-beginning', `match-end' and `replace-match'.")
2037   (string, bound, noerror, count)
2038      Lisp_Object string, bound, noerror, count;
2039 {
2040   return search_command (string, bound, noerror, count, 1, 0, 0);
2041 }
2042
2043 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2044   "sWord search backward: ",
2045   "Search backward from point for STRING, ignoring differences in punctuation.\n\
2046 Set point to the beginning of the occurrence found, and return point.\n\
2047 An optional second argument bounds the search; it is a buffer position.\n\
2048 The match found must not extend before that position.\n\
2049 Optional third argument, if t, means if fail just return nil (no error).\n\
2050   If not nil and not t, move to limit of search and return nil.\n\
2051 Optional fourth argument is repeat count--search for successive occurrences.")
2052   (string, bound, noerror, count)
2053      Lisp_Object string, bound, noerror, count;
2054 {
2055   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2056 }
2057
2058 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2059   "sWord search: ",
2060   "Search forward from point for STRING, ignoring differences in punctuation.\n\
2061 Set point to the end of the occurrence found, and return point.\n\
2062 An optional second argument bounds the search; it is a buffer position.\n\
2063 The match found must not extend after that position.\n\
2064 Optional third argument, if t, means if fail just return nil (no error).\n\
2065   If not nil and not t, move to limit of search and return nil.\n\
2066 Optional fourth argument is repeat count--search for successive occurrences.")
2067   (string, bound, noerror, count)
2068      Lisp_Object string, bound, noerror, count;
2069 {
2070   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2071 }
2072
2073 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2074   "sRE search backward: ",
2075   "Search backward from point for match for regular expression REGEXP.\n\
2076 Set point to the beginning of the match, and return point.\n\
2077 The match found is the one starting last in the buffer\n\
2078 and yet ending before the origin of the search.\n\
2079 An optional second argument bounds the search; it is a buffer position.\n\
2080 The match found must start at or after that position.\n\
2081 Optional third argument, if t, means if fail just return nil (no error).\n\
2082   If not nil and not t, move to limit of search and return nil.\n\
2083 Optional fourth argument is repeat count--search for successive occurrences.\n\
2084 See also the functions `match-beginning', `match-end' and `replace-match'.")
2085   (regexp, bound, noerror, count)
2086      Lisp_Object regexp, bound, noerror, count;
2087 {
2088   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2089 }
2090
2091 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2092   "sRE search: ",
2093   "Search forward from point for regular expression REGEXP.\n\
2094 Set point to the end of the occurrence found, and return point.\n\
2095 An optional second argument bounds the search; it is a buffer position.\n\
2096 The match found must not extend after that position.\n\
2097 Optional third argument, if t, means if fail just return nil (no error).\n\
2098   If not nil and not t, move to limit of search and return nil.\n\
2099 Optional fourth argument is repeat count--search for successive occurrences.\n\
2100 See also the functions `match-beginning', `match-end' and `replace-match'.")
2101   (regexp, bound, noerror, count)
2102      Lisp_Object regexp, bound, noerror, count;
2103 {
2104   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2105 }
2106
2107 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2108   "sPosix search backward: ",
2109   "Search backward from point for match for regular expression REGEXP.\n\
2110 Find the longest match in accord with Posix regular expression rules.\n\
2111 Set point to the beginning of the match, and return point.\n\
2112 The match found is the one starting last in the buffer\n\
2113 and yet ending before the origin of the search.\n\
2114 An optional second argument bounds the search; it is a buffer position.\n\
2115 The match found must start at or after that position.\n\
2116 Optional third argument, if t, means if fail just return nil (no error).\n\
2117   If not nil and not t, move to limit of search and return nil.\n\
2118 Optional fourth argument is repeat count--search for successive occurrences.\n\
2119 See also the functions `match-beginning', `match-end' and `replace-match'.")
2120   (regexp, bound, noerror, count)
2121      Lisp_Object regexp, bound, noerror, count;
2122 {
2123   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2124 }
2125
2126 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2127   "sPosix search: ",
2128   "Search forward from point for regular expression REGEXP.\n\
2129 Find the longest match in accord with Posix regular expression rules.\n\
2130 Set point to the end of the occurrence found, and return point.\n\
2131 An optional second argument bounds the search; it is a buffer position.\n\
2132 The match found must not extend after that position.\n\
2133 Optional third argument, if t, means if fail just return nil (no error).\n\
2134   If not nil and not t, move to limit of search and return nil.\n\
2135 Optional fourth argument is repeat count--search for successive occurrences.\n\
2136 See also the functions `match-beginning', `match-end' and `replace-match'.")
2137   (regexp, bound, noerror, count)
2138      Lisp_Object regexp, bound, noerror, count;
2139 {
2140   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2141 }
2142 \f
2143 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2144   "Replace text matched by last search with NEWTEXT.\n\
2145 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2146 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2147 based on the replaced text.\n\
2148 If the replaced text has only capital letters\n\
2149 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2150 If the replaced text has at least one word starting with a capital letter,\n\
2151 then capitalize each word in NEWTEXT.\n\n\
2152 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2153 Otherwise treat `\\' as special:\n\
2154   `\\&' in NEWTEXT means substitute original matched text.\n\
2155   `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2156        If Nth parens didn't match, substitute nothing.\n\
2157   `\\\\' means insert one `\\'.\n\
2158 FIXEDCASE and LITERAL are optional arguments.\n\
2159 Leaves point at end of replacement text.\n\
2160 \n\
2161 The optional fourth argument STRING can be a string to modify.\n\
2162 In that case, this function creates and returns a new string\n\
2163 which is made by replacing the part of STRING that was matched.\n\
2164 \n\
2165 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2166 It says to replace just that subexpression instead of the whole match.\n\
2167 This is useful only after a regular expression search or match\n\
2168 since only regular expressions have distinguished subexpressions.")
2169   (newtext, fixedcase, literal, string, subexp)
2170      Lisp_Object newtext, fixedcase, literal, string, subexp;
2171 {
2172   enum { nochange, all_caps, cap_initial } case_action;
2173   register int pos, last;
2174   int some_multiletter_word;
2175   int some_lowercase;
2176   int some_uppercase;
2177   int some_nonuppercase_initial;
2178   register int c, prevc;
2179   int inslen;
2180   int sub;
2181   int opoint, newpoint;
2182
2183   CHECK_STRING (newtext, 0);
2184
2185   if (! NILP (string))
2186     CHECK_STRING (string, 4);
2187
2188   case_action = nochange;       /* We tried an initialization */
2189                                 /* but some C compilers blew it */
2190
2191   if (search_regs.num_regs <= 0)
2192     error ("replace-match called before any match found");
2193
2194   if (NILP (subexp))
2195     sub = 0;
2196   else
2197     {
2198       CHECK_NUMBER (subexp, 3);
2199       sub = XINT (subexp);
2200       if (sub < 0 || sub >= search_regs.num_regs)
2201         args_out_of_range (subexp, make_number (search_regs.num_regs));
2202     }
2203
2204   if (NILP (string))
2205     {
2206       if (search_regs.start[sub] < BEGV
2207           || search_regs.start[sub] > search_regs.end[sub]
2208           || search_regs.end[sub] > ZV)
2209         args_out_of_range (make_number (search_regs.start[sub]),
2210                            make_number (search_regs.end[sub]));
2211     }
2212   else
2213     {
2214       if (search_regs.start[sub] < 0
2215           || search_regs.start[sub] > search_regs.end[sub]
2216           || search_regs.end[sub] > XSTRING (string)->size)
2217         args_out_of_range (make_number (search_regs.start[sub]),
2218                            make_number (search_regs.end[sub]));
2219     }
2220
2221   if (NILP (fixedcase))
2222     {
2223       int beg;
2224       /* Decide how to casify by examining the matched text. */
2225
2226       if (NILP (string))
2227         last = CHAR_TO_BYTE (search_regs.end[sub]);
2228       else
2229         last = search_regs.end[sub];
2230
2231       if (NILP (string))
2232         beg = CHAR_TO_BYTE (search_regs.start[sub]);
2233       else
2234         beg = search_regs.start[sub];
2235
2236       prevc = '\n';
2237       case_action = all_caps;
2238
2239       /* some_multiletter_word is set nonzero if any original word
2240          is more than one letter long. */
2241       some_multiletter_word = 0;
2242       some_lowercase = 0;
2243       some_nonuppercase_initial = 0;
2244       some_uppercase = 0;
2245
2246       for (pos = beg; pos < last; pos++)
2247         {
2248           if (NILP (string))
2249             c = FETCH_BYTE (pos);
2250           else
2251             c = XSTRING (string)->data[pos];
2252
2253           if (LOWERCASEP (c))
2254             {
2255               /* Cannot be all caps if any original char is lower case */
2256
2257               some_lowercase = 1;
2258               if (SYNTAX (prevc) != Sword)
2259                 some_nonuppercase_initial = 1;
2260               else
2261                 some_multiletter_word = 1;
2262             }
2263           else if (!NOCASEP (c))
2264             {
2265               some_uppercase = 1;
2266               if (SYNTAX (prevc) != Sword)
2267                 ;
2268               else
2269                 some_multiletter_word = 1;
2270             }
2271           else
2272             {
2273               /* If the initial is a caseless word constituent,
2274                  treat that like a lowercase initial.  */
2275               if (SYNTAX (prevc) != Sword)
2276                 some_nonuppercase_initial = 1;
2277             }
2278
2279           prevc = c;
2280         }
2281
2282       /* Convert to all caps if the old text is all caps
2283          and has at least one multiletter word.  */
2284       if (! some_lowercase && some_multiletter_word)
2285         case_action = all_caps;
2286       /* Capitalize each word, if the old text has all capitalized words.  */
2287       else if (!some_nonuppercase_initial && some_multiletter_word)
2288         case_action = cap_initial;
2289       else if (!some_nonuppercase_initial && some_uppercase)
2290         /* Should x -> yz, operating on X, give Yz or YZ?
2291            We'll assume the latter.  */
2292         case_action = all_caps;
2293       else
2294         case_action = nochange;
2295     }
2296
2297   /* Do replacement in a string.  */
2298   if (!NILP (string))
2299     {
2300       Lisp_Object before, after;
2301
2302       before = Fsubstring (string, make_number (0),
2303                            make_number (search_regs.start[sub]));
2304       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2305
2306       /* Substitute parts of the match into NEWTEXT
2307          if desired.  */
2308       if (NILP (literal))
2309         {
2310           int lastpos = 0;
2311           int lastpos_byte = 0;
2312           /* We build up the substituted string in ACCUM.  */
2313           Lisp_Object accum;
2314           Lisp_Object middle;
2315           int pos_byte;
2316
2317           accum = Qnil;
2318
2319           for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
2320             {
2321               int substart = -1;
2322               int subend;
2323               int delbackslash = 0;
2324
2325               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2326
2327               if (c == '\\')
2328                 {
2329                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2330                   if (c == '&')
2331                     {
2332                       substart = search_regs.start[sub];
2333                       subend = search_regs.end[sub];
2334                     }
2335                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2336                     {
2337                       if (search_regs.start[c - '0'] >= 0)
2338                         {
2339                           substart = search_regs.start[c - '0'];
2340                           subend = search_regs.end[c - '0'];
2341                         }
2342                     }
2343                   else if (c == '\\')
2344                     delbackslash = 1;
2345                   else
2346                     error ("Invalid use of `\\' in replacement text");
2347                 }
2348               if (substart >= 0)
2349                 {
2350                   if (pos - 2 != lastpos)
2351                     middle = substring_both (newtext, lastpos,
2352                                              lastpos_byte,
2353                                              pos - 2, pos_byte - 2);
2354                   else
2355                     middle = Qnil;
2356                   accum = concat3 (accum, middle,
2357                                    Fsubstring (string,
2358                                                make_number (substart),
2359                                                make_number (subend)));
2360                   lastpos = pos;
2361                   lastpos_byte = pos_byte;
2362                 }
2363               else if (delbackslash)
2364                 {
2365                   middle = substring_both (newtext, lastpos,
2366                                            lastpos_byte,
2367                                            pos - 1, pos_byte - 1);
2368
2369                   accum = concat2 (accum, middle);
2370                   lastpos = pos;
2371                   lastpos_byte = pos_byte;
2372                 }
2373             }
2374
2375           if (pos != lastpos)
2376             middle = substring_both (newtext, lastpos,
2377                                      lastpos_byte,
2378                                      pos, pos_byte);
2379           else
2380             middle = Qnil;
2381
2382           newtext = concat2 (accum, middle);
2383         }
2384
2385       /* Do case substitution in NEWTEXT if desired.  */
2386       if (case_action == all_caps)
2387         newtext = Fupcase (newtext);
2388       else if (case_action == cap_initial)
2389         newtext = Fupcase_initials (newtext);
2390
2391       return concat3 (before, newtext, after);
2392     }
2393
2394   /* Record point, the move (quietly) to the start of the match.  */
2395   if (PT > search_regs.start[sub])
2396     opoint = PT - ZV;
2397   else
2398     opoint = PT;
2399
2400   TEMP_SET_PT (search_regs.start[sub]);
2401
2402   /* We insert the replacement text before the old text, and then
2403      delete the original text.  This means that markers at the
2404      beginning or end of the original will float to the corresponding
2405      position in the replacement.  */
2406   if (!NILP (literal))
2407     Finsert_and_inherit (1, &newtext);
2408   else
2409     {
2410       struct gcpro gcpro1;
2411       GCPRO1 (newtext);
2412
2413       for (pos = 0; pos < XSTRING (newtext)->size; pos++)
2414         {
2415           int offset = PT - search_regs.start[sub];
2416
2417           c = XSTRING (newtext)->data[pos];
2418           if (c == '\\')
2419             {
2420               c = XSTRING (newtext)->data[++pos];
2421               if (c == '&')
2422                 Finsert_buffer_substring
2423                   (Fcurrent_buffer (),
2424                    make_number (search_regs.start[sub] + offset),
2425                    make_number (search_regs.end[sub] + offset));
2426               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2427                 {
2428                   if (search_regs.start[c - '0'] >= 1)
2429                     Finsert_buffer_substring
2430                       (Fcurrent_buffer (),
2431                        make_number (search_regs.start[c - '0'] + offset),
2432                        make_number (search_regs.end[c - '0'] + offset));
2433                 }
2434               else if (c == '\\')
2435                 insert_char (c);
2436               else
2437                 error ("Invalid use of `\\' in replacement text");
2438             }
2439           else
2440             insert_char (c);
2441         }
2442       UNGCPRO;
2443     }
2444
2445   inslen = PT - (search_regs.start[sub]);
2446   del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2447
2448   if (case_action == all_caps)
2449     Fupcase_region (make_number (PT - inslen), make_number (PT));
2450   else if (case_action == cap_initial)
2451     Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2452
2453   newpoint = PT;
2454
2455   /* Put point back where it was in the text.  */
2456   if (opoint <= 0)
2457     TEMP_SET_PT (opoint + ZV);
2458   else
2459     TEMP_SET_PT (opoint);
2460
2461   /* Now move point "officially" to the start of the inserted replacement.  */
2462   move_if_not_intangible (newpoint);
2463
2464   return Qnil;
2465 }
2466 \f
2467 static Lisp_Object
2468 match_limit (num, beginningp)
2469      Lisp_Object num;
2470      int beginningp;
2471 {
2472   register int n;
2473
2474   CHECK_NUMBER (num, 0);
2475   n = XINT (num);
2476   if (n < 0 || n >= search_regs.num_regs)
2477     args_out_of_range (num, make_number (search_regs.num_regs));
2478   if (search_regs.num_regs <= 0
2479       || search_regs.start[n] < 0)
2480     return Qnil;
2481   return (make_number ((beginningp) ? search_regs.start[n]
2482                                     : search_regs.end[n]));
2483 }
2484
2485 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2486   "Return position of start of text matched by last search.\n\
2487 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2488   regexp.\n\
2489 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2490   SUBEXP pairs.\n\
2491 Zero means the entire text matched by the whole regexp or whole string.")
2492   (subexp)
2493      Lisp_Object subexp;
2494 {
2495   return match_limit (subexp, 1);
2496 }
2497
2498 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2499   "Return position of end of text matched by last search.\n\
2500 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2501   regexp.\n\
2502 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2503   SUBEXP pairs.\n\
2504 Zero means the entire text matched by the whole regexp or whole string.")
2505   (subexp)
2506      Lisp_Object subexp;
2507 {
2508   return match_limit (subexp, 0);
2509 }
2510
2511 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2512   "Return a list containing all info on what the last search matched.\n\
2513 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2514 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2515 if the last match was on a buffer; integers or nil if a string was matched.\n\
2516 Use `store-match-data' to reinstate the data in this list.\n\
2517 \n\
2518 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2519 \(rather than markers) to represent buffer positions.\n\
2520 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough\n\
2521 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2522   (integers, reuse)
2523      Lisp_Object integers, reuse;
2524 {
2525   Lisp_Object tail, prev;
2526   Lisp_Object *data;
2527   int i, len;
2528
2529   if (NILP (last_thing_searched))
2530     return Qnil;
2531
2532   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2533                                  * sizeof (Lisp_Object));
2534
2535   len = -1;
2536   for (i = 0; i < search_regs.num_regs; i++)
2537     {
2538       int start = search_regs.start[i];
2539       if (start >= 0)
2540         {
2541           if (EQ (last_thing_searched, Qt)
2542               || ! NILP (integers))
2543             {
2544               XSETFASTINT (data[2 * i], start);
2545               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2546             }
2547           else if (BUFFERP (last_thing_searched))
2548             {
2549               data[2 * i] = Fmake_marker ();
2550               Fset_marker (data[2 * i],
2551                            make_number (start),
2552                            last_thing_searched);
2553               data[2 * i + 1] = Fmake_marker ();
2554               Fset_marker (data[2 * i + 1],
2555                            make_number (search_regs.end[i]),
2556                            last_thing_searched);
2557             }
2558           else
2559             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2560             abort ();
2561
2562           len = i;
2563         }
2564       else
2565         data[2 * i] = data [2 * i + 1] = Qnil;
2566     }
2567
2568   /* If REUSE is not usable, cons up the values and return them.  */
2569   if (! CONSP (reuse))
2570     return Flist (2 * len + 2, data);
2571
2572   /* If REUSE is a list, store as many value elements as will fit
2573      into the elements of REUSE.  */
2574   for (i = 0, tail = reuse; CONSP (tail);
2575        i++, tail = XCONS (tail)->cdr)
2576     {
2577       if (i < 2 * len + 2)
2578         XCONS (tail)->car = data[i];
2579       else
2580         XCONS (tail)->car = Qnil;
2581       prev = tail;
2582     }
2583
2584   /* If we couldn't fit all value elements into REUSE,
2585      cons up the rest of them and add them to the end of REUSE.  */
2586   if (i < 2 * len + 2)
2587     XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2588
2589   return reuse;
2590 }
2591
2592
2593 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2594   "Set internal data on last search match from elements of LIST.\n\
2595 LIST should have been created by calling `match-data' previously.")
2596   (list)
2597      register Lisp_Object list;
2598 {
2599   register int i;
2600   register Lisp_Object marker;
2601
2602   if (running_asynch_code)
2603     save_search_regs ();
2604
2605   if (!CONSP (list) && !NILP (list))
2606     list = wrong_type_argument (Qconsp, list);
2607
2608   /* Unless we find a marker with a buffer in LIST, assume that this
2609      match data came from a string.  */
2610   last_thing_searched = Qt;
2611
2612   /* Allocate registers if they don't already exist.  */
2613   {
2614     int length = XFASTINT (Flength (list)) / 2;
2615
2616     if (length > search_regs.num_regs)
2617       {
2618         if (search_regs.num_regs == 0)
2619           {
2620             search_regs.start
2621               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2622             search_regs.end
2623               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2624           }
2625         else
2626           {
2627             search_regs.start
2628               = (regoff_t *) xrealloc (search_regs.start,
2629                                        length * sizeof (regoff_t));
2630             search_regs.end
2631               = (regoff_t *) xrealloc (search_regs.end,
2632                                        length * sizeof (regoff_t));
2633           }
2634
2635         search_regs.num_regs = length;
2636       }
2637   }
2638
2639   for (i = 0; i < search_regs.num_regs; i++)
2640     {
2641       marker = Fcar (list);
2642       if (NILP (marker))
2643         {
2644           search_regs.start[i] = -1;
2645           list = Fcdr (list);
2646         }
2647       else
2648         {
2649           if (MARKERP (marker))
2650             {
2651               if (XMARKER (marker)->buffer == 0)
2652                 XSETFASTINT (marker, 0);
2653               else
2654                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2655             }
2656
2657           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2658           search_regs.start[i] = XINT (marker);
2659           list = Fcdr (list);
2660
2661           marker = Fcar (list);
2662           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2663             XSETFASTINT (marker, 0);
2664
2665           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2666           search_regs.end[i] = XINT (marker);
2667         }
2668       list = Fcdr (list);
2669     }
2670
2671   return Qnil;
2672 }
2673
2674 /* If non-zero the match data have been saved in saved_search_regs
2675    during the execution of a sentinel or filter. */
2676 static int search_regs_saved;
2677 static struct re_registers saved_search_regs;
2678
2679 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2680    if asynchronous code (filter or sentinel) is running. */
2681 static void
2682 save_search_regs ()
2683 {
2684   if (!search_regs_saved)
2685     {
2686       saved_search_regs.num_regs = search_regs.num_regs;
2687       saved_search_regs.start = search_regs.start;
2688       saved_search_regs.end = search_regs.end;
2689       search_regs.num_regs = 0;
2690       search_regs.start = 0;
2691       search_regs.end = 0;
2692
2693       search_regs_saved = 1;
2694     }
2695 }
2696
2697 /* Called upon exit from filters and sentinels. */
2698 void
2699 restore_match_data ()
2700 {
2701   if (search_regs_saved)
2702     {
2703       if (search_regs.num_regs > 0)
2704         {
2705           xfree (search_regs.start);
2706           xfree (search_regs.end);
2707         }
2708       search_regs.num_regs = saved_search_regs.num_regs;
2709       search_regs.start = saved_search_regs.start;
2710       search_regs.end = saved_search_regs.end;
2711
2712       search_regs_saved = 0;
2713     }
2714 }
2715
2716 /* Quote a string to inactivate reg-expr chars */
2717
2718 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2719   "Return a regexp string which matches exactly STRING and nothing else.")
2720   (string)
2721      Lisp_Object string;
2722 {
2723   register unsigned char *in, *out, *end;
2724   register unsigned char *temp;
2725   int backslashes_added = 0;
2726
2727   CHECK_STRING (string, 0);
2728
2729   temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2730
2731   /* Now copy the data into the new string, inserting escapes. */
2732
2733   in = XSTRING (string)->data;
2734   end = in + STRING_BYTES (XSTRING (string));
2735   out = temp;
2736
2737   for (; in != end; in++)
2738     {
2739       if (*in == '[' || *in == ']'
2740           || *in == '*' || *in == '.' || *in == '\\'
2741           || *in == '?' || *in == '+'
2742           || *in == '^' || *in == '$')
2743         *out++ = '\\', backslashes_added++;
2744       *out++ = *in;
2745     }
2746
2747   return make_specified_string (temp,
2748                                 XSTRING (string)->size + backslashes_added,
2749                                 out - temp,
2750                                 STRING_MULTIBYTE (string));
2751 }
2752 \f
2753 void
2754 syms_of_search ()
2755 {
2756   register int i;
2757
2758   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2759     {
2760       searchbufs[i].buf.allocated = 100;
2761       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2762       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2763       searchbufs[i].regexp = Qnil;
2764       staticpro (&searchbufs[i].regexp);
2765       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2766     }
2767   searchbuf_head = &searchbufs[0];
2768
2769   Qsearch_failed = intern ("search-failed");
2770   staticpro (&Qsearch_failed);
2771   Qinvalid_regexp = intern ("invalid-regexp");
2772   staticpro (&Qinvalid_regexp);
2773
2774   Fput (Qsearch_failed, Qerror_conditions,
2775         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2776   Fput (Qsearch_failed, Qerror_message,
2777         build_string ("Search failed"));
2778
2779   Fput (Qinvalid_regexp, Qerror_conditions,
2780         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2781   Fput (Qinvalid_regexp, Qerror_message,
2782         build_string ("Invalid regexp"));
2783
2784   last_thing_searched = Qnil;
2785   staticpro (&last_thing_searched);
2786
2787   defsubr (&Slooking_at);
2788   defsubr (&Sposix_looking_at);
2789   defsubr (&Sstring_match);
2790   defsubr (&Sposix_string_match);
2791   defsubr (&Ssearch_forward);
2792   defsubr (&Ssearch_backward);
2793   defsubr (&Sword_search_forward);
2794   defsubr (&Sword_search_backward);
2795   defsubr (&Sre_search_forward);
2796   defsubr (&Sre_search_backward);
2797   defsubr (&Sposix_search_forward);
2798   defsubr (&Sposix_search_backward);
2799   defsubr (&Sreplace_match);
2800   defsubr (&Smatch_beginning);
2801   defsubr (&Smatch_end);
2802   defsubr (&Smatch_data);
2803   defsubr (&Sset_match_data);
2804   defsubr (&Sregexp_quote);
2805 }