code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
   3              Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "charset.h"
  29 #include "region-cache.h"
  30 #include "commands.h"
  31 #include "blockinput.h"
  32 #include "intervals.h"
  33
  34 #include <sys/types.h>
  35 #include "regex.h"
  36
  37 #define REGEXP_CACHE_SIZE 20
  38
  39 /* If the regexp is non-nil, then the buffer contains the compiled form
  40    of that regexp, suitable for searching.  */
  41 struct regexp_cache
  42 {
  43   struct regexp_cache *next;
  44   Lisp_Object regexp, whitespace_regexp;
  45   struct re_pattern_buffer buf;
  46   char fastmap[0400];
  47   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  48   char posix;
  49 };
  50
  51 /* The instances of that struct.  */
  52 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  53
  54 /* The head of the linked list; points to the most recently used buffer.  */
  55 struct regexp_cache *searchbuf_head;
  56
  57
  58 /* Every call to re_match, etc., must pass &search_regs as the regs
  59    argument unless you can show it is unnecessary (i.e., if re_match
  60    is certainly going to be called again before region-around-match
  61    can be called).
  62
  63    Since the registers are now dynamically allocated, we need to make
  64    sure not to refer to the Nth register before checking that it has
  65    been allocated by checking search_regs.num_regs.
  66
  67    The regex code keeps track of whether it has allocated the search
  68    buffer using bits in the re_pattern_buffer.  This means that whenever
  69    you compile a new pattern, it completely forgets whether it has
  70    allocated any registers, and will allocate new registers the next
  71    time you call a searching or matching function.  Therefore, we need
  72    to call re_set_registers after compiling a new pattern or after
  73    setting the match registers, so that the regex functions will be
  74    able to free or re-allocate it properly.  */
  75 static struct re_registers search_regs;
  76
  77 /* The buffer in which the last search was performed, or
  78    Qt if the last search was done in a string;
  79    Qnil if no searching has been done yet.  */
  80 static Lisp_Object last_thing_searched;
  81
  82 /* error condition signaled when regexp compile_pattern fails */
  83
  84 Lisp_Object Qinvalid_regexp;
  85
  86 Lisp_Object Vsearch_spaces_regexp;
  87
  88 static void set_search_regs ();
  89 static void save_search_regs ();
  90 static int simple_search ();
  91 static int boyer_moore ();
  92 static int search_buffer ();
  93
/* Report, by calling error, that the regexp matcher overflowed its
   stack.  The callers in this file invoke it when a regex search or
   match function returns -2.  */
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}
  99
 100 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 101    PATTERN is the pattern to compile.
 102    CP is the place to put the result.
 103    TRANSLATE is a translation table for ignoring case, or nil for none.
 104    REGP is the structure that says where to store the "register"
 105    values that will result from matching this pattern.
 106    If it is 0, we should compile the pattern not to record any
 107    subexpression bounds.
 108    POSIX is nonzero if we want full backtracking (POSIX style)
 109    for this pattern.  0 means backtrack only enough to get a valid match.
 110    MULTIBYTE is nonzero if we want to handle multibyte characters in
 111    PATTERN.  0 means all multibyte characters are recognized just as
 112    sequences of binary data.
 113
 114    The behavior also depends on Vsearch_spaces_regexp.  */
 115
 116 static void
 117 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 118      struct regexp_cache *cp;
 119      Lisp_Object pattern;
 120      Lisp_Object translate;
 121      struct re_registers *regp;
 122      int posix;
 123      int multibyte;
 124 {
 125   unsigned char *raw_pattern;
 126   int raw_pattern_size;
 127   char *val;
 128   reg_syntax_t old;
 129
 130   /* MULTIBYTE says whether the text to be searched is multibyte.
 131      We must convert PATTERN to match that, or we will not really
 132      find things right.  */
 133
 134   if (multibyte == STRING_MULTIBYTE (pattern))
 135     {
 136       raw_pattern = (unsigned char *) SDATA (pattern);
 137       raw_pattern_size = SBYTES (pattern);
 138     }
 139   else if (multibyte)
 140     {
 141       raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
 142                                                   SCHARS (pattern));
 143       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 144       copy_text (SDATA (pattern), raw_pattern,
 145                  SCHARS (pattern), 0, 1);
 146     }
 147   else
 148     {
 149       /* Converting multibyte to single-byte.
 150
 151          ??? Perhaps this conversion should be done in a special way
 152          by subtracting nonascii-insert-offset from each non-ASCII char,
 153          so that only the multibyte chars which really correspond to
 154          the chosen single-byte character set can possibly match.  */
 155       raw_pattern_size = SCHARS (pattern);
 156       raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
 157       copy_text (SDATA (pattern), raw_pattern,
 158                  SBYTES (pattern), 1, 0);
 159     }
 160
 161   cp->regexp = Qnil;
 162   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 163   cp->posix = posix;
 164   cp->buf.multibyte = multibyte;
 165   cp->whitespace_regexp = Vsearch_spaces_regexp;
 166   BLOCK_INPUT;
 167   old = re_set_syntax (RE_SYNTAX_EMACS
 168                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 169
 170   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 171                             : SDATA (Vsearch_spaces_regexp));
 172
 173   val = (char *) re_compile_pattern ((char *)raw_pattern,
 174                                      raw_pattern_size, &cp->buf);
 175
 176   re_set_whitespace_regexp (NULL);
 177
 178   re_set_syntax (old);
 179   UNBLOCK_INPUT;
 180   if (val)
 181     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 182
 183   cp->regexp = Fcopy_sequence (pattern);
 184 }
 185
 186 /* Shrink each compiled regexp buffer in the cache
 187    to the size actually used right now.
 188    This is called from garbage collection.  */
 189
 190 void
 191 shrink_regexp_cache ()
 192 {
 193   struct regexp_cache *cp;
 194
 195   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 196     {
 197       cp->buf.allocated = cp->buf.used;
 198       cp->buf.buffer
 199         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 200     }
 201 }
 202
 203 /* Compile a regexp if necessary, but first check to see if there's one in
 204    the cache.
 205    PATTERN is the pattern to compile.
 206    TRANSLATE is a translation table for ignoring case, or nil for none.
 207    REGP is the structure that says where to store the "register"
 208    values that will result from matching this pattern.
 209    If it is 0, we should compile the pattern not to record any
 210    subexpression bounds.
 211    POSIX is nonzero if we want full backtracking (POSIX style)
 212    for this pattern.  0 means backtrack only enough to get a valid match.  */
 213
 214 struct re_pattern_buffer *
 215 compile_pattern (pattern, regp, translate, posix, multibyte)
 216      Lisp_Object pattern;
 217      struct re_registers *regp;
 218      Lisp_Object translate;
 219      int posix, multibyte;
 220 {
 221   struct regexp_cache *cp, **cpp;
 222
 223   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 224     {
 225       cp = *cpp;
 226       /* Entries are initialized to nil, and may be set to nil by
 227          compile_pattern_1 if the pattern isn't valid.  Don't apply
 228          string accessors in those cases.  However, compile_pattern_1
 229          is only applied to the cache entry we pick here to reuse.  So
 230          nil should never appear before a non-nil entry.  */
 231       if (NILP (cp->regexp))
 232         goto compile_it;
 233       if (SCHARS (cp->regexp) == SCHARS (pattern)
 234           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 235           && !NILP (Fstring_equal (cp->regexp, pattern))
 236           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 237           && cp->posix == posix
 238           && cp->buf.multibyte == multibyte
 239           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
 240         break;
 241
 242       /* If we're at the end of the cache, compile into the nil cell
 243          we found, or the last (least recently used) cell with a
 244          string value.  */
 245       if (cp->next == 0)
 246         {
 247         compile_it:
 248           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 249           break;
 250         }
 251     }
 252
 253   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 254      either because we found it in the cache or because we just compiled it.
 255      Move it to the front of the queue to mark it as most recently used.  */
 256   *cpp = cp->next;
 257   cp->next = searchbuf_head;
 258   searchbuf_head = cp;
 259
 260   /* Advise the searching functions about the space we have allocated
 261      for register data.  */
 262   if (regp)
 263     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 264
 265   return &cp->buf;
 266 }
 267
 268 /* Error condition used for failing searches */
 269 Lisp_Object Qsearch_failed;
 270
 271 Lisp_Object
 272 signal_failure (arg)
 273      Lisp_Object arg;
 274 {
 275   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 276   return Qnil;
 277 }
 278 \f
 279 static Lisp_Object
 280 looking_at_1 (string, posix)
 281      Lisp_Object string;
 282      int posix;
 283 {
 284   Lisp_Object val;
 285   unsigned char *p1, *p2;
 286   int s1, s2;
 287   register int i;
 288   struct re_pattern_buffer *bufp;
 289
 290   if (running_asynch_code)
 291     save_search_regs ();
 292
 293   CHECK_STRING (string);
 294   bufp = compile_pattern (string, &search_regs,
 295                           (!NILP (current_buffer->case_fold_search)
 296                            ? DOWNCASE_TABLE : Qnil),
 297                           posix,
 298                           !NILP (current_buffer->enable_multibyte_characters));
 299
 300   immediate_quit = 1;
 301   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 302
 303   /* Get pointers and sizes of the two strings
 304      that make up the visible portion of the buffer. */
 305
 306   p1 = BEGV_ADDR;
 307   s1 = GPT_BYTE - BEGV_BYTE;
 308   p2 = GAP_END_ADDR;
 309   s2 = ZV_BYTE - GPT_BYTE;
 310   if (s1 < 0)
 311     {
 312       p2 = p1;
 313       s2 = ZV_BYTE - BEGV_BYTE;
 314       s1 = 0;
 315     }
 316   if (s2 < 0)
 317     {
 318       s1 = ZV_BYTE - BEGV_BYTE;
 319       s2 = 0;
 320     }
 321
 322   re_match_object = Qnil;
 323
 324   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 325                   PT_BYTE - BEGV_BYTE, &search_regs,
 326                   ZV_BYTE - BEGV_BYTE);
 327   immediate_quit = 0;
 328
 329   if (i == -2)
 330     matcher_overflow ();
 331
 332   val = (0 <= i ? Qt : Qnil);
 333   if (i >= 0)
 334     for (i = 0; i < search_regs.num_regs; i++)
 335       if (search_regs.start[i] >= 0)
 336         {
 337           search_regs.start[i]
 338             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 339           search_regs.end[i]
 340             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 341         }
 342   XSETBUFFER (last_thing_searched, current_buffer);
 343   return val;
 344 }
 345
 346 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 347        doc: /* Return t if text after point matches regular expression REGEXP.
 348 This function modifies the match data that `match-beginning',
 349 `match-end' and `match-data' access; save and restore the match
 350 data if you want to preserve them.  */)
 351      (regexp)
 352      Lisp_Object regexp;
 353 {
 354   return looking_at_1 (regexp, 0);
 355 }
 356
 357 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 358        doc: /* Return t if text after point matches regular expression REGEXP.
 359 Find the longest match, in accord with Posix regular expression rules.
 360 This function modifies the match data that `match-beginning',
 361 `match-end' and `match-data' access; save and restore the match
 362 data if you want to preserve them.  */)
 363      (regexp)
 364      Lisp_Object regexp;
 365 {
 366   return looking_at_1 (regexp, 1);
 367 }
 368 \f
 369 static Lisp_Object
 370 string_match_1 (regexp, string, start, posix)
 371      Lisp_Object regexp, string, start;
 372      int posix;
 373 {
 374   int val;
 375   struct re_pattern_buffer *bufp;
 376   int pos, pos_byte;
 377   int i;
 378
 379   if (running_asynch_code)
 380     save_search_regs ();
 381
 382   CHECK_STRING (regexp);
 383   CHECK_STRING (string);
 384
 385   if (NILP (start))
 386     pos = 0, pos_byte = 0;
 387   else
 388     {
 389       int len = SCHARS (string);
 390
 391       CHECK_NUMBER (start);
 392       pos = XINT (start);
 393       if (pos < 0 && -pos <= len)
 394         pos = len + pos;
 395       else if (0 > pos || pos > len)
 396         args_out_of_range (string, start);
 397       pos_byte = string_char_to_byte (string, pos);
 398     }
 399
 400   bufp = compile_pattern (regexp, &search_regs,
 401                           (!NILP (current_buffer->case_fold_search)
 402                            ? DOWNCASE_TABLE : Qnil),
 403                           posix,
 404                           STRING_MULTIBYTE (string));
 405   immediate_quit = 1;
 406   re_match_object = string;
 407
 408   val = re_search (bufp, (char *) SDATA (string),
 409                    SBYTES (string), pos_byte,
 410                    SBYTES (string) - pos_byte,
 411                    &search_regs);
 412   immediate_quit = 0;
 413   last_thing_searched = Qt;
 414   if (val == -2)
 415     matcher_overflow ();
 416   if (val < 0) return Qnil;
 417
 418   for (i = 0; i < search_regs.num_regs; i++)
 419     if (search_regs.start[i] >= 0)
 420       {
 421         search_regs.start[i]
 422           = string_byte_to_char (string, search_regs.start[i]);
 423         search_regs.end[i]
 424           = string_byte_to_char (string, search_regs.end[i]);
 425       }
 426
 427   return make_number (string_byte_to_char (string, val));
 428 }
 429
 430 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 431        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 432 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 433 If third arg START is non-nil, start search at that index in STRING.
 434 For index of first char beyond the match, do (match-end 0).
 435 `match-end' and `match-beginning' also give indices of substrings
 436 matched by parenthesis constructs in the pattern.
 437
 438 You can use the function `match-string' to extract the substrings
 439 matched by the parenthesis constructions in REGEXP. */)
 440      (regexp, string, start)
 441      Lisp_Object regexp, string, start;
 442 {
 443   return string_match_1 (regexp, string, start, 0);
 444 }
 445
 446 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 447        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 448 Find the longest match, in accord with Posix regular expression rules.
 449 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 450 If third arg START is non-nil, start search at that index in STRING.
 451 For index of first char beyond the match, do (match-end 0).
 452 `match-end' and `match-beginning' also give indices of substrings
 453 matched by parenthesis constructs in the pattern.  */)
 454      (regexp, string, start)
 455      Lisp_Object regexp, string, start;
 456 {
 457   return string_match_1 (regexp, string, start, 1);
 458 }
 459
 460 /* Match REGEXP against STRING, searching all of STRING,
 461    and return the index of the match, or negative on failure.
 462    This does not clobber the match data.  */
 463
 464 int
 465 fast_string_match (regexp, string)
 466      Lisp_Object regexp, string;
 467 {
 468   int val;
 469   struct re_pattern_buffer *bufp;
 470
 471   bufp = compile_pattern (regexp, 0, Qnil,
 472                           0, STRING_MULTIBYTE (string));
 473   immediate_quit = 1;
 474   re_match_object = string;
 475
 476   val = re_search (bufp, (char *) SDATA (string),
 477                    SBYTES (string), 0,
 478                    SBYTES (string), 0);
 479   immediate_quit = 0;
 480   return val;
 481 }
 482
 483 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 484    and return the index of the match, or negative on failure.
 485    This does not clobber the match data.
 486    We assume that STRING contains single-byte characters.  */
 487
 488 extern Lisp_Object Vascii_downcase_table;
 489
 490 int
 491 fast_c_string_match_ignore_case (regexp, string)
 492      Lisp_Object regexp;
 493      const char *string;
 494 {
 495   int val;
 496   struct re_pattern_buffer *bufp;
 497   int len = strlen (string);
 498
 499   regexp = string_make_unibyte (regexp);
 500   re_match_object = Qt;
 501   bufp = compile_pattern (regexp, 0,
 502                           Vascii_downcase_table, 0,
 503                           0);
 504   immediate_quit = 1;
 505   val = re_search (bufp, string, len, 0, len, 0);
 506   immediate_quit = 0;
 507   return val;
 508 }
 509
 510 /* Like fast_string_match but ignore case.  */
 511
 512 int
 513 fast_string_match_ignore_case (regexp, string)
 514      Lisp_Object regexp, string;
 515 {
 516   int val;
 517   struct re_pattern_buffer *bufp;
 518
 519   bufp = compile_pattern (regexp, 0, Vascii_downcase_table,
 520                           0, STRING_MULTIBYTE (string));
 521   immediate_quit = 1;
 522   re_match_object = string;
 523
 524   val = re_search (bufp, (char *) SDATA (string),
 525                    SBYTES (string), 0,
 526                    SBYTES (string), 0);
 527   immediate_quit = 0;
 528   return val;
 529 }
 530 \f
 531 /* The newline cache: remembering which sections of text have no newlines.  */
 532
 533 /* If the user has requested newline caching, make sure it's on.
 534    Otherwise, make sure it's off.
 535    This is our cheezy way of associating an action with the change of
 536    state of a buffer-local variable.  */
 537 static void
 538 newline_cache_on_off (buf)
 539      struct buffer *buf;
 540 {
 541   if (NILP (buf->cache_long_line_scans))
 542     {
 543       /* It should be off.  */
 544       if (buf->newline_cache)
 545         {
 546           free_region_cache (buf->newline_cache);
 547           buf->newline_cache = 0;
 548         }
 549     }
 550   else
 551     {
 552       /* It should be on.  */
 553       if (buf->newline_cache == 0)
 554         buf->newline_cache = new_region_cache ();
 555     }
 556 }
 557
 558 \f
 559 /* Search for COUNT instances of the character TARGET between START and END.
 560
 561    If COUNT is positive, search forwards; END must be >= START.
 562    If COUNT is negative, search backwards for the -COUNTth instance;
 563       END must be <= START.
 564    If COUNT is zero, do anything you please; run rogue, for all I care.
 565
 566    If END is zero, use BEGV or ZV instead, as appropriate for the
 567    direction indicated by COUNT.
 568
 569    If we find COUNT instances, set *SHORTAGE to zero, and return the
 570    position past the COUNTth match.  Note that for reverse motion
 571    this is not the same as the usual convention for Emacs motion commands.
 572
 573    If we don't find COUNT instances before reaching END, set *SHORTAGE
 574    to the number of TARGETs left unfound, and return END.
 575
 576    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 577    except when inside redisplay.  */
 578
 579 int
 580 scan_buffer (target, start, end, count, shortage, allow_quit)
 581      register int target;
 582      int start, end;
 583      int count;
 584      int *shortage;
 585      int allow_quit;
 586 {
 587   struct region_cache *newline_cache;
 588   int direction;
 589
 590   if (count > 0)
 591     {
 592       direction = 1;
 593       if (! end) end = ZV;
 594     }
 595   else
 596     {
 597       direction = -1;
 598       if (! end) end = BEGV;
 599     }
 600
 601   newline_cache_on_off (current_buffer);
 602   newline_cache = current_buffer->newline_cache;
 603
 604   if (shortage != 0)
 605     *shortage = 0;
 606
 607   immediate_quit = allow_quit;
 608
 609   if (count > 0)
 610     while (start != end)
 611       {
 612         /* Our innermost scanning loop is very simple; it doesn't know
 613            about gaps, buffer ends, or the newline cache.  ceiling is
 614            the position of the last character before the next such
 615            obstacle --- the last character the dumb search loop should
 616            examine.  */
 617         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 618         int start_byte = CHAR_TO_BYTE (start);
 619         int tem;
 620
 621         /* If we're looking for a newline, consult the newline cache
 622            to see where we can avoid some scanning.  */
 623         if (target == '\n' && newline_cache)
 624           {
 625             int next_change;
 626             immediate_quit = 0;
 627             while (region_cache_forward
 628                    (current_buffer, newline_cache, start_byte, &next_change))
 629               start_byte = next_change;
 630             immediate_quit = allow_quit;
 631
 632             /* START should never be after END.  */
 633             if (start_byte > ceiling_byte)
 634               start_byte = ceiling_byte;
 635
 636             /* Now the text after start is an unknown region, and
 637                next_change is the position of the next known region. */
 638             ceiling_byte = min (next_change - 1, ceiling_byte);
 639           }
 640
 641         /* The dumb loop can only scan text stored in contiguous
 642            bytes. BUFFER_CEILING_OF returns the last character
 643            position that is contiguous, so the ceiling is the
 644            position after that.  */
 645         tem = BUFFER_CEILING_OF (start_byte);
 646         ceiling_byte = min (tem, ceiling_byte);
 647
 648         {
 649           /* The termination address of the dumb loop.  */
 650           register unsigned char *ceiling_addr
 651             = BYTE_POS_ADDR (ceiling_byte) + 1;
 652           register unsigned char *cursor
 653             = BYTE_POS_ADDR (start_byte);
 654           unsigned char *base = cursor;
 655
 656           while (cursor < ceiling_addr)
 657             {
 658               unsigned char *scan_start = cursor;
 659
 660               /* The dumb loop.  */
 661               while (*cursor != target && ++cursor < ceiling_addr)
 662                 ;
 663
 664               /* If we're looking for newlines, cache the fact that
 665                  the region from start to cursor is free of them. */
 666               if (target == '\n' && newline_cache)
 667                 know_region_cache (current_buffer, newline_cache,
 668                                    start_byte + scan_start - base,
 669                                    start_byte + cursor - base);
 670
 671               /* Did we find the target character?  */
 672               if (cursor < ceiling_addr)
 673                 {
 674                   if (--count == 0)
 675                     {
 676                       immediate_quit = 0;
 677                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 678                     }
 679                   cursor++;
 680                 }
 681             }
 682
 683           start = BYTE_TO_CHAR (start_byte + cursor - base);
 684         }
 685       }
 686   else
 687     while (start > end)
 688       {
 689         /* The last character to check before the next obstacle.  */
 690         int ceiling_byte = CHAR_TO_BYTE (end);
 691         int start_byte = CHAR_TO_BYTE (start);
 692         int tem;
 693
 694         /* Consult the newline cache, if appropriate.  */
 695         if (target == '\n' && newline_cache)
 696           {
 697             int next_change;
 698             immediate_quit = 0;
 699             while (region_cache_backward
 700                    (current_buffer, newline_cache, start_byte, &next_change))
 701               start_byte = next_change;
 702             immediate_quit = allow_quit;
 703
 704             /* Start should never be at or before end.  */
 705             if (start_byte <= ceiling_byte)
 706               start_byte = ceiling_byte + 1;
 707
 708             /* Now the text before start is an unknown region, and
 709                next_change is the position of the next known region. */
 710             ceiling_byte = max (next_change, ceiling_byte);
 711           }
 712
 713         /* Stop scanning before the gap.  */
 714         tem = BUFFER_FLOOR_OF (start_byte - 1);
 715         ceiling_byte = max (tem, ceiling_byte);
 716
 717         {
 718           /* The termination address of the dumb loop.  */
 719           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 720           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 721           unsigned char *base = cursor;
 722
 723           while (cursor >= ceiling_addr)
 724             {
 725               unsigned char *scan_start = cursor;
 726
 727               while (*cursor != target && --cursor >= ceiling_addr)
 728                 ;
 729
 730               /* If we're looking for newlines, cache the fact that
 731                  the region from after the cursor to start is free of them.  */
 732               if (target == '\n' && newline_cache)
 733                 know_region_cache (current_buffer, newline_cache,
 734                                    start_byte + cursor - base,
 735                                    start_byte + scan_start - base);
 736
 737               /* Did we find the target character?  */
 738               if (cursor >= ceiling_addr)
 739                 {
 740                   if (++count >= 0)
 741                     {
 742                       immediate_quit = 0;
 743                       return BYTE_TO_CHAR (start_byte + cursor - base);
 744                     }
 745                   cursor--;
 746                 }
 747             }
 748
 749           start = BYTE_TO_CHAR (start_byte + cursor - base);
 750         }
 751       }
 752
 753   immediate_quit = 0;
 754   if (shortage != 0)
 755     *shortage = count * direction;
 756   return start;
 757 }
 758 \f
 759 /* Search for COUNT instances of a line boundary, which means either a
 760    newline or (if selective display enabled) a carriage return.
 761    Start at START.  If COUNT is negative, search backwards.
 762
 763    We report the resulting position by calling TEMP_SET_PT_BOTH.
 764
 765    If we find COUNT instances. we position after (always after,
 766    even if scanning backwards) the COUNTth match, and return 0.
 767
 768    If we don't find COUNT instances before reaching the end of the
 769    buffer (or the beginning, if scanning backwards), we return
 770    the number of line boundaries left unfound, and position at
 771    the limit we bumped up against.
 772
 773    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 774    except in special cases.  */
 775
 776 int
 777 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 778      int start, start_byte;
 779      int limit, limit_byte;
 780      register int count;
 781      int allow_quit;
 782 {
 783   int direction = ((count > 0) ? 1 : -1);
 784
 785   register unsigned char *cursor;
 786   unsigned char *base;
 787
 788   register int ceiling;
 789   register unsigned char *ceiling_addr;
 790
 791   int old_immediate_quit = immediate_quit;
 792
 793   /* The code that follows is like scan_buffer
 794      but checks for either newline or carriage return.  */
 795
 796   if (allow_quit)
 797     immediate_quit++;
 798
 799   start_byte = CHAR_TO_BYTE (start);
 800
 801   if (count > 0)
 802     {
 803       while (start_byte < limit_byte)
 804         {
 805           ceiling =  BUFFER_CEILING_OF (start_byte);
 806           ceiling = min (limit_byte - 1, ceiling);
 807           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 808           base = (cursor = BYTE_POS_ADDR (start_byte));
 809           while (1)
 810             {
 811               while (*cursor != '\n' && ++cursor != ceiling_addr)
 812                 ;
 813
 814               if (cursor != ceiling_addr)
 815                 {
 816                   if (--count == 0)
 817                     {
 818                       immediate_quit = old_immediate_quit;
 819                       start_byte = start_byte + cursor - base + 1;
 820                       start = BYTE_TO_CHAR (start_byte);
 821                       TEMP_SET_PT_BOTH (start, start_byte);
 822                       return 0;
 823                     }
 824                   else
 825                     if (++cursor == ceiling_addr)
 826                       break;
 827                 }
 828               else
 829                 break;
 830             }
 831           start_byte += cursor - base;
 832         }
 833     }
 834   else
 835     {
 836       while (start_byte > limit_byte)
 837         {
 838           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 839           ceiling = max (limit_byte, ceiling);
 840           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 841           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 842           while (1)
 843             {
 844               while (--cursor != ceiling_addr && *cursor != '\n')
 845                 ;
 846
 847               if (cursor != ceiling_addr)
 848                 {
 849                   if (++count == 0)
 850                     {
 851                       immediate_quit = old_immediate_quit;
 852                       /* Return the position AFTER the match we found.  */
 853                       start_byte = start_byte + cursor - base + 1;
 854                       start = BYTE_TO_CHAR (start_byte);
 855                       TEMP_SET_PT_BOTH (start, start_byte);
 856                       return 0;
 857                     }
 858                 }
 859               else
 860                 break;
 861             }
 862           /* Here we add 1 to compensate for the last decrement
 863              of CURSOR, which took it past the valid range.  */
 864           start_byte += cursor - base + 1;
 865         }
 866     }
 867
 868   TEMP_SET_PT_BOTH (limit, limit_byte);
 869   immediate_quit = old_immediate_quit;
 870
 871   return count * direction;
 872 }
 873
 874 int
 875 find_next_newline_no_quit (from, cnt)
 876      register int from, cnt;
 877 {
 878   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 879 }
 880
 881 /* Like find_next_newline, but returns position before the newline,
 882    not after, and only search up to TO.  This isn't just
 883    find_next_newline (...)-1, because you might hit TO.  */
 884
 885 int
 886 find_before_next_newline (from, to, cnt)
 887      int from, to, cnt;
 888 {
 889   int shortage;
 890   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 891
 892   if (shortage == 0)
 893     pos--;
 894
 895   return pos;
 896 }
 897 \f
 898 /* Subroutines of Lisp buffer search functions. */
 899
 900 static Lisp_Object
 901 search_command (string, bound, noerror, count, direction, RE, posix)
 902      Lisp_Object string, bound, noerror, count;
 903      int direction;
 904      int RE;
 905      int posix;
 906 {
 907   register int np;
 908   int lim, lim_byte;
 909   int n = direction;
 910
 911   if (!NILP (count))
 912     {
 913       CHECK_NUMBER (count);
 914       n *= XINT (count);
 915     }
 916
 917   CHECK_STRING (string);
 918   if (NILP (bound))
 919     {
 920       if (n > 0)
 921         lim = ZV, lim_byte = ZV_BYTE;
 922       else
 923         lim = BEGV, lim_byte = BEGV_BYTE;
 924     }
 925   else
 926     {
 927       CHECK_NUMBER_COERCE_MARKER (bound);
 928       lim = XINT (bound);
 929       if (n > 0 ? lim < PT : lim > PT)
 930         error ("Invalid search bound (wrong side of point)");
 931       if (lim > ZV)
 932         lim = ZV, lim_byte = ZV_BYTE;
 933       else if (lim < BEGV)
 934         lim = BEGV, lim_byte = BEGV_BYTE;
 935       else
 936         lim_byte = CHAR_TO_BYTE (lim);
 937     }
 938
 939   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 940                       (!NILP (current_buffer->case_fold_search)
 941                        ? current_buffer->case_canon_table
 942                        : Qnil),
 943                       (!NILP (current_buffer->case_fold_search)
 944                        ? current_buffer->case_eqv_table
 945                        : Qnil),
 946                       posix);
 947   if (np <= 0)
 948     {
 949       if (NILP (noerror))
 950         return signal_failure (string);
 951       if (!EQ (noerror, Qt))
 952         {
 953           if (lim < BEGV || lim > ZV)
 954             abort ();
 955           SET_PT_BOTH (lim, lim_byte);
 956           return Qnil;
 957 #if 0 /* This would be clean, but maybe programs depend on
 958          a value of nil here.  */
 959           np = lim;
 960 #endif
 961         }
 962       else
 963         return Qnil;
 964     }
 965
 966   if (np < BEGV || np > ZV)
 967     abort ();
 968
 969   SET_PT (np);
 970
 971   return make_number (np);
 972 }
 973 \f
 974 /* Return 1 if REGEXP it matches just one constant string.  */
 975
 976 static int
 977 trivial_regexp_p (regexp)
 978      Lisp_Object regexp;
 979 {
 980   int len = SBYTES (regexp);
 981   unsigned char *s = SDATA (regexp);
 982   while (--len >= 0)
 983     {
 984       switch (*s++)
 985         {
 986         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 987           return 0;
 988         case '\\':
 989           if (--len < 0)
 990             return 0;
 991           switch (*s++)
 992             {
 993             case '|': case '(': case ')': case '`': case '\'': case 'b':
 994             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 995             case 'S': case '=': case '{': case '}': case '_':
 996             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 997             case '1': case '2': case '3': case '4': case '5':
 998             case '6': case '7': case '8': case '9':
 999               return 0;
1000             }
1001         }
1002     }
1003   return 1;
1004 }
1005
1006 /* Search for the n'th occurrence of STRING in the current buffer,
1007    starting at position POS and stopping at position LIM,
1008    treating STRING as a literal string if RE is false or as
1009    a regular expression if RE is true.
1010
1011    If N is positive, searching is forward and LIM must be greater than POS.
1012    If N is negative, searching is backward and LIM must be less than POS.
1013
1014    Returns -x if x occurrences remain to be found (x > 0),
1015    or else the position at the beginning of the Nth occurrence
1016    (if searching backward) or the end (if searching forward).
1017
1018    POSIX is nonzero if we want full backtracking (POSIX style)
1019    for this pattern.  0 means backtrack only enough to get a valid match.  */
1020
/* Store into OUT the translation of character D under table TRT.
   If TRT is nil, or its entry for D is not an integer, OUT gets D
   itself.  OUT and D may name the same variable.  */
#define TRANSLATE(out, trt, d)                  \
do                                              \
  {                                             \
    if (NILP (trt))                             \
      out = d;                                  \
    else                                        \
      {                                         \
        Lisp_Object temp;                       \
        temp = Faref (trt, make_number (d));    \
        if (INTEGERP (temp))                    \
          out = XINT (temp);                    \
        else                                    \
          out = d;                              \
      }                                         \
  }                                             \
while (0)
1037
1038 static int
1039 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1040                RE, trt, inverse_trt, posix)
1041      Lisp_Object string;
1042      int pos;
1043      int pos_byte;
1044      int lim;
1045      int lim_byte;
1046      int n;
1047      int RE;
1048      Lisp_Object trt;
1049      Lisp_Object inverse_trt;
1050      int posix;
1051 {
1052   int len = SCHARS (string);
1053   int len_byte = SBYTES (string);
1054   register int i;
1055
1056   if (running_asynch_code)
1057     save_search_regs ();
1058
1059   /* Searching 0 times means don't move.  */
1060   /* Null string is found at starting position.  */
1061   if (len == 0 || n == 0)
1062     {
1063       set_search_regs (pos_byte, 0);
1064       return pos;
1065     }
1066
1067   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1068     {
1069       unsigned char *p1, *p2;
1070       int s1, s2;
1071       struct re_pattern_buffer *bufp;
1072
1073       bufp = compile_pattern (string, &search_regs, trt, posix,
1074                               !NILP (current_buffer->enable_multibyte_characters));
1075
1076       immediate_quit = 1;       /* Quit immediately if user types ^G,
1077                                    because letting this function finish
1078                                    can take too long. */
1079       QUIT;                     /* Do a pending quit right away,
1080                                    to avoid paradoxical behavior */
1081       /* Get pointers and sizes of the two strings
1082          that make up the visible portion of the buffer. */
1083
1084       p1 = BEGV_ADDR;
1085       s1 = GPT_BYTE - BEGV_BYTE;
1086       p2 = GAP_END_ADDR;
1087       s2 = ZV_BYTE - GPT_BYTE;
1088       if (s1 < 0)
1089         {
1090           p2 = p1;
1091           s2 = ZV_BYTE - BEGV_BYTE;
1092           s1 = 0;
1093         }
1094       if (s2 < 0)
1095         {
1096           s1 = ZV_BYTE - BEGV_BYTE;
1097           s2 = 0;
1098         }
1099       re_match_object = Qnil;
1100
1101       while (n < 0)
1102         {
1103           int val;
1104           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1105                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1106                              &search_regs,
1107                              /* Don't allow match past current point */
1108                              pos_byte - BEGV_BYTE);
1109           if (val == -2)
1110             {
1111               matcher_overflow ();
1112             }
1113           if (val >= 0)
1114             {
1115               pos_byte = search_regs.start[0] + BEGV_BYTE;
1116               for (i = 0; i < search_regs.num_regs; i++)
1117                 if (search_regs.start[i] >= 0)
1118                   {
1119                     search_regs.start[i]
1120                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1121                     search_regs.end[i]
1122                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1123                   }
1124               XSETBUFFER (last_thing_searched, current_buffer);
1125               /* Set pos to the new position. */
1126               pos = search_regs.start[0];
1127             }
1128           else
1129             {
1130               immediate_quit = 0;
1131               return (n);
1132             }
1133           n++;
1134         }
1135       while (n > 0)
1136         {
1137           int val;
1138           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1139                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1140                              &search_regs,
1141                              lim_byte - BEGV_BYTE);
1142           if (val == -2)
1143             {
1144               matcher_overflow ();
1145             }
1146           if (val >= 0)
1147             {
1148               pos_byte = search_regs.end[0] + BEGV_BYTE;
1149               for (i = 0; i < search_regs.num_regs; i++)
1150                 if (search_regs.start[i] >= 0)
1151                   {
1152                     search_regs.start[i]
1153                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1154                     search_regs.end[i]
1155                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1156                   }
1157               XSETBUFFER (last_thing_searched, current_buffer);
1158               pos = search_regs.end[0];
1159             }
1160           else
1161             {
1162               immediate_quit = 0;
1163               return (0 - n);
1164             }
1165           n--;
1166         }
1167       immediate_quit = 0;
1168       return (pos);
1169     }
1170   else                          /* non-RE case */
1171     {
1172       unsigned char *raw_pattern, *pat;
1173       int raw_pattern_size;
1174       int raw_pattern_size_byte;
1175       unsigned char *patbuf;
1176       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1177       unsigned char *base_pat = SDATA (string);
1178       int charset_base = -1;
1179       int boyer_moore_ok = 1;
1180
1181       /* MULTIBYTE says whether the text to be searched is multibyte.
1182          We must convert PATTERN to match that, or we will not really
1183          find things right.  */
1184
1185       if (multibyte == STRING_MULTIBYTE (string))
1186         {
1187           raw_pattern = (unsigned char *) SDATA (string);
1188           raw_pattern_size = SCHARS (string);
1189           raw_pattern_size_byte = SBYTES (string);
1190         }
1191       else if (multibyte)
1192         {
1193           raw_pattern_size = SCHARS (string);
1194           raw_pattern_size_byte
1195             = count_size_as_multibyte (SDATA (string),
1196                                        raw_pattern_size);
1197           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1198           copy_text (SDATA (string), raw_pattern,
1199                      SCHARS (string), 0, 1);
1200         }
1201       else
1202         {
1203           /* Converting multibyte to single-byte.
1204
1205              ??? Perhaps this conversion should be done in a special way
1206              by subtracting nonascii-insert-offset from each non-ASCII char,
1207              so that only the multibyte chars which really correspond to
1208              the chosen single-byte character set can possibly match.  */
1209           raw_pattern_size = SCHARS (string);
1210           raw_pattern_size_byte = SCHARS (string);
1211           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1212           copy_text (SDATA (string), raw_pattern,
1213                      SBYTES (string), 1, 0);
1214         }
1215
1216       /* Copy and optionally translate the pattern.  */
1217       len = raw_pattern_size;
1218       len_byte = raw_pattern_size_byte;
1219       patbuf = (unsigned char *) alloca (len_byte);
1220       pat = patbuf;
1221       base_pat = raw_pattern;
1222       if (multibyte)
1223         {
1224           while (--len >= 0)
1225             {
1226               unsigned char str[MAX_MULTIBYTE_LENGTH];
1227               int c, translated, inverse;
1228               int in_charlen, charlen;
1229
1230               /* If we got here and the RE flag is set, it's because we're
1231                  dealing with a regexp known to be trivial, so the backslash
1232                  just quotes the next character.  */
1233               if (RE && *base_pat == '\\')
1234                 {
1235                   len--;
1236                   len_byte--;
1237                   base_pat++;
1238                 }
1239
1240               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1241
1242               /* Translate the character, if requested.  */
1243               TRANSLATE (translated, trt, c);
1244               /* If translation changed the byte-length, go back
1245                  to the original character.  */
1246               charlen = CHAR_STRING (translated, str);
1247               if (in_charlen != charlen)
1248                 {
1249                   translated = c;
1250                   charlen = CHAR_STRING (c, str);
1251                 }
1252
1253               /* If we are searching for something strange,
1254                  an invalid multibyte code, don't use boyer-moore.  */
1255               if (! ASCII_BYTE_P (translated)
1256                   && (charlen == 1 /* 8bit code */
1257                       || charlen != in_charlen /* invalid multibyte code */
1258                       ))
1259                 boyer_moore_ok = 0;
1260
1261               TRANSLATE (inverse, inverse_trt, c);
1262
1263               /* Did this char actually get translated?
1264                  Would any other char get translated into it?  */
1265               if (translated != c || inverse != c)
1266                 {
1267                   /* Keep track of which character set row
1268                      contains the characters that need translation.  */
1269                   int charset_base_code = c & ~CHAR_FIELD3_MASK;
1270                   int inverse_charset_base = inverse & ~CHAR_FIELD3_MASK;
1271
1272                   if (charset_base_code != inverse_charset_base)
1273                     boyer_moore_ok = 0;
1274                   else if (charset_base == -1)
1275                     charset_base = charset_base_code;
1276                   else if (charset_base != charset_base_code)
1277                     /* If two different rows appear, needing translation,
1278                        then we cannot use boyer_moore search.  */
1279                     boyer_moore_ok = 0;
1280                 }
1281
1282               /* Store this character into the translated pattern.  */
1283               bcopy (str, pat, charlen);
1284               pat += charlen;
1285               base_pat += in_charlen;
1286               len_byte -= in_charlen;
1287             }
1288         }
1289       else
1290         {
1291           /* Unibyte buffer.  */
1292           charset_base = 0;
1293           while (--len >= 0)
1294             {
1295               int c, translated;
1296
1297               /* If we got here and the RE flag is set, it's because we're
1298                  dealing with a regexp known to be trivial, so the backslash
1299                  just quotes the next character.  */
1300               if (RE && *base_pat == '\\')
1301                 {
1302                   len--;
1303                   base_pat++;
1304                 }
1305               c = *base_pat++;
1306               TRANSLATE (translated, trt, c);
1307               *pat++ = translated;
1308             }
1309         }
1310
1311       len_byte = pat - patbuf;
1312       len = raw_pattern_size;
1313       pat = base_pat = patbuf;
1314
1315       if (boyer_moore_ok)
1316         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1317                             pos, pos_byte, lim, lim_byte,
1318                             charset_base);
1319       else
1320         return simple_search (n, pat, len, len_byte, trt,
1321                               pos, pos_byte, lim, lim_byte);
1322     }
1323 }
1324 \f
1325 /* Do a simple string search N times for the string PAT,
1326    whose length is LEN/LEN_BYTE,
1327    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1328    TRT is the translation table.
1329
1330    Return the character position where the match is found.
1331    Otherwise, if M matches remained to be found, return -M.
1332
1333    This kind of search works regardless of what is in PAT and
1334    regardless of what is in TRT.  It is used in cases where
1335    boyer_moore cannot work.  */
1336
1337 static int
1338 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1339      int n;
1340      unsigned char *pat;
1341      int len, len_byte;
1342      Lisp_Object trt;
1343      int pos, pos_byte;
1344      int lim, lim_byte;
1345 {
1346   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1347   int forward = n > 0;
1348
1349   if (lim > pos && multibyte)
1350     while (n > 0)
1351       {
1352         while (1)
1353           {
1354             /* Try matching at position POS.  */
1355             int this_pos = pos;
1356             int this_pos_byte = pos_byte;
1357             int this_len = len;
1358             int this_len_byte = len_byte;
1359             unsigned char *p = pat;
1360             if (pos + len > lim)
1361               goto stop;
1362
1363             while (this_len > 0)
1364               {
1365                 int charlen, buf_charlen;
1366                 int pat_ch, buf_ch;
1367
1368                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1369                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1370                                                  ZV_BYTE - this_pos_byte,
1371                                                  buf_charlen);
1372                 TRANSLATE (buf_ch, trt, buf_ch);
1373
1374                 if (buf_ch != pat_ch)
1375                   break;
1376
1377                 this_len_byte -= charlen;
1378                 this_len--;
1379                 p += charlen;
1380
1381                 this_pos_byte += buf_charlen;
1382                 this_pos++;
1383               }
1384
1385             if (this_len == 0)
1386               {
1387                 pos += len;
1388                 pos_byte += len_byte;
1389                 break;
1390               }
1391
1392             INC_BOTH (pos, pos_byte);
1393           }
1394
1395         n--;
1396       }
1397   else if (lim > pos)
1398     while (n > 0)
1399       {
1400         while (1)
1401           {
1402             /* Try matching at position POS.  */
1403             int this_pos = pos;
1404             int this_len = len;
1405             unsigned char *p = pat;
1406
1407             if (pos + len > lim)
1408               goto stop;
1409
1410             while (this_len > 0)
1411               {
1412                 int pat_ch = *p++;
1413                 int buf_ch = FETCH_BYTE (this_pos);
1414                 TRANSLATE (buf_ch, trt, buf_ch);
1415
1416                 if (buf_ch != pat_ch)
1417                   break;
1418
1419                 this_len--;
1420                 this_pos++;
1421               }
1422
1423             if (this_len == 0)
1424               {
1425                 pos += len;
1426                 break;
1427               }
1428
1429             pos++;
1430           }
1431
1432         n--;
1433       }
1434   /* Backwards search.  */
1435   else if (lim < pos && multibyte)
1436     while (n < 0)
1437       {
1438         while (1)
1439           {
1440             /* Try matching at position POS.  */
1441             int this_pos = pos - len;
1442             int this_pos_byte = pos_byte - len_byte;
1443             int this_len = len;
1444             int this_len_byte = len_byte;
1445             unsigned char *p = pat;
1446
1447             if (pos - len < lim)
1448               goto stop;
1449
1450             while (this_len > 0)
1451               {
1452                 int charlen, buf_charlen;
1453                 int pat_ch, buf_ch;
1454
1455                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1456                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1457                                                  ZV_BYTE - this_pos_byte,
1458                                                  buf_charlen);
1459                 TRANSLATE (buf_ch, trt, buf_ch);
1460
1461                 if (buf_ch != pat_ch)
1462                   break;
1463
1464                 this_len_byte -= charlen;
1465                 this_len--;
1466                 p += charlen;
1467                 this_pos_byte += buf_charlen;
1468                 this_pos++;
1469               }
1470
1471             if (this_len == 0)
1472               {
1473                 pos -= len;
1474                 pos_byte -= len_byte;
1475                 break;
1476               }
1477
1478             DEC_BOTH (pos, pos_byte);
1479           }
1480
1481         n++;
1482       }
1483   else if (lim < pos)
1484     while (n < 0)
1485       {
1486         while (1)
1487           {
1488             /* Try matching at position POS.  */
1489             int this_pos = pos - len;
1490             int this_len = len;
1491             unsigned char *p = pat;
1492
1493             if (pos - len < lim)
1494               goto stop;
1495
1496             while (this_len > 0)
1497               {
1498                 int pat_ch = *p++;
1499                 int buf_ch = FETCH_BYTE (this_pos);
1500                 TRANSLATE (buf_ch, trt, buf_ch);
1501
1502                 if (buf_ch != pat_ch)
1503                   break;
1504                 this_len--;
1505                 this_pos++;
1506               }
1507
1508             if (this_len == 0)
1509               {
1510                 pos -= len;
1511                 break;
1512               }
1513
1514             pos--;
1515           }
1516
1517         n++;
1518       }
1519
1520  stop:
1521   if (n == 0)
1522     {
1523       if (forward)
1524         set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1525       else
1526         set_search_regs (multibyte ? pos_byte : pos, len_byte);
1527
1528       return pos;
1529     }
1530   else if (n > 0)
1531     return -n;
1532   else
1533     return n;
1534 }
1535 \f
1536 /* Do Boyer-Moore search N times for the string PAT,
1537    whose length is LEN/LEN_BYTE,
1538    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1539    DIRECTION says which direction we search in.
1540    TRT and INVERSE_TRT are translation tables.
1541
1542    This kind of search works if all the characters in PAT that have
1543    nontrivial translation are the same aside from the last byte.  This
1544    makes it possible to translate just the last byte of a character,
1545    and do so after just a simple test of the context.
1546
1547    If that criterion is not satisfied, do not call this function.  */
1548
1549 static int
1550 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1551              pos, pos_byte, lim, lim_byte, charset_base)
1552      int n;
1553      unsigned char *base_pat;
1554      int len, len_byte;
1555      Lisp_Object trt;
1556      Lisp_Object inverse_trt;
1557      int pos, pos_byte;
1558      int lim, lim_byte;
1559      int charset_base;
1560 {
1561   int direction = ((n > 0) ? 1 : -1);
1562   register int dirlen;
1563   int infinity, limit, stride_for_teases = 0;
1564   register int *BM_tab;
1565   int *BM_tab_base;
1566   register unsigned char *cursor, *p_limit;
1567   register int i, j;
1568   unsigned char *pat, *pat_end;
1569   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1570
1571   unsigned char simple_translate[0400];
1572   int translate_prev_byte = 0;
1573   int translate_anteprev_byte = 0;
1574
1575 #ifdef C_ALLOCA
1576   int BM_tab_space[0400];
1577   BM_tab = &BM_tab_space[0];
1578 #else
1579   BM_tab = (int *) alloca (0400 * sizeof (int));
1580 #endif
1581   /* The general approach is that we are going to maintain that we know */
1582   /* the first (closest to the present position, in whatever direction */
1583   /* we're searching) character that could possibly be the last */
1584   /* (furthest from present position) character of a valid match.  We */
1585   /* advance the state of our knowledge by looking at that character */
1586   /* and seeing whether it indeed matches the last character of the */
1587   /* pattern.  If it does, we take a closer look.  If it does not, we */
1588   /* move our pointer (to putative last characters) as far as is */
1589   /* logically possible.  This amount of movement, which I call a */
1590   /* stride, will be the length of the pattern if the actual character */
1591   /* appears nowhere in the pattern, otherwise it will be the distance */
1592   /* from the last occurrence of that character to the end of the */
1593   /* pattern. */
1594   /* As a coding trick, an enormous stride is coded into the table for */
1595   /* characters that match the last character.  This allows use of only */
1596   /* a single test, a test for having gone past the end of the */
1597   /* permissible match region, to test for both possible matches (when */
1598   /* the stride goes past the end immediately) and failure to */
1599   /* match (where you get nudged past the end one stride at a time). */
1600
1601   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1602   /* is determined only by the last character of the putative match. */
1603   /* If that character does not match, we will stride the proper */
1604   /* distance to propose a match that superimposes it on the last */
1605   /* instance of a character that matches it (per trt), or misses */
1606   /* it entirely if there is none. */
1607
1608   dirlen = len_byte * direction;
1609   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1610
1611   /* Record position after the end of the pattern.  */
1612   pat_end = base_pat + len_byte;
1613   /* BASE_PAT points to a character that we start scanning from.
1614      It is the first character in a forward search,
1615      the last character in a backward search.  */
1616   if (direction < 0)
1617     base_pat = pat_end - 1;
1618
1619   BM_tab_base = BM_tab;
1620   BM_tab += 0400;
1621   j = dirlen;           /* to get it in a register */
1622   /* A character that does not appear in the pattern induces a */
1623   /* stride equal to the pattern length. */
1624   while (BM_tab_base != BM_tab)
1625     {
1626       *--BM_tab = j;
1627       *--BM_tab = j;
1628       *--BM_tab = j;
1629       *--BM_tab = j;
1630     }
1631
1632   /* We use this for translation, instead of TRT itself.
1633      We fill this in to handle the characters that actually
1634      occur in the pattern.  Others don't matter anyway!  */
1635   bzero (simple_translate, sizeof simple_translate);
1636   for (i = 0; i < 0400; i++)
1637     simple_translate[i] = i;
1638
1639   i = 0;
1640   while (i != infinity)
1641     {
1642       unsigned char *ptr = base_pat + i;
1643       i += direction;
1644       if (i == dirlen)
1645         i = infinity;
1646       if (! NILP (trt))
1647         {
1648           int ch;
1649           int untranslated;
1650           int this_translated = 1;
1651
1652           if (multibyte
1653               /* Is *PTR the last byte of a character?  */
1654               && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1655             {
1656               unsigned char *charstart = ptr;
1657               while (! CHAR_HEAD_P (*charstart))
1658                 charstart--;
1659               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1660               if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
1661                 {
1662                   TRANSLATE (ch, trt, untranslated);
1663                   if (! CHAR_HEAD_P (*ptr))
1664                     {
1665                       translate_prev_byte = ptr[-1];
1666                       if (! CHAR_HEAD_P (translate_prev_byte))
1667                         translate_anteprev_byte = ptr[-2];
1668                     }
1669                 }
1670               else
1671                 {
1672                   this_translated = 0;
1673                   ch = *ptr;
1674                 }
1675             }
1676           else if (!multibyte)
1677             TRANSLATE (ch, trt, *ptr);
1678           else
1679             {
1680               ch = *ptr;
1681               this_translated = 0;
1682             }
1683
1684           if (ch > 0400)
1685             j = ((unsigned char) ch) | 0200;
1686           else
1687             j = (unsigned char) ch;
1688
1689           if (i == infinity)
1690             stride_for_teases = BM_tab[j];
1691
1692           BM_tab[j] = dirlen - i;
1693           /* A translation table is accompanied by its inverse -- see */
1694           /* comment following downcase_table for details */
1695           if (this_translated)
1696             {
1697               int starting_ch = ch;
1698               int starting_j = j;
1699               while (1)
1700                 {
1701                   TRANSLATE (ch, inverse_trt, ch);
1702                   if (ch > 0400)
1703                     j = ((unsigned char) ch) | 0200;
1704                   else
1705                     j = (unsigned char) ch;
1706
1707                   /* For all the characters that map into CH,
1708                      set up simple_translate to map the last byte
1709                      into STARTING_J.  */
1710                   simple_translate[j] = starting_j;
1711                   if (ch == starting_ch)
1712                     break;
1713                   BM_tab[j] = dirlen - i;
1714                 }
1715             }
1716         }
1717       else
1718         {
1719           j = *ptr;
1720
1721           if (i == infinity)
1722             stride_for_teases = BM_tab[j];
1723           BM_tab[j] = dirlen - i;
1724         }
1725       /* stride_for_teases tells how much to stride if we get a */
1726       /* match on the far character but are subsequently */
1727       /* disappointed, by recording what the stride would have been */
1728       /* for that character if the last character had been */
1729       /* different. */
1730     }
1731   infinity = dirlen - infinity;
1732   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1733   /* loop invariant - POS_BYTE points at where last char (first
1734      char if reverse) of pattern would align in a possible match.  */
1735   while (n != 0)
1736     {
1737       int tail_end;
1738       unsigned char *tail_end_ptr;
1739
1740       /* It's been reported that some (broken) compiler thinks that
1741          Boolean expressions in an arithmetic context are unsigned.
1742          Using an explicit ?1:0 prevents this.  */
1743       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1744           < 0)
1745         return (n * (0 - direction));
1746       /* First we do the part we can by pointers (maybe nothing) */
1747       QUIT;
1748       pat = base_pat;
1749       limit = pos_byte - dirlen + direction;
1750       if (direction > 0)
1751         {
1752           limit = BUFFER_CEILING_OF (limit);
1753           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1754              can take on without hitting edge of buffer or the gap.  */
1755           limit = min (limit, pos_byte + 20000);
1756           limit = min (limit, lim_byte - 1);
1757         }
1758       else
1759         {
1760           limit = BUFFER_FLOOR_OF (limit);
1761           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1762              can take on without hitting edge of buffer or the gap.  */
1763           limit = max (limit, pos_byte - 20000);
1764           limit = max (limit, lim_byte);
1765         }
1766       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1767       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1768
1769       if ((limit - pos_byte) * direction > 20)
1770         {
1771           unsigned char *p2;
1772
1773           p_limit = BYTE_POS_ADDR (limit);
1774           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1775           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1776           while (1)             /* use one cursor setting as long as i can */
1777             {
1778               if (direction > 0) /* worth duplicating */
1779                 {
1780                   /* Use signed comparison if appropriate
1781                      to make cursor+infinity sure to be > p_limit.
1782                      Assuming that the buffer lies in a range of addresses
1783                      that are all "positive" (as ints) or all "negative",
1784                      either kind of comparison will work as long
1785                      as we don't step by infinity.  So pick the kind
1786                      that works when we do step by infinity.  */
1787                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1788                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1789                       cursor += BM_tab[*cursor];
1790                   else
1791                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1792                       cursor += BM_tab[*cursor];
1793                 }
1794               else
1795                 {
1796                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1797                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1798                       cursor += BM_tab[*cursor];
1799                   else
1800                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1801                       cursor += BM_tab[*cursor];
1802                 }
1803 /* If you are here, cursor is beyond the end of the searched region. */
1804 /* This can happen if you match on the far character of the pattern, */
1805 /* because the "stride" of that character is infinity, a number able */
1806 /* to throw you well beyond the end of the search.  It can also */
1807 /* happen if you fail to match within the permitted region and would */
1808 /* otherwise try a character beyond that region */
1809               if ((cursor - p_limit) * direction <= len_byte)
1810                 break;  /* a small overrun is genuine */
1811               cursor -= infinity; /* large overrun = hit */
1812               i = dirlen - direction;
1813               if (! NILP (trt))
1814                 {
1815                   while ((i -= direction) + direction != 0)
1816                     {
1817                       int ch;
1818                       cursor -= direction;
1819                       /* Translate only the last byte of a character.  */
1820                       if (! multibyte
1821                           || ((cursor == tail_end_ptr
1822                                || CHAR_HEAD_P (cursor[1]))
1823                               && (CHAR_HEAD_P (cursor[0])
1824                                   || (translate_prev_byte == cursor[-1]
1825                                       && (CHAR_HEAD_P (translate_prev_byte)
1826                                           || translate_anteprev_byte == cursor[-2])))))
1827                         ch = simple_translate[*cursor];
1828                       else
1829                         ch = *cursor;
1830                       if (pat[i] != ch)
1831                         break;
1832                     }
1833                 }
1834               else
1835                 {
1836                   while ((i -= direction) + direction != 0)
1837                     {
1838                       cursor -= direction;
1839                       if (pat[i] != *cursor)
1840                         break;
1841                     }
1842                 }
1843               cursor += dirlen - i - direction; /* fix cursor */
1844               if (i + direction == 0)
1845                 {
1846                   int position;
1847
1848                   cursor -= direction;
1849
1850                   position = pos_byte + cursor - p2 + ((direction > 0)
1851                                                        ? 1 - len_byte : 0);
1852                   set_search_regs (position, len_byte);
1853
1854                   if ((n -= direction) != 0)
1855                     cursor += dirlen; /* to resume search */
1856                   else
1857                     return ((direction > 0)
1858                             ? search_regs.end[0] : search_regs.start[0]);
1859                 }
1860               else
1861                 cursor += stride_for_teases; /* <sigh> we lose -  */
1862             }
1863           pos_byte += cursor - p2;
1864         }
1865       else
1866         /* Now we'll pick up a clump that has to be done the hard */
1867         /* way because it covers a discontinuity */
1868         {
1869           limit = ((direction > 0)
1870                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1871                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1872           limit = ((direction > 0)
1873                    ? min (limit + len_byte, lim_byte - 1)
1874                    : max (limit - len_byte, lim_byte));
1875           /* LIMIT is now the last value POS_BYTE can have
1876              and still be valid for a possible match.  */
1877           while (1)
1878             {
1879               /* This loop can be coded for space rather than */
1880               /* speed because it will usually run only once. */
1881               /* (the reach is at most len + 21, and typically */
1882               /* does not exceed len) */
1883               while ((limit - pos_byte) * direction >= 0)
1884                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1885               /* now run the same tests to distinguish going off the */
1886               /* end, a match or a phony match. */
1887               if ((pos_byte - limit) * direction <= len_byte)
1888                 break;  /* ran off the end */
1889               /* Found what might be a match.
1890                  Set POS_BYTE back to last (first if reverse) pos.  */
1891               pos_byte -= infinity;
1892               i = dirlen - direction;
1893               while ((i -= direction) + direction != 0)
1894                 {
1895                   int ch;
1896                   unsigned char *ptr;
1897                   pos_byte -= direction;
1898                   ptr = BYTE_POS_ADDR (pos_byte);
1899                   /* Translate only the last byte of a character.  */
1900                   if (! multibyte
1901                       || ((ptr == tail_end_ptr
1902                            || CHAR_HEAD_P (ptr[1]))
1903                           && (CHAR_HEAD_P (ptr[0])
1904                               || (translate_prev_byte == ptr[-1]
1905                                   && (CHAR_HEAD_P (translate_prev_byte)
1906                                       || translate_anteprev_byte == ptr[-2])))))
1907                     ch = simple_translate[*ptr];
1908                   else
1909                     ch = *ptr;
1910                   if (pat[i] != ch)
1911                     break;
1912                 }
1913               /* Above loop has moved POS_BYTE part or all the way
1914                  back to the first pos (last pos if reverse).
1915                  Set it once again at the last (first if reverse) char.  */
1916               pos_byte += dirlen - i- direction;
1917               if (i + direction == 0)
1918                 {
1919                   int position;
1920                   pos_byte -= direction;
1921
1922                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1923
1924                   set_search_regs (position, len_byte);
1925
1926                   if ((n -= direction) != 0)
1927                     pos_byte += dirlen; /* to resume search */
1928                   else
1929                     return ((direction > 0)
1930                             ? search_regs.end[0] : search_regs.start[0]);
1931                 }
1932               else
1933                 pos_byte += stride_for_teases;
1934             }
1935           }
1936       /* We have done one clump.  Can we continue? */
1937       if ((lim_byte - pos_byte) * direction < 0)
1938         return ((0 - n) * direction);
1939     }
1940   return BYTE_TO_CHAR (pos_byte);
1941 }
1942
1943 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1944    for the overall match just found in the current buffer.
1945    Also clear out the match data for registers 1 and up.  */
1946
1947 static void
1948 set_search_regs (beg_byte, nbytes)
1949      int beg_byte, nbytes;
1950 {
1951   int i;
1952
1953   /* Make sure we have registers in which to store
1954      the match position.  */
1955   if (search_regs.num_regs == 0)
1956     {
1957       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1958       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1959       search_regs.num_regs = 2;
1960     }
1961
1962   /* Clear out the other registers.  */
1963   for (i = 1; i < search_regs.num_regs; i++)
1964     {
1965       search_regs.start[i] = -1;
1966       search_regs.end[i] = -1;
1967     }
1968
1969   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1970   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1971   XSETBUFFER (last_thing_searched, current_buffer);
1972 }
1973 \f
1974 /* Given a string of words separated by word delimiters,
1975   compute a regexp that matches those exact words
1976   separated by arbitrary punctuation.  */
1977
1978 static Lisp_Object
1979 wordify (string)
1980      Lisp_Object string;
1981 {
1982   register unsigned char *p, *o;
1983   register int i, i_byte, len, punct_count = 0, word_count = 0;
1984   Lisp_Object val;
1985   int prev_c = 0;
1986   int adjust;
1987
1988   CHECK_STRING (string);
1989   p = SDATA (string);
1990   len = SCHARS (string);
1991
1992   for (i = 0, i_byte = 0; i < len; )
1993     {
1994       int c;
1995
1996       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1997
1998       if (SYNTAX (c) != Sword)
1999         {
2000           punct_count++;
2001           if (i > 0 && SYNTAX (prev_c) == Sword)
2002             word_count++;
2003         }
2004
2005       prev_c = c;
2006     }
2007
2008   if (SYNTAX (prev_c) == Sword)
2009     word_count++;
2010   if (!word_count)
2011     return empty_string;
2012
2013   adjust = - punct_count + 5 * (word_count - 1) + 4;
2014   if (STRING_MULTIBYTE (string))
2015     val = make_uninit_multibyte_string (len + adjust,
2016                                         SBYTES (string)
2017                                         + adjust);
2018   else
2019     val = make_uninit_string (len + adjust);
2020
2021   o = SDATA (val);
2022   *o++ = '\\';
2023   *o++ = 'b';
2024   prev_c = 0;
2025
2026   for (i = 0, i_byte = 0; i < len; )
2027     {
2028       int c;
2029       int i_byte_orig = i_byte;
2030
2031       FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2032
2033       if (SYNTAX (c) == Sword)
2034         {
2035           bcopy (SDATA (string) + i_byte_orig, o,
2036                  i_byte - i_byte_orig);
2037           o += i_byte - i_byte_orig;
2038         }
2039       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2040         {
2041           *o++ = '\\';
2042           *o++ = 'W';
2043           *o++ = '\\';
2044           *o++ = 'W';
2045           *o++ = '*';
2046         }
2047
2048       prev_c = c;
2049     }
2050
2051   *o++ = '\\';
2052   *o++ = 'b';
2053
2054   return val;
2055 }
2056 \f
2057 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2058        "MSearch backward: ",
2059        doc: /* Search backward from point for STRING.
2060 Set point to the beginning of the occurrence found, and return point.
2061 An optional second argument bounds the search; it is a buffer position.
2062 The match found must not extend before that position.
2063 Optional third argument, if t, means if fail just return nil (no error).
2064  If not nil and not t, position at limit of search and return nil.
2065 Optional fourth argument is repeat count--search for successive occurrences.
2066
2067 Search case-sensitivity is determined by the value of the variable
2068 `case-fold-search', which see.
2069
2070 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2071      (string, bound, noerror, count)
2072      Lisp_Object string, bound, noerror, count;
2073 {
2074   return search_command (string, bound, noerror, count, -1, 0, 0);
2075 }
2076
2077 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2078        doc: /* Search forward from point for STRING.
2079 Set point to the end of the occurrence found, and return point.
2080 An optional second argument bounds the search; it is a buffer position.
2081 The match found must not extend after that position.  nil is equivalent
2082   to (point-max).
2083 Optional third argument, if t, means if fail just return nil (no error).
2084   If not nil and not t, move to limit of search and return nil.
2085 Optional fourth argument is repeat count--search for successive occurrences.
2086
2087 Search case-sensitivity is determined by the value of the variable
2088 `case-fold-search', which see.
2089
2090 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2091      (string, bound, noerror, count)
2092      Lisp_Object string, bound, noerror, count;
2093 {
2094   return search_command (string, bound, noerror, count, 1, 0, 0);
2095 }
2096
2097 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2098        "sWord search backward: ",
2099        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2100 Set point to the beginning of the occurrence found, and return point.
2101 An optional second argument bounds the search; it is a buffer position.
2102 The match found must not extend before that position.
2103 Optional third argument, if t, means if fail just return nil (no error).
2104   If not nil and not t, move to limit of search and return nil.
2105 Optional fourth argument is repeat count--search for successive occurrences.  */)
2106      (string, bound, noerror, count)
2107      Lisp_Object string, bound, noerror, count;
2108 {
2109   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2110 }
2111
2112 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2113        "sWord search: ",
2114        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2115 Set point to the end of the occurrence found, and return point.
2116 An optional second argument bounds the search; it is a buffer position.
2117 The match found must not extend after that position.
2118 Optional third argument, if t, means if fail just return nil (no error).
2119   If not nil and not t, move to limit of search and return nil.
2120 Optional fourth argument is repeat count--search for successive occurrences.  */)
2121      (string, bound, noerror, count)
2122      Lisp_Object string, bound, noerror, count;
2123 {
2124   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2125 }
2126
2127 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2128        "sRE search backward: ",
2129        doc: /* Search backward from point for match for regular expression REGEXP.
2130 Set point to the beginning of the match, and return point.
2131 The match found is the one starting last in the buffer
2132 and yet ending before the origin of the search.
2133 An optional second argument bounds the search; it is a buffer position.
2134 The match found must start at or after that position.
2135 Optional third argument, if t, means if fail just return nil (no error).
2136   If not nil and not t, move to limit of search and return nil.
2137 Optional fourth argument is repeat count--search for successive occurrences.
2138 See also the functions `match-beginning', `match-end', `match-string',
2139 and `replace-match'.  */)
2140      (regexp, bound, noerror, count)
2141      Lisp_Object regexp, bound, noerror, count;
2142 {
2143   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2144 }
2145
2146 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2147        "sRE search: ",
2148        doc: /* Search forward from point for regular expression REGEXP.
2149 Set point to the end of the occurrence found, and return point.
2150 An optional second argument bounds the search; it is a buffer position.
2151 The match found must not extend after that position.
2152 Optional third argument, if t, means if fail just return nil (no error).
2153   If not nil and not t, move to limit of search and return nil.
2154 Optional fourth argument is repeat count--search for successive occurrences.
2155 See also the functions `match-beginning', `match-end', `match-string',
2156 and `replace-match'.  */)
2157      (regexp, bound, noerror, count)
2158      Lisp_Object regexp, bound, noerror, count;
2159 {
2160   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2161 }
2162
2163 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2164        "sPosix search backward: ",
2165        doc: /* Search backward from point for match for regular expression REGEXP.
2166 Find the longest match in accord with Posix regular expression rules.
2167 Set point to the beginning of the match, and return point.
2168 The match found is the one starting last in the buffer
2169 and yet ending before the origin of the search.
2170 An optional second argument bounds the search; it is a buffer position.
2171 The match found must start at or after that position.
2172 Optional third argument, if t, means if fail just return nil (no error).
2173   If not nil and not t, move to limit of search and return nil.
2174 Optional fourth argument is repeat count--search for successive occurrences.
2175 See also the functions `match-beginning', `match-end', `match-string',
2176 and `replace-match'.  */)
2177      (regexp, bound, noerror, count)
2178      Lisp_Object regexp, bound, noerror, count;
2179 {
2180   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2181 }
2182
2183 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2184        "sPosix search: ",
2185        doc: /* Search forward from point for regular expression REGEXP.
2186 Find the longest match in accord with Posix regular expression rules.
2187 Set point to the end of the occurrence found, and return point.
2188 An optional second argument bounds the search; it is a buffer position.
2189 The match found must not extend after that position.
2190 Optional third argument, if t, means if fail just return nil (no error).
2191   If not nil and not t, move to limit of search and return nil.
2192 Optional fourth argument is repeat count--search for successive occurrences.
2193 See also the functions `match-beginning', `match-end', `match-string',
2194 and `replace-match'.  */)
2195      (regexp, bound, noerror, count)
2196      Lisp_Object regexp, bound, noerror, count;
2197 {
2198   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2199 }
2200 \f
2201 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2202        doc: /* Replace text matched by last search with NEWTEXT.
2203 Leave point at the end of the replacement text.
2204
2205 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2206 Otherwise maybe capitalize the whole text, or maybe just word initials,
2207 based on the replaced text.
2208 If the replaced text has only capital letters
2209 and has at least one multiletter word, convert NEWTEXT to all caps.
2210 Otherwise if all words are capitalized in the replaced text,
2211 capitalize each word in NEWTEXT.
2212
2213 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2214 Otherwise treat `\\' as special:
2215   `\\&' in NEWTEXT means substitute original matched text.
2216   `\\N' means substitute what matched the Nth `\\(...\\)'.
2217        If Nth parens didn't match, substitute nothing.
2218   `\\\\' means insert one `\\'.
2219 Case conversion does not apply to these substitutions.
2220
2221 FIXEDCASE and LITERAL are optional arguments.
2222
2223 The optional fourth argument STRING can be a string to modify.
2224 This is meaningful when the previous match was done against STRING,
2225 using `string-match'.  When used this way, `replace-match'
2226 creates and returns a new string made by copying STRING and replacing
2227 the part of STRING that was matched.
2228
2229 The optional fifth argument SUBEXP specifies a subexpression;
2230 it says to replace just that subexpression with NEWTEXT,
2231 rather than replacing the entire matched text.
2232 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2233 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2234 NEWTEXT in place of subexp N.
2235 This is useful only after a regular expression search or match,
2236 since only regular expressions have distinguished subexpressions.  */)
2237      (newtext, fixedcase, literal, string, subexp)
2238      Lisp_Object newtext, fixedcase, literal, string, subexp;
2239 {
2240   enum { nochange, all_caps, cap_initial } case_action;
2241   register int pos, pos_byte;
2242   int some_multiletter_word;
2243   int some_lowercase;
2244   int some_uppercase;
2245   int some_nonuppercase_initial;
2246   register int c, prevc;
2247   int sub;
2248   int opoint, newpoint;
2249
2250   CHECK_STRING (newtext);
2251
2252   if (! NILP (string))
2253     CHECK_STRING (string);
2254
2255   case_action = nochange;       /* We tried an initialization */
2256                                 /* but some C compilers blew it */
2257
2258   if (search_regs.num_regs <= 0)
2259     error ("replace-match called before any match found");
2260
2261   if (NILP (subexp))
2262     sub = 0;
2263   else
2264     {
2265       CHECK_NUMBER (subexp);
2266       sub = XINT (subexp);
2267       if (sub < 0 || sub >= search_regs.num_regs)
2268         args_out_of_range (subexp, make_number (search_regs.num_regs));
2269     }
2270
2271   if (NILP (string))
2272     {
2273       if (search_regs.start[sub] < BEGV
2274           || search_regs.start[sub] > search_regs.end[sub]
2275           || search_regs.end[sub] > ZV)
2276         args_out_of_range (make_number (search_regs.start[sub]),
2277                            make_number (search_regs.end[sub]));
2278     }
2279   else
2280     {
2281       if (search_regs.start[sub] < 0
2282           || search_regs.start[sub] > search_regs.end[sub]
2283           || search_regs.end[sub] > SCHARS (string))
2284         args_out_of_range (make_number (search_regs.start[sub]),
2285                            make_number (search_regs.end[sub]));
2286     }
2287
2288   if (NILP (fixedcase))
2289     {
2290       /* Decide how to casify by examining the matched text. */
2291       int last;
2292
2293       pos = search_regs.start[sub];
2294       last = search_regs.end[sub];
2295
2296       if (NILP (string))
2297         pos_byte = CHAR_TO_BYTE (pos);
2298       else
2299         pos_byte = string_char_to_byte (string, pos);
2300
2301       prevc = '\n';
2302       case_action = all_caps;
2303
2304       /* some_multiletter_word is set nonzero if any original word
2305          is more than one letter long. */
2306       some_multiletter_word = 0;
2307       some_lowercase = 0;
2308       some_nonuppercase_initial = 0;
2309       some_uppercase = 0;
2310
2311       while (pos < last)
2312         {
2313           if (NILP (string))
2314             {
2315               c = FETCH_CHAR (pos_byte);
2316               INC_BOTH (pos, pos_byte);
2317             }
2318           else
2319             FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2320
2321           if (LOWERCASEP (c))
2322             {
2323               /* Cannot be all caps if any original char is lower case */
2324
2325               some_lowercase = 1;
2326               if (SYNTAX (prevc) != Sword)
2327                 some_nonuppercase_initial = 1;
2328               else
2329                 some_multiletter_word = 1;
2330             }
2331           else if (!NOCASEP (c))
2332             {
2333               some_uppercase = 1;
2334               if (SYNTAX (prevc) != Sword)
2335                 ;
2336               else
2337                 some_multiletter_word = 1;
2338             }
2339           else
2340             {
2341               /* If the initial is a caseless word constituent,
2342                  treat that like a lowercase initial.  */
2343               if (SYNTAX (prevc) != Sword)
2344                 some_nonuppercase_initial = 1;
2345             }
2346
2347           prevc = c;
2348         }
2349
2350       /* Convert to all caps if the old text is all caps
2351          and has at least one multiletter word.  */
2352       if (! some_lowercase && some_multiletter_word)
2353         case_action = all_caps;
2354       /* Capitalize each word, if the old text has all capitalized words.  */
2355       else if (!some_nonuppercase_initial && some_multiletter_word)
2356         case_action = cap_initial;
2357       else if (!some_nonuppercase_initial && some_uppercase)
2358         /* Should x -> yz, operating on X, give Yz or YZ?
2359            We'll assume the latter.  */
2360         case_action = all_caps;
2361       else
2362         case_action = nochange;
2363     }
2364
2365   /* Do replacement in a string.  */
2366   if (!NILP (string))
2367     {
2368       Lisp_Object before, after;
2369
2370       before = Fsubstring (string, make_number (0),
2371                            make_number (search_regs.start[sub]));
2372       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2373
2374       /* Substitute parts of the match into NEWTEXT
2375          if desired.  */
2376       if (NILP (literal))
2377         {
2378           int lastpos = 0;
2379           int lastpos_byte = 0;
2380           /* We build up the substituted string in ACCUM.  */
2381           Lisp_Object accum;
2382           Lisp_Object middle;
2383           int length = SBYTES (newtext);
2384
2385           accum = Qnil;
2386
2387           for (pos_byte = 0, pos = 0; pos_byte < length;)
2388             {
2389               int substart = -1;
2390               int subend = 0;
2391               int delbackslash = 0;
2392
2393               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2394
2395               if (c == '\\')
2396                 {
2397                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2398
2399                   if (c == '&')
2400                     {
2401                       substart = search_regs.start[sub];
2402                       subend = search_regs.end[sub];
2403                     }
2404                   else if (c >= '1' && c <= '9')
2405                     {
2406                       if (search_regs.start[c - '0'] >= 0
2407                           && c <= search_regs.num_regs + '0')
2408                         {
2409                           substart = search_regs.start[c - '0'];
2410                           subend = search_regs.end[c - '0'];
2411                         }
2412                       else
2413                         {
2414                           /* If that subexp did not match,
2415                              replace \\N with nothing.  */
2416                           substart = 0;
2417                           subend = 0;
2418                         }
2419                     }
2420                   else if (c == '\\')
2421                     delbackslash = 1;
2422                   else
2423                     error ("Invalid use of `\\' in replacement text");
2424                 }
2425               if (substart >= 0)
2426                 {
2427                   if (pos - 2 != lastpos)
2428                     middle = substring_both (newtext, lastpos,
2429                                              lastpos_byte,
2430                                              pos - 2, pos_byte - 2);
2431                   else
2432                     middle = Qnil;
2433                   accum = concat3 (accum, middle,
2434                                    Fsubstring (string,
2435                                                make_number (substart),
2436                                                make_number (subend)));
2437                   lastpos = pos;
2438                   lastpos_byte = pos_byte;
2439                 }
2440               else if (delbackslash)
2441                 {
2442                   middle = substring_both (newtext, lastpos,
2443                                            lastpos_byte,
2444                                            pos - 1, pos_byte - 1);
2445
2446                   accum = concat2 (accum, middle);
2447                   lastpos = pos;
2448                   lastpos_byte = pos_byte;
2449                 }
2450             }
2451
2452           if (pos != lastpos)
2453             middle = substring_both (newtext, lastpos,
2454                                      lastpos_byte,
2455                                      pos, pos_byte);
2456           else
2457             middle = Qnil;
2458
2459           newtext = concat2 (accum, middle);
2460         }
2461
2462       /* Do case substitution in NEWTEXT if desired.  */
2463       if (case_action == all_caps)
2464         newtext = Fupcase (newtext);
2465       else if (case_action == cap_initial)
2466         newtext = Fupcase_initials (newtext);
2467
2468       return concat3 (before, newtext, after);
2469     }
2470
2471   /* Record point, then move (quietly) to the start of the match.  */
2472   if (PT >= search_regs.end[sub])
2473     opoint = PT - ZV;
2474   else if (PT > search_regs.start[sub])
2475     opoint = search_regs.end[sub] - ZV;
2476   else
2477     opoint = PT;
2478
2479   /* If we want non-literal replacement,
2480      perform substitution on the replacement string.  */
2481   if (NILP (literal))
2482     {
2483       int length = SBYTES (newtext);
2484       unsigned char *substed;
2485       int substed_alloc_size, substed_len;
2486       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2487       int str_multibyte = STRING_MULTIBYTE (newtext);
2488       Lisp_Object rev_tbl;
2489       int really_changed = 0;
2490
2491       rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2492                 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2493                                           make_number (0))
2494                 : Qnil);
2495
2496       substed_alloc_size = length * 2 + 100;
2497       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2498       substed_len = 0;
2499
2500       /* Go thru NEWTEXT, producing the actual text to insert in
2501          SUBSTED while adjusting multibyteness to that of the current
2502          buffer.  */
2503
2504       for (pos_byte = 0, pos = 0; pos_byte < length;)
2505         {
2506           unsigned char str[MAX_MULTIBYTE_LENGTH];
2507           unsigned char *add_stuff = NULL;
2508           int add_len = 0;
2509           int idx = -1;
2510
2511           if (str_multibyte)
2512             {
2513               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2514               if (!buf_multibyte)
2515                 c = multibyte_char_to_unibyte (c, rev_tbl);
2516             }
2517           else
2518             {
2519               /* Note that we don't have to increment POS.  */
2520               c = SREF (newtext, pos_byte++);
2521               if (buf_multibyte)
2522                 c = unibyte_char_to_multibyte (c);
2523             }
2524
2525           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2526              or set IDX to a match index, which means put that part
2527              of the buffer text into SUBSTED.  */
2528
2529           if (c == '\\')
2530             {
2531               really_changed = 1;
2532
2533               if (str_multibyte)
2534                 {
2535                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2536                                                       pos, pos_byte);
2537                   if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2538                     c = multibyte_char_to_unibyte (c, rev_tbl);
2539                 }
2540               else
2541                 {
2542                   c = SREF (newtext, pos_byte++);
2543                   if (buf_multibyte)
2544                     c = unibyte_char_to_multibyte (c);
2545                 }
2546
2547               if (c == '&')
2548                 idx = sub;
2549               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2550                 {
2551                   if (search_regs.start[c - '0'] >= 1)
2552                     idx = c - '0';
2553                 }
2554               else if (c == '\\')
2555                 add_len = 1, add_stuff = "\\";
2556               else
2557                 {
2558                   xfree (substed);
2559                   error ("Invalid use of `\\' in replacement text");
2560                 }
2561             }
2562           else
2563             {
2564               add_len = CHAR_STRING (c, str);
2565               add_stuff = str;
2566             }
2567
2568           /* If we want to copy part of a previous match,
2569              set up ADD_STUFF and ADD_LEN to point to it.  */
2570           if (idx >= 0)
2571             {
2572               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2573               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2574               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2575                 move_gap (search_regs.start[idx]);
2576               add_stuff = BYTE_POS_ADDR (begbyte);
2577             }
2578
2579           /* Now the stuff we want to add to SUBSTED
2580              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2581
2582           /* Make sure SUBSTED is big enough.  */
2583           if (substed_len + add_len >= substed_alloc_size)
2584             {
2585               substed_alloc_size = substed_len + add_len + 500;
2586               substed = (unsigned char *) xrealloc (substed,
2587                                                     substed_alloc_size + 1);
2588             }
2589
2590           /* Now add to the end of SUBSTED.  */
2591           if (add_stuff)
2592             {
2593               bcopy (add_stuff, substed + substed_len, add_len);
2594               substed_len += add_len;
2595             }
2596         }
2597
2598       if (really_changed)
2599         {
2600           if (buf_multibyte)
2601             {
2602               int nchars = multibyte_chars_in_text (substed, substed_len);
2603
2604               newtext = make_multibyte_string (substed, nchars, substed_len);
2605             }
2606           else
2607             newtext = make_unibyte_string (substed, substed_len);
2608         }
2609       xfree (substed);
2610     }
2611
2612   /* Replace the old text with the new in the cleanest possible way.  */
2613   replace_range (search_regs.start[sub], search_regs.end[sub],
2614                  newtext, 1, 0, 1);
2615   newpoint = search_regs.start[sub] + SCHARS (newtext);
2616
2617   if (case_action == all_caps)
2618     Fupcase_region (make_number (search_regs.start[sub]),
2619                     make_number (newpoint));
2620   else if (case_action == cap_initial)
2621     Fupcase_initials_region (make_number (search_regs.start[sub]),
2622                              make_number (newpoint));
2623
2624   /* Adjust search data for this change.  */
2625   {
2626     int oldend = search_regs.end[sub];
2627     int oldstart = search_regs.start[sub];
2628     int change = newpoint - search_regs.end[sub];
2629     int i;
2630
2631     for (i = 0; i < search_regs.num_regs; i++)
2632       {
2633         if (search_regs.start[i] >= oldend)
2634           search_regs.start[i] += change;
2635         else if (search_regs.start[i] > oldstart)
2636           search_regs.start[i] = oldstart;
2637         if (search_regs.end[i] >= oldend)
2638           search_regs.end[i] += change;
2639         else if (search_regs.end[i] > oldstart)
2640           search_regs.end[i] = oldstart;
2641       }
2642   }
2643
2644   /* Put point back where it was in the text.  */
2645   if (opoint <= 0)
2646     TEMP_SET_PT (opoint + ZV);
2647   else
2648     TEMP_SET_PT (opoint);
2649
2650   /* Now move point "officially" to the start of the inserted replacement.  */
2651   move_if_not_intangible (newpoint);
2652
2653   return Qnil;
2654 }
2655 \f
2656 static Lisp_Object
2657 match_limit (num, beginningp)
2658      Lisp_Object num;
2659      int beginningp;
2660 {
2661   register int n;
2662
2663   CHECK_NUMBER (num);
2664   n = XINT (num);
2665   if (n < 0)
2666     args_out_of_range (num, make_number (0));
2667   if (search_regs.num_regs <= 0)
2668     error ("No match data, because no search succeeded");
2669   if (n >= search_regs.num_regs
2670       || search_regs.start[n] < 0)
2671     return Qnil;
2672   return (make_number ((beginningp) ? search_regs.start[n]
2673                                     : search_regs.end[n]));
2674 }
2675
2676 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2677        doc: /* Return position of start of text matched by last search.
2678 SUBEXP, a number, specifies which parenthesized expression in the last
2679   regexp.
2680 Value is nil if SUBEXPth pair didn't match, or there were less than
2681   SUBEXP pairs.
2682 Zero means the entire text matched by the whole regexp or whole string.  */)
2683      (subexp)
2684      Lisp_Object subexp;
2685 {
2686   return match_limit (subexp, 1);
2687 }
2688
2689 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2690        doc: /* Return position of end of text matched by last search.
2691 SUBEXP, a number, specifies which parenthesized expression in the last
2692   regexp.
2693 Value is nil if SUBEXPth pair didn't match, or there were less than
2694   SUBEXP pairs.
2695 Zero means the entire text matched by the whole regexp or whole string.  */)
2696      (subexp)
2697      Lisp_Object subexp;
2698 {
2699   return match_limit (subexp, 0);
2700 }
2701
2702 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2703        doc: /* Return a list containing all info on what the last search matched.
2704 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2705 All the elements are markers or nil (nil if the Nth pair didn't match)
2706 if the last match was on a buffer; integers or nil if a string was matched.
2707 Use `store-match-data' to reinstate the data in this list.
2708
2709 If INTEGERS (the optional first argument) is non-nil, always use
2710 integers \(rather than markers) to represent buffer positions.  In
2711 this case, and if the last match was in a buffer, the buffer will get
2712 stored as one additional element at the end of the list.
2713
2714 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough
2715 to hold all the values, and if INTEGERS is non-nil, no consing is done.
2716
2717 Return value is undefined if the last search failed.  */)
2718      (integers, reuse)
2719      Lisp_Object integers, reuse;
2720 {
2721   Lisp_Object tail, prev;
2722   Lisp_Object *data;
2723   int i, len;
2724
2725   if (NILP (last_thing_searched))
2726     return Qnil;
2727
2728   prev = Qnil;
2729
2730   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2731                                  * sizeof (Lisp_Object));
2732
2733   len = 0;
2734   for (i = 0; i < search_regs.num_regs; i++)
2735     {
2736       int start = search_regs.start[i];
2737       if (start >= 0)
2738         {
2739           if (EQ (last_thing_searched, Qt)
2740               || ! NILP (integers))
2741             {
2742               XSETFASTINT (data[2 * i], start);
2743               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2744             }
2745           else if (BUFFERP (last_thing_searched))
2746             {
2747               data[2 * i] = Fmake_marker ();
2748               Fset_marker (data[2 * i],
2749                            make_number (start),
2750                            last_thing_searched);
2751               data[2 * i + 1] = Fmake_marker ();
2752               Fset_marker (data[2 * i + 1],
2753                            make_number (search_regs.end[i]),
2754                            last_thing_searched);
2755             }
2756           else
2757             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2758             abort ();
2759
2760           len = 2*(i+1);
2761         }
2762       else
2763         data[2 * i] = data [2 * i + 1] = Qnil;
2764     }
2765
2766   if (BUFFERP (last_thing_searched) && !NILP (integers))
2767     {
2768       data[len] = last_thing_searched;
2769       len++;
2770     }
2771
2772   /* If REUSE is not usable, cons up the values and return them.  */
2773   if (! CONSP (reuse))
2774     return Flist (len, data);
2775
2776   /* If REUSE is a list, store as many value elements as will fit
2777      into the elements of REUSE.  */
2778   for (i = 0, tail = reuse; CONSP (tail);
2779        i++, tail = XCDR (tail))
2780     {
2781       if (i < len)
2782         XSETCAR (tail, data[i]);
2783       else
2784         XSETCAR (tail, Qnil);
2785       prev = tail;
2786     }
2787
2788   /* If we couldn't fit all value elements into REUSE,
2789      cons up the rest of them and add them to the end of REUSE.  */
2790   if (i < len)
2791     XSETCDR (prev, Flist (len - i, data + i));
2792
2793   return reuse;
2794 }
2795
2796
2797 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2798        doc: /* Set internal data on last search match from elements of LIST.
2799 LIST should have been created by calling `match-data' previously.  */)
2800      (list)
2801      register Lisp_Object list;
2802 {
2803   register int i;
2804   register Lisp_Object marker;
2805
2806   if (running_asynch_code)
2807     save_search_regs ();
2808
2809   if (!CONSP (list) && !NILP (list))
2810     list = wrong_type_argument (Qconsp, list);
2811
2812   /* Unless we find a marker with a buffer or an explicit buffer
2813      in LIST, assume that this match data came from a string.  */
2814   last_thing_searched = Qt;
2815
2816   /* Allocate registers if they don't already exist.  */
2817   {
2818     int length = XFASTINT (Flength (list)) / 2;
2819
2820     if (length > search_regs.num_regs)
2821       {
2822         if (search_regs.num_regs == 0)
2823           {
2824             search_regs.start
2825               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2826             search_regs.end
2827               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2828           }
2829         else
2830           {
2831             search_regs.start
2832               = (regoff_t *) xrealloc (search_regs.start,
2833                                        length * sizeof (regoff_t));
2834             search_regs.end
2835               = (regoff_t *) xrealloc (search_regs.end,
2836                                        length * sizeof (regoff_t));
2837           }
2838
2839         for (i = search_regs.num_regs; i < length; i++)
2840           search_regs.start[i] = -1;
2841
2842         search_regs.num_regs = length;
2843       }
2844
2845     for (i = 0;; i++)
2846       {
2847         marker = Fcar (list);
2848         if (BUFFERP (marker))
2849           {
2850             last_thing_searched = marker;
2851             break;
2852           }
2853         if (i >= length)
2854           break;
2855         if (NILP (marker))
2856           {
2857             search_regs.start[i] = -1;
2858             list = Fcdr (list);
2859           }
2860         else
2861           {
2862             int from;
2863
2864             if (MARKERP (marker))
2865               {
2866                 if (XMARKER (marker)->buffer == 0)
2867                   XSETFASTINT (marker, 0);
2868                 else
2869                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2870               }
2871
2872             CHECK_NUMBER_COERCE_MARKER (marker);
2873             from = XINT (marker);
2874             list = Fcdr (list);
2875
2876             marker = Fcar (list);
2877             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2878               XSETFASTINT (marker, 0);
2879
2880             CHECK_NUMBER_COERCE_MARKER (marker);
2881             search_regs.start[i] = from;
2882             search_regs.end[i] = XINT (marker);
2883           }
2884         list = Fcdr (list);
2885       }
2886
2887     for (; i < search_regs.num_regs; i++)
2888       search_regs.start[i] = -1;
2889   }
2890
2891   return Qnil;
2892 }
2893
2894 /* If non-zero the match data have been saved in saved_search_regs
2895    during the execution of a sentinel or filter. */
2896 static int search_regs_saved;
2897 static struct re_registers saved_search_regs;
2898 static Lisp_Object saved_last_thing_searched;
2899
2900 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2901    if asynchronous code (filter or sentinel) is running. */
2902 static void
2903 save_search_regs ()
2904 {
2905   if (!search_regs_saved)
2906     {
2907       saved_search_regs.num_regs = search_regs.num_regs;
2908       saved_search_regs.start = search_regs.start;
2909       saved_search_regs.end = search_regs.end;
2910       saved_last_thing_searched = last_thing_searched;
2911       last_thing_searched = Qnil;
2912       search_regs.num_regs = 0;
2913       search_regs.start = 0;
2914       search_regs.end = 0;
2915
2916       search_regs_saved = 1;
2917     }
2918 }
2919
2920 /* Called upon exit from filters and sentinels. */
2921 void
2922 restore_match_data ()
2923 {
2924   if (search_regs_saved)
2925     {
2926       if (search_regs.num_regs > 0)
2927         {
2928           xfree (search_regs.start);
2929           xfree (search_regs.end);
2930         }
2931       search_regs.num_regs = saved_search_regs.num_regs;
2932       search_regs.start = saved_search_regs.start;
2933       search_regs.end = saved_search_regs.end;
2934       last_thing_searched = saved_last_thing_searched;
2935       saved_last_thing_searched = Qnil;
2936       search_regs_saved = 0;
2937     }
2938 }
2939
2940 /* Quote a string to inactivate reg-expr chars */
2941
2942 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2943        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
2944      (string)
2945      Lisp_Object string;
2946 {
2947   register unsigned char *in, *out, *end;
2948   register unsigned char *temp;
2949   int backslashes_added = 0;
2950
2951   CHECK_STRING (string);
2952
2953   temp = (unsigned char *) alloca (SBYTES (string) * 2);
2954
2955   /* Now copy the data into the new string, inserting escapes. */
2956
2957   in = SDATA (string);
2958   end = in + SBYTES (string);
2959   out = temp;
2960
2961   for (; in != end; in++)
2962     {
2963       if (*in == '[' || *in == ']'
2964           || *in == '*' || *in == '.' || *in == '\\'
2965           || *in == '?' || *in == '+'
2966           || *in == '^' || *in == '$')
2967         *out++ = '\\', backslashes_added++;
2968       *out++ = *in;
2969     }
2970
2971   return make_specified_string (temp,
2972                                 SCHARS (string) + backslashes_added,
2973                                 out - temp,
2974                                 STRING_MULTIBYTE (string));
2975 }
2976 \f
2977 void
2978 syms_of_search ()
2979 {
2980   register int i;
2981
2982   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2983     {
2984       searchbufs[i].buf.allocated = 100;
2985       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
2986       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2987       searchbufs[i].regexp = Qnil;
2988       searchbufs[i].whitespace_regexp = Qnil;
2989       staticpro (&searchbufs[i].regexp);
2990       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2991     }
2992   searchbuf_head = &searchbufs[0];
2993
2994   Qsearch_failed = intern ("search-failed");
2995   staticpro (&Qsearch_failed);
2996   Qinvalid_regexp = intern ("invalid-regexp");
2997   staticpro (&Qinvalid_regexp);
2998
2999   Fput (Qsearch_failed, Qerror_conditions,
3000         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3001   Fput (Qsearch_failed, Qerror_message,
3002         build_string ("Search failed"));
3003
3004   Fput (Qinvalid_regexp, Qerror_conditions,
3005         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3006   Fput (Qinvalid_regexp, Qerror_message,
3007         build_string ("Invalid regexp"));
3008
3009   last_thing_searched = Qnil;
3010   staticpro (&last_thing_searched);
3011
3012   saved_last_thing_searched = Qnil;
3013   staticpro (&saved_last_thing_searched);
3014
3015   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3016       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3017 Some commands use this for user-specified regexps.
3018 Spaces that occur inside character classes or repetition operators
3019 or other such regexp constructs are not replaced with this.
3020 A value of nil (which is the normal value) means treat spaces literally.  */);
3021   Vsearch_spaces_regexp = Qnil;
3022
3023   defsubr (&Slooking_at);
3024   defsubr (&Sposix_looking_at);
3025   defsubr (&Sstring_match);
3026   defsubr (&Sposix_string_match);
3027   defsubr (&Ssearch_forward);
3028   defsubr (&Ssearch_backward);
3029   defsubr (&Sword_search_forward);
3030   defsubr (&Sword_search_backward);
3031   defsubr (&Sre_search_forward);
3032   defsubr (&Sre_search_backward);
3033   defsubr (&Sposix_search_forward);
3034   defsubr (&Sposix_search_backward);
3035   defsubr (&Sreplace_match);
3036   defsubr (&Smatch_beginning);
3037   defsubr (&Smatch_end);
3038   defsubr (&Smatch_data);
3039   defsubr (&Sset_match_data);
3040   defsubr (&Sregexp_quote);
3041 }
3042
3043 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3044    (do not change this comment) */