code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007  Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches */
  92 Lisp_Object Qsearch_failed;
  93
  94 Lisp_Object Vsearch_spaces_regexp;
  95
  96 static void set_search_regs ();
  97 static void save_search_regs ();
  98 static int simple_search ();
  99 static int boyer_moore ();
 100 static int search_buffer ();
 101 static void matcher_overflow () NO_RETURN;
 102
 103 static void
 104 matcher_overflow ()
 105 {
 106   error ("Stack overflow in regexp matcher");
 107 }
 108
 109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 110    PATTERN is the pattern to compile.
 111    CP is the place to put the result.
 112    TRANSLATE is a translation table for ignoring case, or nil for none.
 113    REGP is the structure that says where to store the "register"
 114    values that will result from matching this pattern.
 115    If it is 0, we should compile the pattern not to record any
 116    subexpression bounds.
 117    POSIX is nonzero if we want full backtracking (POSIX style)
 118    for this pattern.  0 means backtrack only enough to get a valid match.
 119
 120    The behavior also depends on Vsearch_spaces_regexp.  */
 121
 122 static void
 123 compile_pattern_1 (cp, pattern, translate, regp, posix)
 124      struct regexp_cache *cp;
 125      Lisp_Object pattern;
 126      Lisp_Object translate;
 127      struct re_registers *regp;
 128      int posix;
 129 {
 130   char *val;
 131   reg_syntax_t old;
 132
 133   cp->regexp = Qnil;
 134   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 135   cp->posix = posix;
 136   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 137   cp->buf.charset_unibyte = charset_unibyte;
 138   cp->whitespace_regexp = Vsearch_spaces_regexp;
 139   /* rms: I think BLOCK_INPUT is not needed here any more,
 140      because regex.c defines malloc to call xmalloc.
 141      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 142      So let's turn it off.  */
 143   /*  BLOCK_INPUT;  */
 144   old = re_set_syntax (RE_SYNTAX_EMACS
 145                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 146   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 147                             : SDATA (Vsearch_spaces_regexp));
 148
 149   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 150                                      SBYTES (pattern), &cp->buf);
 151
 152   /* If the compiled pattern hard codes some of the contents of the
 153      syntax-table, it can only be reused with *this* syntax table.  */
 154   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 155
 156   re_set_whitespace_regexp (NULL);
 157
 158   re_set_syntax (old);
 159   /* UNBLOCK_INPUT;  */
 160   if (val)
 161     xsignal1 (Qinvalid_regexp, build_string (val));
 162
 163   cp->regexp = Fcopy_sequence (pattern);
 164 }
 165
 166 /* Shrink each compiled regexp buffer in the cache
 167    to the size actually used right now.
 168    This is called from garbage collection.  */
 169
 170 void
 171 shrink_regexp_cache ()
 172 {
 173   struct regexp_cache *cp;
 174
 175   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 176     {
 177       cp->buf.allocated = cp->buf.used;
 178       cp->buf.buffer
 179         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 180     }
 181 }
 182
 183 /* Clear the regexp cache w.r.t. a particular syntax table,
 184    because it was changed.
 185    There is no danger of memory leak here because re_compile_pattern
 186    automagically manages the memory in each re_pattern_buffer struct,
 187    based on its `allocated' and `buffer' values.  */
 188 void
 189 clear_regexp_cache ()
 190 {
 191   int i;
 192
 193   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 194     /* It's tempting to compare with the syntax-table we've actually changd,
 195        but it's not sufficient because char-table inheritance mewans that
 196        modifying one syntax-table can change others at the same time.  */
 197     if (!EQ (searchbufs[i].syntax_table, Qt))
 198       searchbufs[i].regexp = Qnil;
 199 }
 200
 201 /* Compile a regexp if necessary, but first check to see if there's one in
 202    the cache.
 203    PATTERN is the pattern to compile.
 204    TRANSLATE is a translation table for ignoring case, or nil for none.
 205    REGP is the structure that says where to store the "register"
 206    values that will result from matching this pattern.
 207    If it is 0, we should compile the pattern not to record any
 208    subexpression bounds.
 209    POSIX is nonzero if we want full backtracking (POSIX style)
 210    for this pattern.  0 means backtrack only enough to get a valid match.  */
 211
 212 struct re_pattern_buffer *
 213 compile_pattern (pattern, regp, translate, posix, multibyte)
 214      Lisp_Object pattern;
 215      struct re_registers *regp;
 216      Lisp_Object translate;
 217      int posix, multibyte;
 218 {
 219   struct regexp_cache *cp, **cpp;
 220
 221   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 222     {
 223       cp = *cpp;
 224       /* Entries are initialized to nil, and may be set to nil by
 225          compile_pattern_1 if the pattern isn't valid.  Don't apply
 226          string accessors in those cases.  However, compile_pattern_1
 227          is only applied to the cache entry we pick here to reuse.  So
 228          nil should never appear before a non-nil entry.  */
 229       if (NILP (cp->regexp))
 230         goto compile_it;
 231       if (SCHARS (cp->regexp) == SCHARS (pattern)
 232           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 233           && !NILP (Fstring_equal (cp->regexp, pattern))
 234           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 235           && cp->posix == posix
 236           && (EQ (cp->syntax_table, Qt)
 237               || EQ (cp->syntax_table, current_buffer->syntax_table))
 238           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 239           && cp->buf.charset_unibyte == charset_unibyte)
 240         break;
 241
 242       /* If we're at the end of the cache, compile into the nil cell
 243          we found, or the last (least recently used) cell with a
 244          string value.  */
 245       if (cp->next == 0)
 246         {
 247         compile_it:
 248           compile_pattern_1 (cp, pattern, translate, regp, posix);
 249           break;
 250         }
 251     }
 252
 253   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 254      either because we found it in the cache or because we just compiled it.
 255      Move it to the front of the queue to mark it as most recently used.  */
 256   *cpp = cp->next;
 257   cp->next = searchbuf_head;
 258   searchbuf_head = cp;
 259
 260   /* Advise the searching functions about the space we have allocated
 261      for register data.  */
 262   if (regp)
 263     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 264
 265   /* The compiled pattern can be used both for mulitbyte and unibyte
 266      target.  But, we have to tell which the pattern is used for. */
 267   cp->buf.target_multibyte = multibyte;
 268
 269   return &cp->buf;
 270 }
 271
 272 \f
 273 static Lisp_Object
 274 looking_at_1 (string, posix)
 275      Lisp_Object string;
 276      int posix;
 277 {
 278   Lisp_Object val;
 279   unsigned char *p1, *p2;
 280   int s1, s2;
 281   register int i;
 282   struct re_pattern_buffer *bufp;
 283
 284   if (running_asynch_code)
 285     save_search_regs ();
 286
 287   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 288   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 289     = current_buffer->case_eqv_table;
 290
 291   CHECK_STRING (string);
 292   bufp = compile_pattern (string, &search_regs,
 293                           (!NILP (current_buffer->case_fold_search)
 294                            ? current_buffer->case_canon_table : Qnil),
 295                           posix,
 296                           !NILP (current_buffer->enable_multibyte_characters));
 297
 298   immediate_quit = 1;
 299   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 300
 301   /* Get pointers and sizes of the two strings
 302      that make up the visible portion of the buffer. */
 303
 304   p1 = BEGV_ADDR;
 305   s1 = GPT_BYTE - BEGV_BYTE;
 306   p2 = GAP_END_ADDR;
 307   s2 = ZV_BYTE - GPT_BYTE;
 308   if (s1 < 0)
 309     {
 310       p2 = p1;
 311       s2 = ZV_BYTE - BEGV_BYTE;
 312       s1 = 0;
 313     }
 314   if (s2 < 0)
 315     {
 316       s1 = ZV_BYTE - BEGV_BYTE;
 317       s2 = 0;
 318     }
 319
 320   re_match_object = Qnil;
 321
 322   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 323                   PT_BYTE - BEGV_BYTE, &search_regs,
 324                   ZV_BYTE - BEGV_BYTE);
 325   immediate_quit = 0;
 326
 327   if (i == -2)
 328     matcher_overflow ();
 329
 330   val = (0 <= i ? Qt : Qnil);
 331   if (i >= 0)
 332     for (i = 0; i < search_regs.num_regs; i++)
 333       if (search_regs.start[i] >= 0)
 334         {
 335           search_regs.start[i]
 336             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 337           search_regs.end[i]
 338             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 339         }
 340   XSETBUFFER (last_thing_searched, current_buffer);
 341   return val;
 342 }
 343
 344 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 345        doc: /* Return t if text after point matches regular expression REGEXP.
 346 This function modifies the match data that `match-beginning',
 347 `match-end' and `match-data' access; save and restore the match
 348 data if you want to preserve them.  */)
 349      (regexp)
 350      Lisp_Object regexp;
 351 {
 352   return looking_at_1 (regexp, 0);
 353 }
 354
 355 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 356        doc: /* Return t if text after point matches regular expression REGEXP.
 357 Find the longest match, in accord with Posix regular expression rules.
 358 This function modifies the match data that `match-beginning',
 359 `match-end' and `match-data' access; save and restore the match
 360 data if you want to preserve them.  */)
 361      (regexp)
 362      Lisp_Object regexp;
 363 {
 364   return looking_at_1 (regexp, 1);
 365 }
 366 \f
 367 static Lisp_Object
 368 string_match_1 (regexp, string, start, posix)
 369      Lisp_Object regexp, string, start;
 370      int posix;
 371 {
 372   int val;
 373   struct re_pattern_buffer *bufp;
 374   int pos, pos_byte;
 375   int i;
 376
 377   if (running_asynch_code)
 378     save_search_regs ();
 379
 380   CHECK_STRING (regexp);
 381   CHECK_STRING (string);
 382
 383   if (NILP (start))
 384     pos = 0, pos_byte = 0;
 385   else
 386     {
 387       int len = SCHARS (string);
 388
 389       CHECK_NUMBER (start);
 390       pos = XINT (start);
 391       if (pos < 0 && -pos <= len)
 392         pos = len + pos;
 393       else if (0 > pos || pos > len)
 394         args_out_of_range (string, start);
 395       pos_byte = string_char_to_byte (string, pos);
 396     }
 397
 398   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 399   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 400     = current_buffer->case_eqv_table;
 401
 402   bufp = compile_pattern (regexp, &search_regs,
 403                           (!NILP (current_buffer->case_fold_search)
 404                            ? current_buffer->case_canon_table : Qnil),
 405                           posix,
 406                           STRING_MULTIBYTE (string));
 407   immediate_quit = 1;
 408   re_match_object = string;
 409
 410   val = re_search (bufp, (char *) SDATA (string),
 411                    SBYTES (string), pos_byte,
 412                    SBYTES (string) - pos_byte,
 413                    &search_regs);
 414   immediate_quit = 0;
 415   last_thing_searched = Qt;
 416   if (val == -2)
 417     matcher_overflow ();
 418   if (val < 0) return Qnil;
 419
 420   for (i = 0; i < search_regs.num_regs; i++)
 421     if (search_regs.start[i] >= 0)
 422       {
 423         search_regs.start[i]
 424           = string_byte_to_char (string, search_regs.start[i]);
 425         search_regs.end[i]
 426           = string_byte_to_char (string, search_regs.end[i]);
 427       }
 428
 429   return make_number (string_byte_to_char (string, val));
 430 }
 431
 432 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 433        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 434 Matching ignores case if `case-fold-search' is non-nil.
 435 If third arg START is non-nil, start search at that index in STRING.
 436 For index of first char beyond the match, do (match-end 0).
 437 `match-end' and `match-beginning' also give indices of substrings
 438 matched by parenthesis constructs in the pattern.
 439
 440 You can use the function `match-string' to extract the substrings
 441 matched by the parenthesis constructions in REGEXP. */)
 442      (regexp, string, start)
 443      Lisp_Object regexp, string, start;
 444 {
 445   return string_match_1 (regexp, string, start, 0);
 446 }
 447
 448 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 449        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 450 Find the longest match, in accord with Posix regular expression rules.
 451 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 452 If third arg START is non-nil, start search at that index in STRING.
 453 For index of first char beyond the match, do (match-end 0).
 454 `match-end' and `match-beginning' also give indices of substrings
 455 matched by parenthesis constructs in the pattern.  */)
 456      (regexp, string, start)
 457      Lisp_Object regexp, string, start;
 458 {
 459   return string_match_1 (regexp, string, start, 1);
 460 }
 461
 462 /* Match REGEXP against STRING, searching all of STRING,
 463    and return the index of the match, or negative on failure.
 464    This does not clobber the match data.  */
 465
 466 int
 467 fast_string_match (regexp, string)
 468      Lisp_Object regexp, string;
 469 {
 470   int val;
 471   struct re_pattern_buffer *bufp;
 472
 473   bufp = compile_pattern (regexp, 0, Qnil,
 474                           0, STRING_MULTIBYTE (string));
 475   immediate_quit = 1;
 476   re_match_object = string;
 477
 478   val = re_search (bufp, (char *) SDATA (string),
 479                    SBYTES (string), 0,
 480                    SBYTES (string), 0);
 481   immediate_quit = 0;
 482   return val;
 483 }
 484
 485 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 486    and return the index of the match, or negative on failure.
 487    This does not clobber the match data.
 488    We assume that STRING contains single-byte characters.  */
 489
 490 extern Lisp_Object Vascii_downcase_table;
 491
 492 int
 493 fast_c_string_match_ignore_case (regexp, string)
 494      Lisp_Object regexp;
 495      const char *string;
 496 {
 497   int val;
 498   struct re_pattern_buffer *bufp;
 499   int len = strlen (string);
 500
 501   regexp = string_make_unibyte (regexp);
 502   re_match_object = Qt;
 503   bufp = compile_pattern (regexp, 0,
 504                           Vascii_canon_table, 0,
 505                           0);
 506   immediate_quit = 1;
 507   val = re_search (bufp, string, len, 0, len, 0);
 508   immediate_quit = 0;
 509   return val;
 510 }
 511
 512 /* Like fast_string_match but ignore case.  */
 513
 514 int
 515 fast_string_match_ignore_case (regexp, string)
 516      Lisp_Object regexp, string;
 517 {
 518   int val;
 519   struct re_pattern_buffer *bufp;
 520
 521   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 522                           0, STRING_MULTIBYTE (string));
 523   immediate_quit = 1;
 524   re_match_object = string;
 525
 526   val = re_search (bufp, (char *) SDATA (string),
 527                    SBYTES (string), 0,
 528                    SBYTES (string), 0);
 529   immediate_quit = 0;
 530   return val;
 531 }
 532 \f
 533 /* The newline cache: remembering which sections of text have no newlines.  */
 534
 535 /* If the user has requested newline caching, make sure it's on.
 536    Otherwise, make sure it's off.
 537    This is our cheezy way of associating an action with the change of
 538    state of a buffer-local variable.  */
 539 static void
 540 newline_cache_on_off (buf)
 541      struct buffer *buf;
 542 {
 543   if (NILP (buf->cache_long_line_scans))
 544     {
 545       /* It should be off.  */
 546       if (buf->newline_cache)
 547         {
 548           free_region_cache (buf->newline_cache);
 549           buf->newline_cache = 0;
 550         }
 551     }
 552   else
 553     {
 554       /* It should be on.  */
 555       if (buf->newline_cache == 0)
 556         buf->newline_cache = new_region_cache ();
 557     }
 558 }
 559
 560 \f
 561 /* Search for COUNT instances of the character TARGET between START and END.
 562
 563    If COUNT is positive, search forwards; END must be >= START.
 564    If COUNT is negative, search backwards for the -COUNTth instance;
 565       END must be <= START.
 566    If COUNT is zero, do anything you please; run rogue, for all I care.
 567
 568    If END is zero, use BEGV or ZV instead, as appropriate for the
 569    direction indicated by COUNT.
 570
 571    If we find COUNT instances, set *SHORTAGE to zero, and return the
 572    position past the COUNTth match.  Note that for reverse motion
 573    this is not the same as the usual convention for Emacs motion commands.
 574
 575    If we don't find COUNT instances before reaching END, set *SHORTAGE
 576    to the number of TARGETs left unfound, and return END.
 577
 578    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 579    except when inside redisplay.  */
 580
 581 int
 582 scan_buffer (target, start, end, count, shortage, allow_quit)
 583      register int target;
 584      int start, end;
 585      int count;
 586      int *shortage;
 587      int allow_quit;
 588 {
 589   struct region_cache *newline_cache;
 590   int direction;
 591
 592   if (count > 0)
 593     {
 594       direction = 1;
 595       if (! end) end = ZV;
 596     }
 597   else
 598     {
 599       direction = -1;
 600       if (! end) end = BEGV;
 601     }
 602
 603   newline_cache_on_off (current_buffer);
 604   newline_cache = current_buffer->newline_cache;
 605
 606   if (shortage != 0)
 607     *shortage = 0;
 608
 609   immediate_quit = allow_quit;
 610
 611   if (count > 0)
 612     while (start != end)
 613       {
 614         /* Our innermost scanning loop is very simple; it doesn't know
 615            about gaps, buffer ends, or the newline cache.  ceiling is
 616            the position of the last character before the next such
 617            obstacle --- the last character the dumb search loop should
 618            examine.  */
 619         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 620         int start_byte = CHAR_TO_BYTE (start);
 621         int tem;
 622
 623         /* If we're looking for a newline, consult the newline cache
 624            to see where we can avoid some scanning.  */
 625         if (target == '\n' && newline_cache)
 626           {
 627             int next_change;
 628             immediate_quit = 0;
 629             while (region_cache_forward
 630                    (current_buffer, newline_cache, start_byte, &next_change))
 631               start_byte = next_change;
 632             immediate_quit = allow_quit;
 633
 634             /* START should never be after END.  */
 635             if (start_byte > ceiling_byte)
 636               start_byte = ceiling_byte;
 637
 638             /* Now the text after start is an unknown region, and
 639                next_change is the position of the next known region. */
 640             ceiling_byte = min (next_change - 1, ceiling_byte);
 641           }
 642
 643         /* The dumb loop can only scan text stored in contiguous
 644            bytes. BUFFER_CEILING_OF returns the last character
 645            position that is contiguous, so the ceiling is the
 646            position after that.  */
 647         tem = BUFFER_CEILING_OF (start_byte);
 648         ceiling_byte = min (tem, ceiling_byte);
 649
 650         {
 651           /* The termination address of the dumb loop.  */
 652           register unsigned char *ceiling_addr
 653             = BYTE_POS_ADDR (ceiling_byte) + 1;
 654           register unsigned char *cursor
 655             = BYTE_POS_ADDR (start_byte);
 656           unsigned char *base = cursor;
 657
 658           while (cursor < ceiling_addr)
 659             {
 660               unsigned char *scan_start = cursor;
 661
 662               /* The dumb loop.  */
 663               while (*cursor != target && ++cursor < ceiling_addr)
 664                 ;
 665
 666               /* If we're looking for newlines, cache the fact that
 667                  the region from start to cursor is free of them. */
 668               if (target == '\n' && newline_cache)
 669                 know_region_cache (current_buffer, newline_cache,
 670                                    start_byte + scan_start - base,
 671                                    start_byte + cursor - base);
 672
 673               /* Did we find the target character?  */
 674               if (cursor < ceiling_addr)
 675                 {
 676                   if (--count == 0)
 677                     {
 678                       immediate_quit = 0;
 679                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 680                     }
 681                   cursor++;
 682                 }
 683             }
 684
 685           start = BYTE_TO_CHAR (start_byte + cursor - base);
 686         }
 687       }
 688   else
 689     while (start > end)
 690       {
 691         /* The last character to check before the next obstacle.  */
 692         int ceiling_byte = CHAR_TO_BYTE (end);
 693         int start_byte = CHAR_TO_BYTE (start);
 694         int tem;
 695
 696         /* Consult the newline cache, if appropriate.  */
 697         if (target == '\n' && newline_cache)
 698           {
 699             int next_change;
 700             immediate_quit = 0;
 701             while (region_cache_backward
 702                    (current_buffer, newline_cache, start_byte, &next_change))
 703               start_byte = next_change;
 704             immediate_quit = allow_quit;
 705
 706             /* Start should never be at or before end.  */
 707             if (start_byte <= ceiling_byte)
 708               start_byte = ceiling_byte + 1;
 709
 710             /* Now the text before start is an unknown region, and
 711                next_change is the position of the next known region. */
 712             ceiling_byte = max (next_change, ceiling_byte);
 713           }
 714
 715         /* Stop scanning before the gap.  */
 716         tem = BUFFER_FLOOR_OF (start_byte - 1);
 717         ceiling_byte = max (tem, ceiling_byte);
 718
 719         {
 720           /* The termination address of the dumb loop.  */
 721           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 722           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 723           unsigned char *base = cursor;
 724
 725           while (cursor >= ceiling_addr)
 726             {
 727               unsigned char *scan_start = cursor;
 728
 729               while (*cursor != target && --cursor >= ceiling_addr)
 730                 ;
 731
 732               /* If we're looking for newlines, cache the fact that
 733                  the region from after the cursor to start is free of them.  */
 734               if (target == '\n' && newline_cache)
 735                 know_region_cache (current_buffer, newline_cache,
 736                                    start_byte + cursor - base,
 737                                    start_byte + scan_start - base);
 738
 739               /* Did we find the target character?  */
 740               if (cursor >= ceiling_addr)
 741                 {
 742                   if (++count >= 0)
 743                     {
 744                       immediate_quit = 0;
 745                       return BYTE_TO_CHAR (start_byte + cursor - base);
 746                     }
 747                   cursor--;
 748                 }
 749             }
 750
 751           start = BYTE_TO_CHAR (start_byte + cursor - base);
 752         }
 753       }
 754
 755   immediate_quit = 0;
 756   if (shortage != 0)
 757     *shortage = count * direction;
 758   return start;
 759 }
 760 \f
 761 /* Search for COUNT instances of a line boundary, which means either a
 762    newline or (if selective display enabled) a carriage return.
 763    Start at START.  If COUNT is negative, search backwards.
 764
 765    We report the resulting position by calling TEMP_SET_PT_BOTH.
 766
 767    If we find COUNT instances. we position after (always after,
 768    even if scanning backwards) the COUNTth match, and return 0.
 769
 770    If we don't find COUNT instances before reaching the end of the
 771    buffer (or the beginning, if scanning backwards), we return
 772    the number of line boundaries left unfound, and position at
 773    the limit we bumped up against.
 774
 775    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 776    except in special cases.  */
 777
 778 int
 779 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 780      int start, start_byte;
 781      int limit, limit_byte;
 782      register int count;
 783      int allow_quit;
 784 {
 785   int direction = ((count > 0) ? 1 : -1);
 786
 787   register unsigned char *cursor;
 788   unsigned char *base;
 789
 790   register int ceiling;
 791   register unsigned char *ceiling_addr;
 792
 793   int old_immediate_quit = immediate_quit;
 794
 795   /* The code that follows is like scan_buffer
 796      but checks for either newline or carriage return.  */
 797
 798   if (allow_quit)
 799     immediate_quit++;
 800
 801   start_byte = CHAR_TO_BYTE (start);
 802
 803   if (count > 0)
 804     {
 805       while (start_byte < limit_byte)
 806         {
 807           ceiling =  BUFFER_CEILING_OF (start_byte);
 808           ceiling = min (limit_byte - 1, ceiling);
 809           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 810           base = (cursor = BYTE_POS_ADDR (start_byte));
 811           while (1)
 812             {
 813               while (*cursor != '\n' && ++cursor != ceiling_addr)
 814                 ;
 815
 816               if (cursor != ceiling_addr)
 817                 {
 818                   if (--count == 0)
 819                     {
 820                       immediate_quit = old_immediate_quit;
 821                       start_byte = start_byte + cursor - base + 1;
 822                       start = BYTE_TO_CHAR (start_byte);
 823                       TEMP_SET_PT_BOTH (start, start_byte);
 824                       return 0;
 825                     }
 826                   else
 827                     if (++cursor == ceiling_addr)
 828                       break;
 829                 }
 830               else
 831                 break;
 832             }
 833           start_byte += cursor - base;
 834         }
 835     }
 836   else
 837     {
 838       while (start_byte > limit_byte)
 839         {
 840           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 841           ceiling = max (limit_byte, ceiling);
 842           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 843           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 844           while (1)
 845             {
 846               while (--cursor != ceiling_addr && *cursor != '\n')
 847                 ;
 848
 849               if (cursor != ceiling_addr)
 850                 {
 851                   if (++count == 0)
 852                     {
 853                       immediate_quit = old_immediate_quit;
 854                       /* Return the position AFTER the match we found.  */
 855                       start_byte = start_byte + cursor - base + 1;
 856                       start = BYTE_TO_CHAR (start_byte);
 857                       TEMP_SET_PT_BOTH (start, start_byte);
 858                       return 0;
 859                     }
 860                 }
 861               else
 862                 break;
 863             }
 864           /* Here we add 1 to compensate for the last decrement
 865              of CURSOR, which took it past the valid range.  */
 866           start_byte += cursor - base + 1;
 867         }
 868     }
 869
 870   TEMP_SET_PT_BOTH (limit, limit_byte);
 871   immediate_quit = old_immediate_quit;
 872
 873   return count * direction;
 874 }
 875
 876 int
 877 find_next_newline_no_quit (from, cnt)
 878      register int from, cnt;
 879 {
 880   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 881 }
 882
 883 /* Like find_next_newline, but returns position before the newline,
 884    not after, and only search up to TO.  This isn't just
 885    find_next_newline (...)-1, because you might hit TO.  */
 886
 887 int
 888 find_before_next_newline (from, to, cnt)
 889      int from, to, cnt;
 890 {
 891   int shortage;
 892   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 893
 894   if (shortage == 0)
 895     pos--;
 896
 897   return pos;
 898 }
 899 \f
 900 /* Subroutines of Lisp buffer search functions. */
 901
 902 static Lisp_Object
 903 search_command (string, bound, noerror, count, direction, RE, posix)
 904      Lisp_Object string, bound, noerror, count;
 905      int direction;
 906      int RE;
 907      int posix;
 908 {
 909   register int np;
 910   int lim, lim_byte;
 911   int n = direction;
 912
 913   if (!NILP (count))
 914     {
 915       CHECK_NUMBER (count);
 916       n *= XINT (count);
 917     }
 918
 919   CHECK_STRING (string);
 920   if (NILP (bound))
 921     {
 922       if (n > 0)
 923         lim = ZV, lim_byte = ZV_BYTE;
 924       else
 925         lim = BEGV, lim_byte = BEGV_BYTE;
 926     }
 927   else
 928     {
 929       CHECK_NUMBER_COERCE_MARKER (bound);
 930       lim = XINT (bound);
 931       if (n > 0 ? lim < PT : lim > PT)
 932         error ("Invalid search bound (wrong side of point)");
 933       if (lim > ZV)
 934         lim = ZV, lim_byte = ZV_BYTE;
 935       else if (lim < BEGV)
 936         lim = BEGV, lim_byte = BEGV_BYTE;
 937       else
 938         lim_byte = CHAR_TO_BYTE (lim);
 939     }
 940
 941   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 942   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 943     = current_buffer->case_eqv_table;
 944
 945   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 946                       (!NILP (current_buffer->case_fold_search)
 947                        ? current_buffer->case_canon_table
 948                        : Qnil),
 949                       (!NILP (current_buffer->case_fold_search)
 950                        ? current_buffer->case_eqv_table
 951                        : Qnil),
 952                       posix);
 953   if (np <= 0)
 954     {
 955       if (NILP (noerror))
 956         xsignal1 (Qsearch_failed, string);
 957
 958       if (!EQ (noerror, Qt))
 959         {
 960           if (lim < BEGV || lim > ZV)
 961             abort ();
 962           SET_PT_BOTH (lim, lim_byte);
 963           return Qnil;
 964 #if 0 /* This would be clean, but maybe programs depend on
 965          a value of nil here.  */
 966           np = lim;
 967 #endif
 968         }
 969       else
 970         return Qnil;
 971     }
 972
 973   if (np < BEGV || np > ZV)
 974     abort ();
 975
 976   SET_PT (np);
 977
 978   return make_number (np);
 979 }
 980 \f
 981 /* Return 1 if REGEXP it matches just one constant string.  */
 982
 983 static int
 984 trivial_regexp_p (regexp)
 985      Lisp_Object regexp;
 986 {
 987   int len = SBYTES (regexp);
 988   unsigned char *s = SDATA (regexp);
 989   while (--len >= 0)
 990     {
 991       switch (*s++)
 992         {
 993         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 994           return 0;
 995         case '\\':
 996           if (--len < 0)
 997             return 0;
 998           switch (*s++)
 999             {
1000             case '|': case '(': case ')': case '`': case '\'': case 'b':
1001             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1002             case 'S': case '=': case '{': case '}': case '_':
1003             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1004             case '1': case '2': case '3': case '4': case '5':
1005             case '6': case '7': case '8': case '9':
1006               return 0;
1007             }
1008         }
1009     }
1010   return 1;
1011 }
1012
1013 /* Search for the n'th occurrence of STRING in the current buffer,
1014    starting at position POS and stopping at position LIM,
1015    treating STRING as a literal string if RE is false or as
1016    a regular expression if RE is true.
1017
1018    If N is positive, searching is forward and LIM must be greater than POS.
1019    If N is negative, searching is backward and LIM must be less than POS.
1020
1021    Returns -x if x occurrences remain to be found (x > 0),
1022    or else the position at the beginning of the Nth occurrence
1023    (if searching backward) or the end (if searching forward).
1024
1025    POSIX is nonzero if we want full backtracking (POSIX style)
1026    for this pattern.  0 means backtrack only enough to get a valid match.  */
1027
1028 #define TRANSLATE(out, trt, d)                  \
1029 do                                              \
1030   {                                             \
1031     if (! NILP (trt))                           \
1032       {                                         \
1033         Lisp_Object temp;                       \
1034         temp = Faref (trt, make_number (d));    \
1035         if (INTEGERP (temp))                    \
1036           out = XINT (temp);                    \
1037         else                                    \
1038           out = d;                              \
1039       }                                         \
1040     else                                        \
1041       out = d;                                  \
1042   }                                             \
1043 while (0)
1044
1045 static int
1046 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1047                RE, trt, inverse_trt, posix)
1048      Lisp_Object string;
1049      int pos;
1050      int pos_byte;
1051      int lim;
1052      int lim_byte;
1053      int n;
1054      int RE;
1055      Lisp_Object trt;
1056      Lisp_Object inverse_trt;
1057      int posix;
1058 {
1059   int len = SCHARS (string);
1060   int len_byte = SBYTES (string);
1061   register int i;
1062
1063   if (running_asynch_code)
1064     save_search_regs ();
1065
1066   /* Searching 0 times means don't move.  */
1067   /* Null string is found at starting position.  */
1068   if (len == 0 || n == 0)
1069     {
1070       set_search_regs (pos_byte, 0);
1071       return pos;
1072     }
1073
1074   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1075     {
1076       unsigned char *p1, *p2;
1077       int s1, s2;
1078       struct re_pattern_buffer *bufp;
1079
1080       bufp = compile_pattern (string, &search_regs, trt, posix,
1081                               !NILP (current_buffer->enable_multibyte_characters));
1082
1083       immediate_quit = 1;       /* Quit immediately if user types ^G,
1084                                    because letting this function finish
1085                                    can take too long. */
1086       QUIT;                     /* Do a pending quit right away,
1087                                    to avoid paradoxical behavior */
1088       /* Get pointers and sizes of the two strings
1089          that make up the visible portion of the buffer. */
1090
1091       p1 = BEGV_ADDR;
1092       s1 = GPT_BYTE - BEGV_BYTE;
1093       p2 = GAP_END_ADDR;
1094       s2 = ZV_BYTE - GPT_BYTE;
1095       if (s1 < 0)
1096         {
1097           p2 = p1;
1098           s2 = ZV_BYTE - BEGV_BYTE;
1099           s1 = 0;
1100         }
1101       if (s2 < 0)
1102         {
1103           s1 = ZV_BYTE - BEGV_BYTE;
1104           s2 = 0;
1105         }
1106       re_match_object = Qnil;
1107
1108       while (n < 0)
1109         {
1110           int val;
1111           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1112                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1113                              &search_regs,
1114                              /* Don't allow match past current point */
1115                              pos_byte - BEGV_BYTE);
1116           if (val == -2)
1117             {
1118               matcher_overflow ();
1119             }
1120           if (val >= 0)
1121             {
1122               pos_byte = search_regs.start[0] + BEGV_BYTE;
1123               for (i = 0; i < search_regs.num_regs; i++)
1124                 if (search_regs.start[i] >= 0)
1125                   {
1126                     search_regs.start[i]
1127                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1128                     search_regs.end[i]
1129                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1130                   }
1131               XSETBUFFER (last_thing_searched, current_buffer);
1132               /* Set pos to the new position. */
1133               pos = search_regs.start[0];
1134             }
1135           else
1136             {
1137               immediate_quit = 0;
1138               return (n);
1139             }
1140           n++;
1141         }
1142       while (n > 0)
1143         {
1144           int val;
1145           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1146                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1147                              &search_regs,
1148                              lim_byte - BEGV_BYTE);
1149           if (val == -2)
1150             {
1151               matcher_overflow ();
1152             }
1153           if (val >= 0)
1154             {
1155               pos_byte = search_regs.end[0] + BEGV_BYTE;
1156               for (i = 0; i < search_regs.num_regs; i++)
1157                 if (search_regs.start[i] >= 0)
1158                   {
1159                     search_regs.start[i]
1160                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1161                     search_regs.end[i]
1162                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1163                   }
1164               XSETBUFFER (last_thing_searched, current_buffer);
1165               pos = search_regs.end[0];
1166             }
1167           else
1168             {
1169               immediate_quit = 0;
1170               return (0 - n);
1171             }
1172           n--;
1173         }
1174       immediate_quit = 0;
1175       return (pos);
1176     }
1177   else                          /* non-RE case */
1178     {
1179       unsigned char *raw_pattern, *pat;
1180       int raw_pattern_size;
1181       int raw_pattern_size_byte;
1182       unsigned char *patbuf;
1183       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1184       unsigned char *base_pat;
1185       /* Set to positive if we find a non-ASCII char that need
1186          translation.  Otherwise set to zero later.  */
1187       int char_base = -1;
1188       int boyer_moore_ok = 1;
1189
1190       /* MULTIBYTE says whether the text to be searched is multibyte.
1191          We must convert PATTERN to match that, or we will not really
1192          find things right.  */
1193
1194       if (multibyte == STRING_MULTIBYTE (string))
1195         {
1196           raw_pattern = (unsigned char *) SDATA (string);
1197           raw_pattern_size = SCHARS (string);
1198           raw_pattern_size_byte = SBYTES (string);
1199         }
1200       else if (multibyte)
1201         {
1202           raw_pattern_size = SCHARS (string);
1203           raw_pattern_size_byte
1204             = count_size_as_multibyte (SDATA (string),
1205                                        raw_pattern_size);
1206           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1207           copy_text (SDATA (string), raw_pattern,
1208                      SCHARS (string), 0, 1);
1209         }
1210       else
1211         {
1212           /* Converting multibyte to single-byte.
1213
1214              ??? Perhaps this conversion should be done in a special way
1215              by subtracting nonascii-insert-offset from each non-ASCII char,
1216              so that only the multibyte chars which really correspond to
1217              the chosen single-byte character set can possibly match.  */
1218           raw_pattern_size = SCHARS (string);
1219           raw_pattern_size_byte = SCHARS (string);
1220           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1221           copy_text (SDATA (string), raw_pattern,
1222                      SBYTES (string), 1, 0);
1223         }
1224
1225       /* Copy and optionally translate the pattern.  */
1226       len = raw_pattern_size;
1227       len_byte = raw_pattern_size_byte;
1228       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1229       pat = patbuf;
1230       base_pat = raw_pattern;
1231       if (multibyte)
1232         {
1233           /* Fill patbuf by translated characters in STRING while
1234              checking if we can use boyer-moore search.  If TRT is
1235              non-nil, we can use boyer-moore search only if TRT can be
1236              represented by the byte array of 256 elements.  For that,
1237              all non-ASCII case-equivalents of all case-senstive
1238              characters in STRING must belong to the same charset and
1239              row.  */
1240
1241           while (--len >= 0)
1242             {
1243               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1244               int c, translated, inverse;
1245               int in_charlen, charlen;
1246
1247               /* If we got here and the RE flag is set, it's because we're
1248                  dealing with a regexp known to be trivial, so the backslash
1249                  just quotes the next character.  */
1250               if (RE && *base_pat == '\\')
1251                 {
1252                   len--;
1253                   raw_pattern_size--;
1254                   len_byte--;
1255                   base_pat++;
1256                 }
1257
1258               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1259
1260               if (NILP (trt))
1261                 {
1262                   str = base_pat;
1263                   charlen = in_charlen;
1264                 }
1265               else
1266                 {
1267                   /* Translate the character.  */
1268                   TRANSLATE (translated, trt, c);
1269                   charlen = CHAR_STRING (translated, str_base);
1270                   str = str_base;
1271
1272                   /* Check if C has any other case-equivalents.  */
1273                   TRANSLATE (inverse, inverse_trt, c);
1274                   /* If so, check if we can use boyer-moore.  */
1275                   if (c != inverse && boyer_moore_ok)
1276                     {
1277                       /* Check if all equivalents belong to the same
1278                          group of characters.  Note that the check of C
1279                          itself is done by the last iteration.  */
1280                       int this_char_base = -1;
1281
1282                       while (boyer_moore_ok)
1283                         {
1284                           if (ASCII_BYTE_P (inverse))
1285                             {
1286                               if (this_char_base > 0)
1287                                 boyer_moore_ok = 0;
1288                               else
1289                                 {
1290                                   this_char_base = 0;
1291                                   if (char_base < 0)
1292                                     char_base = this_char_base;
1293                                 }
1294                             }
1295                           else if (CHAR_BYTE8_P (inverse))
1296                             /* Boyer-moore search can't handle a
1297                                translation of an eight-bit
1298                                character.  */
1299                             boyer_moore_ok = 0;
1300                           else if (this_char_base < 0)
1301                             {
1302                               this_char_base = inverse & ~0x3F;
1303                               if (char_base < 0)
1304                                 char_base = this_char_base;
1305                               else if (char_base > 0
1306                                        && this_char_base != char_base)
1307                                 boyer_moore_ok = 0;
1308                             }
1309                           else if ((inverse & ~0x3F) != this_char_base)
1310                             boyer_moore_ok = 0;
1311                           if (c == inverse)
1312                             break;
1313                           TRANSLATE (inverse, inverse_trt, inverse);
1314                         }
1315                     }
1316                 }
1317               if (char_base < 0)
1318                 char_base = 0;
1319
1320               /* Store this character into the translated pattern.  */
1321               bcopy (str, pat, charlen);
1322               pat += charlen;
1323               base_pat += in_charlen;
1324               len_byte -= in_charlen;
1325             }
1326         }
1327       else
1328         {
1329           /* Unibyte buffer.  */
1330           char_base = 0;
1331           while (--len >= 0)
1332             {
1333               int c, translated;
1334
1335               /* If we got here and the RE flag is set, it's because we're
1336                  dealing with a regexp known to be trivial, so the backslash
1337                  just quotes the next character.  */
1338               if (RE && *base_pat == '\\')
1339                 {
1340                   len--;
1341                   raw_pattern_size--;
1342                   base_pat++;
1343                 }
1344               c = *base_pat++;
1345               TRANSLATE (translated, trt, c);
1346               *pat++ = translated;
1347             }
1348         }
1349
1350       len_byte = pat - patbuf;
1351       len = raw_pattern_size;
1352       pat = base_pat = patbuf;
1353
1354       if (boyer_moore_ok)
1355         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1356                             pos, pos_byte, lim, lim_byte,
1357                             char_base);
1358       else
1359         return simple_search (n, pat, len, len_byte, trt,
1360                               pos, pos_byte, lim, lim_byte);
1361     }
1362 }
1363 \f
1364 /* Do a simple string search N times for the string PAT,
1365    whose length is LEN/LEN_BYTE,
1366    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1367    TRT is the translation table.
1368
1369    Return the character position where the match is found.
1370    Otherwise, if M matches remained to be found, return -M.
1371
1372    This kind of search works regardless of what is in PAT and
1373    regardless of what is in TRT.  It is used in cases where
1374    boyer_moore cannot work.  */
1375
1376 static int
1377 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1378      int n;
1379      unsigned char *pat;
1380      int len, len_byte;
1381      Lisp_Object trt;
1382      int pos, pos_byte;
1383      int lim, lim_byte;
1384 {
1385   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1386   int forward = n > 0;
1387   /* Number of buffer bytes matched.  Note that this may be different
1388      from len_byte in a multibyte buffer.  */
1389   int match_byte;
1390
1391   if (lim > pos && multibyte)
1392     while (n > 0)
1393       {
1394         while (1)
1395           {
1396             /* Try matching at position POS.  */
1397             int this_pos = pos;
1398             int this_pos_byte = pos_byte;
1399             int this_len = len;
1400             int this_len_byte = len_byte;
1401             unsigned char *p = pat;
1402             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1403               goto stop;
1404
1405             while (this_len > 0)
1406               {
1407                 int charlen, buf_charlen;
1408                 int pat_ch, buf_ch;
1409
1410                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1411                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1412                                                  ZV_BYTE - this_pos_byte,
1413                                                  buf_charlen);
1414                 TRANSLATE (buf_ch, trt, buf_ch);
1415
1416                 if (buf_ch != pat_ch)
1417                   break;
1418
1419                 this_len_byte -= charlen;
1420                 this_len--;
1421                 p += charlen;
1422
1423                 this_pos_byte += buf_charlen;
1424                 this_pos++;
1425               }
1426
1427             if (this_len == 0)
1428               {
1429                 match_byte = this_pos_byte - pos_byte;
1430                 pos += len;
1431                 pos_byte += match_byte;
1432                 break;
1433               }
1434
1435             INC_BOTH (pos, pos_byte);
1436           }
1437
1438         n--;
1439       }
1440   else if (lim > pos)
1441     while (n > 0)
1442       {
1443         while (1)
1444           {
1445             /* Try matching at position POS.  */
1446             int this_pos = pos;
1447             int this_len = len;
1448             unsigned char *p = pat;
1449
1450             if (pos + len > lim)
1451               goto stop;
1452
1453             while (this_len > 0)
1454               {
1455                 int pat_ch = *p++;
1456                 int buf_ch = FETCH_BYTE (this_pos);
1457                 TRANSLATE (buf_ch, trt, buf_ch);
1458
1459                 if (buf_ch != pat_ch)
1460                   break;
1461
1462                 this_len--;
1463                 this_pos++;
1464               }
1465
1466             if (this_len == 0)
1467               {
1468                 match_byte = len;
1469                 pos += len;
1470                 break;
1471               }
1472
1473             pos++;
1474           }
1475
1476         n--;
1477       }
1478   /* Backwards search.  */
1479   else if (lim < pos && multibyte)
1480     while (n < 0)
1481       {
1482         while (1)
1483           {
1484             /* Try matching at position POS.  */
1485             int this_pos = pos - len;
1486             int this_pos_byte;
1487             int this_len = len;
1488             int this_len_byte = len_byte;
1489             unsigned char *p = pat;
1490
1491             if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1492               goto stop;
1493             this_pos_byte = CHAR_TO_BYTE (this_pos);
1494             match_byte = pos_byte - this_pos_byte;
1495
1496             while (this_len > 0)
1497               {
1498                 int charlen, buf_charlen;
1499                 int pat_ch, buf_ch;
1500
1501                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1502                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1503                                                  ZV_BYTE - this_pos_byte,
1504                                                  buf_charlen);
1505                 TRANSLATE (buf_ch, trt, buf_ch);
1506
1507                 if (buf_ch != pat_ch)
1508                   break;
1509
1510                 this_len_byte -= charlen;
1511                 this_len--;
1512                 p += charlen;
1513                 this_pos_byte += buf_charlen;
1514                 this_pos++;
1515               }
1516
1517             if (this_len == 0)
1518               {
1519                 pos -= len;
1520                 pos_byte -= match_byte;
1521                 break;
1522               }
1523
1524             DEC_BOTH (pos, pos_byte);
1525           }
1526
1527         n++;
1528       }
1529   else if (lim < pos)
1530     while (n < 0)
1531       {
1532         while (1)
1533           {
1534             /* Try matching at position POS.  */
1535             int this_pos = pos - len;
1536             int this_len = len;
1537             unsigned char *p = pat;
1538
1539             if (this_pos < lim)
1540               goto stop;
1541
1542             while (this_len > 0)
1543               {
1544                 int pat_ch = *p++;
1545                 int buf_ch = FETCH_BYTE (this_pos);
1546                 TRANSLATE (buf_ch, trt, buf_ch);
1547
1548                 if (buf_ch != pat_ch)
1549                   break;
1550                 this_len--;
1551                 this_pos++;
1552               }
1553
1554             if (this_len == 0)
1555               {
1556                 match_byte = len;
1557                 pos -= len;
1558                 break;
1559               }
1560
1561             pos--;
1562           }
1563
1564         n++;
1565       }
1566
1567  stop:
1568   if (n == 0)
1569     {
1570       if (forward)
1571         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1572       else
1573         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1574
1575       return pos;
1576     }
1577   else if (n > 0)
1578     return -n;
1579   else
1580     return n;
1581 }
1582 \f
1583 /* Do Boyer-Moore search N times for the string BASE_PAT,
1584    whose length is LEN/LEN_BYTE,
1585    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1586    DIRECTION says which direction we search in.
1587    TRT and INVERSE_TRT are translation tables.
1588    Characters in PAT are already translated by TRT.
1589
1590    This kind of search works if all the characters in BASE_PAT that
1591    have nontrivial translation are the same aside from the last byte.
1592    This makes it possible to translate just the last byte of a
1593    character, and do so after just a simple test of the context.
1594    CHAR_BASE is nonzero iff there is such a non-ASCII character.
1595
1596    If that criterion is not satisfied, do not call this function.  */
1597
1598 static int
1599 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1600              pos, pos_byte, lim, lim_byte, char_base)
1601      int n;
1602      unsigned char *base_pat;
1603      int len, len_byte;
1604      Lisp_Object trt;
1605      Lisp_Object inverse_trt;
1606      int pos, pos_byte;
1607      int lim, lim_byte;
1608      int char_base;
1609 {
1610   int direction = ((n > 0) ? 1 : -1);
1611   register int dirlen;
1612   int infinity, limit, stride_for_teases = 0;
1613   register int *BM_tab;
1614   int *BM_tab_base;
1615   register unsigned char *cursor, *p_limit;
1616   register int i, j;
1617   unsigned char *pat, *pat_end;
1618   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1619
1620   unsigned char simple_translate[0400];
1621   /* These are set to the preceding bytes of a byte to be translated
1622      if char_base is nonzero.  As the maximum byte length of a
1623      multibyte character is 5, we have to check at most four previous
1624      bytes.  */
1625   int translate_prev_byte1 = 0;
1626   int translate_prev_byte2 = 0;
1627   int translate_prev_byte3 = 0;
1628   int translate_prev_byte4 = 0;
1629
1630 #ifdef C_ALLOCA
1631   int BM_tab_space[0400];
1632   BM_tab = &BM_tab_space[0];
1633 #else
1634   BM_tab = (int *) alloca (0400 * sizeof (int));
1635 #endif
1636   /* The general approach is that we are going to maintain that we know */
1637   /* the first (closest to the present position, in whatever direction */
1638   /* we're searching) character that could possibly be the last */
1639   /* (furthest from present position) character of a valid match.  We */
1640   /* advance the state of our knowledge by looking at that character */
1641   /* and seeing whether it indeed matches the last character of the */
1642   /* pattern.  If it does, we take a closer look.  If it does not, we */
1643   /* move our pointer (to putative last characters) as far as is */
1644   /* logically possible.  This amount of movement, which I call a */
1645   /* stride, will be the length of the pattern if the actual character */
1646   /* appears nowhere in the pattern, otherwise it will be the distance */
1647   /* from the last occurrence of that character to the end of the */
1648   /* pattern. */
1649   /* As a coding trick, an enormous stride is coded into the table for */
1650   /* characters that match the last character.  This allows use of only */
1651   /* a single test, a test for having gone past the end of the */
1652   /* permissible match region, to test for both possible matches (when */
1653   /* the stride goes past the end immediately) and failure to */
1654   /* match (where you get nudged past the end one stride at a time). */
1655
1656   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1657   /* is determined only by the last character of the putative match. */
1658   /* If that character does not match, we will stride the proper */
1659   /* distance to propose a match that superimposes it on the last */
1660   /* instance of a character that matches it (per trt), or misses */
1661   /* it entirely if there is none. */
1662
1663   dirlen = len_byte * direction;
1664   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1665
1666   /* Record position after the end of the pattern.  */
1667   pat_end = base_pat + len_byte;
1668   /* BASE_PAT points to a character that we start scanning from.
1669      It is the first character in a forward search,
1670      the last character in a backward search.  */
1671   if (direction < 0)
1672     base_pat = pat_end - 1;
1673
1674   BM_tab_base = BM_tab;
1675   BM_tab += 0400;
1676   j = dirlen;           /* to get it in a register */
1677   /* A character that does not appear in the pattern induces a */
1678   /* stride equal to the pattern length. */
1679   while (BM_tab_base != BM_tab)
1680     {
1681       *--BM_tab = j;
1682       *--BM_tab = j;
1683       *--BM_tab = j;
1684       *--BM_tab = j;
1685     }
1686
1687   /* We use this for translation, instead of TRT itself.
1688      We fill this in to handle the characters that actually
1689      occur in the pattern.  Others don't matter anyway!  */
1690   bzero (simple_translate, sizeof simple_translate);
1691   for (i = 0; i < 0400; i++)
1692     simple_translate[i] = i;
1693
1694   if (char_base)
1695     {
1696       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1697          byte following them are the target of translation.  */
1698       unsigned char str[MAX_MULTIBYTE_LENGTH];
1699       int len = CHAR_STRING (char_base, str);
1700
1701       translate_prev_byte1 = str[len - 2];
1702       if (len > 2)
1703         {
1704           translate_prev_byte2 = str[len - 3];
1705           if (len > 3)
1706             {
1707               translate_prev_byte3 = str[len - 4];
1708               if (len > 4)
1709                 translate_prev_byte4 = str[len - 5];
1710             }
1711         }
1712     }
1713
1714   i = 0;
1715   while (i != infinity)
1716     {
1717       unsigned char *ptr = base_pat + i;
1718       i += direction;
1719       if (i == dirlen)
1720         i = infinity;
1721       if (! NILP (trt))
1722         {
1723           /* If the byte currently looking at is the last of a
1724              character to check case-equivalents, set CH to that
1725              character.  An ASCII character and a non-ASCII character
1726              matching with CHAR_BASE are to be checked.  */
1727           int ch = -1;
1728
1729           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1730             ch = *ptr;
1731           else if (char_base
1732                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1733             {
1734               unsigned char *charstart = ptr - 1;
1735
1736               while (! (CHAR_HEAD_P (*charstart)))
1737                 charstart--;
1738               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1739               if (char_base != (ch & ~0x3F))
1740                 ch = -1;
1741             }
1742
1743           if (ch >= 0400)
1744             j = (ch & 0x3F) | 0200;
1745           else
1746             j = *ptr;
1747
1748           if (i == infinity)
1749             stride_for_teases = BM_tab[j];
1750
1751           BM_tab[j] = dirlen - i;
1752           /* A translation table is accompanied by its inverse -- see */
1753           /* comment following downcase_table for details */
1754           if (ch >= 0)
1755             {
1756               int starting_ch = ch;
1757               int starting_j = j;
1758
1759               while (1)
1760                 {
1761                   TRANSLATE (ch, inverse_trt, ch);
1762                   if (ch >= 0400)
1763                     j = (ch & 0x3F) | 0200;
1764                   else
1765                     j = ch;
1766
1767                   /* For all the characters that map into CH,
1768                      set up simple_translate to map the last byte
1769                      into STARTING_J.  */
1770                   simple_translate[j] = starting_j;
1771                   if (ch == starting_ch)
1772                     break;
1773                   BM_tab[j] = dirlen - i;
1774                 }
1775             }
1776         }
1777       else
1778         {
1779           j = *ptr;
1780
1781           if (i == infinity)
1782             stride_for_teases = BM_tab[j];
1783           BM_tab[j] = dirlen - i;
1784         }
1785       /* stride_for_teases tells how much to stride if we get a */
1786       /* match on the far character but are subsequently */
1787       /* disappointed, by recording what the stride would have been */
1788       /* for that character if the last character had been */
1789       /* different. */
1790     }
1791   infinity = dirlen - infinity;
1792   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1793   /* loop invariant - POS_BYTE points at where last char (first
1794      char if reverse) of pattern would align in a possible match.  */
1795   while (n != 0)
1796     {
1797       int tail_end;
1798       unsigned char *tail_end_ptr;
1799
1800       /* It's been reported that some (broken) compiler thinks that
1801          Boolean expressions in an arithmetic context are unsigned.
1802          Using an explicit ?1:0 prevents this.  */
1803       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1804           < 0)
1805         return (n * (0 - direction));
1806       /* First we do the part we can by pointers (maybe nothing) */
1807       QUIT;
1808       pat = base_pat;
1809       limit = pos_byte - dirlen + direction;
1810       if (direction > 0)
1811         {
1812           limit = BUFFER_CEILING_OF (limit);
1813           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1814              can take on without hitting edge of buffer or the gap.  */
1815           limit = min (limit, pos_byte + 20000);
1816           limit = min (limit, lim_byte - 1);
1817         }
1818       else
1819         {
1820           limit = BUFFER_FLOOR_OF (limit);
1821           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1822              can take on without hitting edge of buffer or the gap.  */
1823           limit = max (limit, pos_byte - 20000);
1824           limit = max (limit, lim_byte);
1825         }
1826       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1827       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1828
1829       if ((limit - pos_byte) * direction > 20)
1830         {
1831           unsigned char *p2;
1832
1833           p_limit = BYTE_POS_ADDR (limit);
1834           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1835           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1836           while (1)             /* use one cursor setting as long as i can */
1837             {
1838               if (direction > 0) /* worth duplicating */
1839                 {
1840                   /* Use signed comparison if appropriate
1841                      to make cursor+infinity sure to be > p_limit.
1842                      Assuming that the buffer lies in a range of addresses
1843                      that are all "positive" (as ints) or all "negative",
1844                      either kind of comparison will work as long
1845                      as we don't step by infinity.  So pick the kind
1846                      that works when we do step by infinity.  */
1847                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1848                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1849                       cursor += BM_tab[*cursor];
1850                   else
1851                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1852                       cursor += BM_tab[*cursor];
1853                 }
1854               else
1855                 {
1856                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1857                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1858                       cursor += BM_tab[*cursor];
1859                   else
1860                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1861                       cursor += BM_tab[*cursor];
1862                 }
1863 /* If you are here, cursor is beyond the end of the searched region. */
1864 /* This can happen if you match on the far character of the pattern, */
1865 /* because the "stride" of that character is infinity, a number able */
1866 /* to throw you well beyond the end of the search.  It can also */
1867 /* happen if you fail to match within the permitted region and would */
1868 /* otherwise try a character beyond that region */
1869               if ((cursor - p_limit) * direction <= len_byte)
1870                 break;  /* a small overrun is genuine */
1871               cursor -= infinity; /* large overrun = hit */
1872               i = dirlen - direction;
1873               if (! NILP (trt))
1874                 {
1875                   while ((i -= direction) + direction != 0)
1876                     {
1877                       int ch;
1878                       cursor -= direction;
1879                       /* Translate only the last byte of a character.  */
1880                       if (! multibyte
1881                           || ((cursor == tail_end_ptr
1882                                || CHAR_HEAD_P (cursor[1]))
1883                               && (CHAR_HEAD_P (cursor[0])
1884                                   /* Check if this is the last byte of
1885                                      a translable character.  */
1886                                   || (translate_prev_byte1 == cursor[-1]
1887                                       && (CHAR_HEAD_P (translate_prev_byte1)
1888                                           || (translate_prev_byte2 == cursor[-2]
1889                                               && (CHAR_HEAD_P (translate_prev_byte2)
1890                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1891                         ch = simple_translate[*cursor];
1892                       else
1893                         ch = *cursor;
1894                       if (pat[i] != ch)
1895                         break;
1896                     }
1897                 }
1898               else
1899                 {
1900                   while ((i -= direction) + direction != 0)
1901                     {
1902                       cursor -= direction;
1903                       if (pat[i] != *cursor)
1904                         break;
1905                     }
1906                 }
1907               cursor += dirlen - i - direction; /* fix cursor */
1908               if (i + direction == 0)
1909                 {
1910                   int position;
1911
1912                   cursor -= direction;
1913
1914                   position = pos_byte + cursor - p2 + ((direction > 0)
1915                                                        ? 1 - len_byte : 0);
1916                   set_search_regs (position, len_byte);
1917
1918                   if ((n -= direction) != 0)
1919                     cursor += dirlen; /* to resume search */
1920                   else
1921                     return ((direction > 0)
1922                             ? search_regs.end[0] : search_regs.start[0]);
1923                 }
1924               else
1925                 cursor += stride_for_teases; /* <sigh> we lose -  */
1926             }
1927           pos_byte += cursor - p2;
1928         }
1929       else
1930         /* Now we'll pick up a clump that has to be done the hard */
1931         /* way because it covers a discontinuity */
1932         {
1933           limit = ((direction > 0)
1934                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1935                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1936           limit = ((direction > 0)
1937                    ? min (limit + len_byte, lim_byte - 1)
1938                    : max (limit - len_byte, lim_byte));
1939           /* LIMIT is now the last value POS_BYTE can have
1940              and still be valid for a possible match.  */
1941           while (1)
1942             {
1943               /* This loop can be coded for space rather than */
1944               /* speed because it will usually run only once. */
1945               /* (the reach is at most len + 21, and typically */
1946               /* does not exceed len) */
1947               while ((limit - pos_byte) * direction >= 0)
1948                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1949               /* now run the same tests to distinguish going off the */
1950               /* end, a match or a phony match. */
1951               if ((pos_byte - limit) * direction <= len_byte)
1952                 break;  /* ran off the end */
1953               /* Found what might be a match.
1954                  Set POS_BYTE back to last (first if reverse) pos.  */
1955               pos_byte -= infinity;
1956               i = dirlen - direction;
1957               while ((i -= direction) + direction != 0)
1958                 {
1959                   int ch;
1960                   unsigned char *ptr;
1961                   pos_byte -= direction;
1962                   ptr = BYTE_POS_ADDR (pos_byte);
1963                   /* Translate only the last byte of a character.  */
1964                   if (! multibyte
1965                       || ((ptr == tail_end_ptr
1966                            || CHAR_HEAD_P (ptr[1]))
1967                           && (CHAR_HEAD_P (ptr[0])
1968                               /* Check if this is the last byte of a
1969                                  translable character.  */
1970                               || (translate_prev_byte1 == ptr[-1]
1971                                   && (CHAR_HEAD_P (translate_prev_byte1)
1972                                       || (translate_prev_byte2 == ptr[-2]
1973                                           && (CHAR_HEAD_P (translate_prev_byte2)
1974                                               || translate_prev_byte3 == ptr[-3])))))))
1975                     ch = simple_translate[*ptr];
1976                   else
1977                     ch = *ptr;
1978                   if (pat[i] != ch)
1979                     break;
1980                 }
1981               /* Above loop has moved POS_BYTE part or all the way
1982                  back to the first pos (last pos if reverse).
1983                  Set it once again at the last (first if reverse) char.  */
1984               pos_byte += dirlen - i- direction;
1985               if (i + direction == 0)
1986                 {
1987                   int position;
1988                   pos_byte -= direction;
1989
1990                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1991
1992                   set_search_regs (position, len_byte);
1993
1994                   if ((n -= direction) != 0)
1995                     pos_byte += dirlen; /* to resume search */
1996                   else
1997                     return ((direction > 0)
1998                             ? search_regs.end[0] : search_regs.start[0]);
1999                 }
2000               else
2001                 pos_byte += stride_for_teases;
2002             }
2003           }
2004       /* We have done one clump.  Can we continue? */
2005       if ((lim_byte - pos_byte) * direction < 0)
2006         return ((0 - n) * direction);
2007     }
2008   return BYTE_TO_CHAR (pos_byte);
2009 }
2010
2011 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2012    for the overall match just found in the current buffer.
2013    Also clear out the match data for registers 1 and up.  */
2014
2015 static void
2016 set_search_regs (beg_byte, nbytes)
2017      int beg_byte, nbytes;
2018 {
2019   int i;
2020
2021   /* Make sure we have registers in which to store
2022      the match position.  */
2023   if (search_regs.num_regs == 0)
2024     {
2025       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2026       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2027       search_regs.num_regs = 2;
2028     }
2029
2030   /* Clear out the other registers.  */
2031   for (i = 1; i < search_regs.num_regs; i++)
2032     {
2033       search_regs.start[i] = -1;
2034       search_regs.end[i] = -1;
2035     }
2036
2037   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2038   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2039   XSETBUFFER (last_thing_searched, current_buffer);
2040 }
2041 \f
2042 /* Given a string of words separated by word delimiters,
2043   compute a regexp that matches those exact words
2044   separated by arbitrary punctuation.  */
2045
2046 static Lisp_Object
2047 wordify (string)
2048      Lisp_Object string;
2049 {
2050   register unsigned char *p, *o;
2051   register int i, i_byte, len, punct_count = 0, word_count = 0;
2052   Lisp_Object val;
2053   int prev_c = 0;
2054   int adjust;
2055
2056   CHECK_STRING (string);
2057   p = SDATA (string);
2058   len = SCHARS (string);
2059
2060   for (i = 0, i_byte = 0; i < len; )
2061     {
2062       int c;
2063
2064       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2065
2066       if (SYNTAX (c) != Sword)
2067         {
2068           punct_count++;
2069           if (i > 0 && SYNTAX (prev_c) == Sword)
2070             word_count++;
2071         }
2072
2073       prev_c = c;
2074     }
2075
2076   if (SYNTAX (prev_c) == Sword)
2077     word_count++;
2078   if (!word_count)
2079     return empty_string;
2080
2081   adjust = - punct_count + 5 * (word_count - 1) + 4;
2082   if (STRING_MULTIBYTE (string))
2083     val = make_uninit_multibyte_string (len + adjust,
2084                                         SBYTES (string)
2085                                         + adjust);
2086   else
2087     val = make_uninit_string (len + adjust);
2088
2089   o = SDATA (val);
2090   *o++ = '\\';
2091   *o++ = 'b';
2092   prev_c = 0;
2093
2094   for (i = 0, i_byte = 0; i < len; )
2095     {
2096       int c;
2097       int i_byte_orig = i_byte;
2098
2099       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2100
2101       if (SYNTAX (c) == Sword)
2102         {
2103           bcopy (SDATA (string) + i_byte_orig, o,
2104                  i_byte - i_byte_orig);
2105           o += i_byte - i_byte_orig;
2106         }
2107       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2108         {
2109           *o++ = '\\';
2110           *o++ = 'W';
2111           *o++ = '\\';
2112           *o++ = 'W';
2113           *o++ = '*';
2114         }
2115
2116       prev_c = c;
2117     }
2118
2119   *o++ = '\\';
2120   *o++ = 'b';
2121
2122   return val;
2123 }
2124 \f
2125 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2126        "MSearch backward: ",
2127        doc: /* Search backward from point for STRING.
2128 Set point to the beginning of the occurrence found, and return point.
2129 An optional second argument bounds the search; it is a buffer position.
2130 The match found must not extend before that position.
2131 Optional third argument, if t, means if fail just return nil (no error).
2132  If not nil and not t, position at limit of search and return nil.
2133 Optional fourth argument is repeat count--search for successive occurrences.
2134
2135 Search case-sensitivity is determined by the value of the variable
2136 `case-fold-search', which see.
2137
2138 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2139      (string, bound, noerror, count)
2140      Lisp_Object string, bound, noerror, count;
2141 {
2142   return search_command (string, bound, noerror, count, -1, 0, 0);
2143 }
2144
2145 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2146        doc: /* Search forward from point for STRING.
2147 Set point to the end of the occurrence found, and return point.
2148 An optional second argument bounds the search; it is a buffer position.
2149 The match found must not extend after that position.  A value of nil is
2150   equivalent to (point-max).
2151 Optional third argument, if t, means if fail just return nil (no error).
2152   If not nil and not t, move to limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences.
2154
2155 Search case-sensitivity is determined by the value of the variable
2156 `case-fold-search', which see.
2157
2158 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2159      (string, bound, noerror, count)
2160      Lisp_Object string, bound, noerror, count;
2161 {
2162   return search_command (string, bound, noerror, count, 1, 0, 0);
2163 }
2164
2165 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2166        "sWord search backward: ",
2167        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2168 Set point to the beginning of the occurrence found, and return point.
2169 An optional second argument bounds the search; it is a buffer position.
2170 The match found must not extend before that position.
2171 Optional third argument, if t, means if fail just return nil (no error).
2172   If not nil and not t, move to limit of search and return nil.
2173 Optional fourth argument is repeat count--search for successive occurrences.  */)
2174      (string, bound, noerror, count)
2175      Lisp_Object string, bound, noerror, count;
2176 {
2177   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2178 }
2179
2180 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2181        "sWord search: ",
2182        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2183 Set point to the end of the occurrence found, and return point.
2184 An optional second argument bounds the search; it is a buffer position.
2185 The match found must not extend after that position.
2186 Optional third argument, if t, means if fail just return nil (no error).
2187   If not nil and not t, move to limit of search and return nil.
2188 Optional fourth argument is repeat count--search for successive occurrences.  */)
2189      (string, bound, noerror, count)
2190      Lisp_Object string, bound, noerror, count;
2191 {
2192   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2193 }
2194
2195 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2196        "sRE search backward: ",
2197        doc: /* Search backward from point for match for regular expression REGEXP.
2198 Set point to the beginning of the match, and return point.
2199 The match found is the one starting last in the buffer
2200 and yet ending before the origin of the search.
2201 An optional second argument bounds the search; it is a buffer position.
2202 The match found must start at or after that position.
2203 Optional third argument, if t, means if fail just return nil (no error).
2204   If not nil and not t, move to limit of search and return nil.
2205 Optional fourth argument is repeat count--search for successive occurrences.
2206 See also the functions `match-beginning', `match-end', `match-string',
2207 and `replace-match'.  */)
2208      (regexp, bound, noerror, count)
2209      Lisp_Object regexp, bound, noerror, count;
2210 {
2211   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2212 }
2213
2214 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2215        "sRE search: ",
2216        doc: /* Search forward from point for regular expression REGEXP.
2217 Set point to the end of the occurrence found, and return point.
2218 An optional second argument bounds the search; it is a buffer position.
2219 The match found must not extend after that position.
2220 Optional third argument, if t, means if fail just return nil (no error).
2221   If not nil and not t, move to limit of search and return nil.
2222 Optional fourth argument is repeat count--search for successive occurrences.
2223 See also the functions `match-beginning', `match-end', `match-string',
2224 and `replace-match'.  */)
2225      (regexp, bound, noerror, count)
2226      Lisp_Object regexp, bound, noerror, count;
2227 {
2228   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2229 }
2230
2231 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2232        "sPosix search backward: ",
2233        doc: /* Search backward from point for match for regular expression REGEXP.
2234 Find the longest match in accord with Posix regular expression rules.
2235 Set point to the beginning of the match, and return point.
2236 The match found is the one starting last in the buffer
2237 and yet ending before the origin of the search.
2238 An optional second argument bounds the search; it is a buffer position.
2239 The match found must start at or after that position.
2240 Optional third argument, if t, means if fail just return nil (no error).
2241   If not nil and not t, move to limit of search and return nil.
2242 Optional fourth argument is repeat count--search for successive occurrences.
2243 See also the functions `match-beginning', `match-end', `match-string',
2244 and `replace-match'.  */)
2245      (regexp, bound, noerror, count)
2246      Lisp_Object regexp, bound, noerror, count;
2247 {
2248   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2249 }
2250
2251 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2252        "sPosix search: ",
2253        doc: /* Search forward from point for regular expression REGEXP.
2254 Find the longest match in accord with Posix regular expression rules.
2255 Set point to the end of the occurrence found, and return point.
2256 An optional second argument bounds the search; it is a buffer position.
2257 The match found must not extend after that position.
2258 Optional third argument, if t, means if fail just return nil (no error).
2259   If not nil and not t, move to limit of search and return nil.
2260 Optional fourth argument is repeat count--search for successive occurrences.
2261 See also the functions `match-beginning', `match-end', `match-string',
2262 and `replace-match'.  */)
2263      (regexp, bound, noerror, count)
2264      Lisp_Object regexp, bound, noerror, count;
2265 {
2266   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2267 }
2268 \f
2269 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2270        doc: /* Replace text matched by last search with NEWTEXT.
2271 Leave point at the end of the replacement text.
2272
2273 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2274 Otherwise maybe capitalize the whole text, or maybe just word initials,
2275 based on the replaced text.
2276 If the replaced text has only capital letters
2277 and has at least one multiletter word, convert NEWTEXT to all caps.
2278 Otherwise if all words are capitalized in the replaced text,
2279 capitalize each word in NEWTEXT.
2280
2281 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2282 Otherwise treat `\\' as special:
2283   `\\&' in NEWTEXT means substitute original matched text.
2284   `\\N' means substitute what matched the Nth `\\(...\\)'.
2285        If Nth parens didn't match, substitute nothing.
2286   `\\\\' means insert one `\\'.
2287 Case conversion does not apply to these substitutions.
2288
2289 FIXEDCASE and LITERAL are optional arguments.
2290
2291 The optional fourth argument STRING can be a string to modify.
2292 This is meaningful when the previous match was done against STRING,
2293 using `string-match'.  When used this way, `replace-match'
2294 creates and returns a new string made by copying STRING and replacing
2295 the part of STRING that was matched.
2296
2297 The optional fifth argument SUBEXP specifies a subexpression;
2298 it says to replace just that subexpression with NEWTEXT,
2299 rather than replacing the entire matched text.
2300 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2301 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2302 NEWTEXT in place of subexp N.
2303 This is useful only after a regular expression search or match,
2304 since only regular expressions have distinguished subexpressions.  */)
2305      (newtext, fixedcase, literal, string, subexp)
2306      Lisp_Object newtext, fixedcase, literal, string, subexp;
2307 {
2308   enum { nochange, all_caps, cap_initial } case_action;
2309   register int pos, pos_byte;
2310   int some_multiletter_word;
2311   int some_lowercase;
2312   int some_uppercase;
2313   int some_nonuppercase_initial;
2314   register int c, prevc;
2315   int sub;
2316   int opoint, newpoint;
2317
2318   CHECK_STRING (newtext);
2319
2320   if (! NILP (string))
2321     CHECK_STRING (string);
2322
2323   case_action = nochange;       /* We tried an initialization */
2324                                 /* but some C compilers blew it */
2325
2326   if (search_regs.num_regs <= 0)
2327     error ("`replace-match' called before any match found");
2328
2329   if (NILP (subexp))
2330     sub = 0;
2331   else
2332     {
2333       CHECK_NUMBER (subexp);
2334       sub = XINT (subexp);
2335       if (sub < 0 || sub >= search_regs.num_regs)
2336         args_out_of_range (subexp, make_number (search_regs.num_regs));
2337     }
2338
2339   if (NILP (string))
2340     {
2341       if (search_regs.start[sub] < BEGV
2342           || search_regs.start[sub] > search_regs.end[sub]
2343           || search_regs.end[sub] > ZV)
2344         args_out_of_range (make_number (search_regs.start[sub]),
2345                            make_number (search_regs.end[sub]));
2346     }
2347   else
2348     {
2349       if (search_regs.start[sub] < 0
2350           || search_regs.start[sub] > search_regs.end[sub]
2351           || search_regs.end[sub] > SCHARS (string))
2352         args_out_of_range (make_number (search_regs.start[sub]),
2353                            make_number (search_regs.end[sub]));
2354     }
2355
2356   if (NILP (fixedcase))
2357     {
2358       /* Decide how to casify by examining the matched text. */
2359       int last;
2360
2361       pos = search_regs.start[sub];
2362       last = search_regs.end[sub];
2363
2364       if (NILP (string))
2365         pos_byte = CHAR_TO_BYTE (pos);
2366       else
2367         pos_byte = string_char_to_byte (string, pos);
2368
2369       prevc = '\n';
2370       case_action = all_caps;
2371
2372       /* some_multiletter_word is set nonzero if any original word
2373          is more than one letter long. */
2374       some_multiletter_word = 0;
2375       some_lowercase = 0;
2376       some_nonuppercase_initial = 0;
2377       some_uppercase = 0;
2378
2379       while (pos < last)
2380         {
2381           if (NILP (string))
2382             {
2383               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2384               INC_BOTH (pos, pos_byte);
2385             }
2386           else
2387             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2388
2389           if (LOWERCASEP (c))
2390             {
2391               /* Cannot be all caps if any original char is lower case */
2392
2393               some_lowercase = 1;
2394               if (SYNTAX (prevc) != Sword)
2395                 some_nonuppercase_initial = 1;
2396               else
2397                 some_multiletter_word = 1;
2398             }
2399           else if (UPPERCASEP (c))
2400             {
2401               some_uppercase = 1;
2402               if (SYNTAX (prevc) != Sword)
2403                 ;
2404               else
2405                 some_multiletter_word = 1;
2406             }
2407           else
2408             {
2409               /* If the initial is a caseless word constituent,
2410                  treat that like a lowercase initial.  */
2411               if (SYNTAX (prevc) != Sword)
2412                 some_nonuppercase_initial = 1;
2413             }
2414
2415           prevc = c;
2416         }
2417
2418       /* Convert to all caps if the old text is all caps
2419          and has at least one multiletter word.  */
2420       if (! some_lowercase && some_multiletter_word)
2421         case_action = all_caps;
2422       /* Capitalize each word, if the old text has all capitalized words.  */
2423       else if (!some_nonuppercase_initial && some_multiletter_word)
2424         case_action = cap_initial;
2425       else if (!some_nonuppercase_initial && some_uppercase)
2426         /* Should x -> yz, operating on X, give Yz or YZ?
2427            We'll assume the latter.  */
2428         case_action = all_caps;
2429       else
2430         case_action = nochange;
2431     }
2432
2433   /* Do replacement in a string.  */
2434   if (!NILP (string))
2435     {
2436       Lisp_Object before, after;
2437
2438       before = Fsubstring (string, make_number (0),
2439                            make_number (search_regs.start[sub]));
2440       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2441
2442       /* Substitute parts of the match into NEWTEXT
2443          if desired.  */
2444       if (NILP (literal))
2445         {
2446           int lastpos = 0;
2447           int lastpos_byte = 0;
2448           /* We build up the substituted string in ACCUM.  */
2449           Lisp_Object accum;
2450           Lisp_Object middle;
2451           int length = SBYTES (newtext);
2452
2453           accum = Qnil;
2454
2455           for (pos_byte = 0, pos = 0; pos_byte < length;)
2456             {
2457               int substart = -1;
2458               int subend = 0;
2459               int delbackslash = 0;
2460
2461               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2462
2463               if (c == '\\')
2464                 {
2465                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2466
2467                   if (c == '&')
2468                     {
2469                       substart = search_regs.start[sub];
2470                       subend = search_regs.end[sub];
2471                     }
2472                   else if (c >= '1' && c <= '9')
2473                     {
2474                       if (search_regs.start[c - '0'] >= 0
2475                           && c <= search_regs.num_regs + '0')
2476                         {
2477                           substart = search_regs.start[c - '0'];
2478                           subend = search_regs.end[c - '0'];
2479                         }
2480                       else
2481                         {
2482                           /* If that subexp did not match,
2483                              replace \\N with nothing.  */
2484                           substart = 0;
2485                           subend = 0;
2486                         }
2487                     }
2488                   else if (c == '\\')
2489                     delbackslash = 1;
2490                   else
2491                     error ("Invalid use of `\\' in replacement text");
2492                 }
2493               if (substart >= 0)
2494                 {
2495                   if (pos - 2 != lastpos)
2496                     middle = substring_both (newtext, lastpos,
2497                                              lastpos_byte,
2498                                              pos - 2, pos_byte - 2);
2499                   else
2500                     middle = Qnil;
2501                   accum = concat3 (accum, middle,
2502                                    Fsubstring (string,
2503                                                make_number (substart),
2504                                                make_number (subend)));
2505                   lastpos = pos;
2506                   lastpos_byte = pos_byte;
2507                 }
2508               else if (delbackslash)
2509                 {
2510                   middle = substring_both (newtext, lastpos,
2511                                            lastpos_byte,
2512                                            pos - 1, pos_byte - 1);
2513
2514                   accum = concat2 (accum, middle);
2515                   lastpos = pos;
2516                   lastpos_byte = pos_byte;
2517                 }
2518             }
2519
2520           if (pos != lastpos)
2521             middle = substring_both (newtext, lastpos,
2522                                      lastpos_byte,
2523                                      pos, pos_byte);
2524           else
2525             middle = Qnil;
2526
2527           newtext = concat2 (accum, middle);
2528         }
2529
2530       /* Do case substitution in NEWTEXT if desired.  */
2531       if (case_action == all_caps)
2532         newtext = Fupcase (newtext);
2533       else if (case_action == cap_initial)
2534         newtext = Fupcase_initials (newtext);
2535
2536       return concat3 (before, newtext, after);
2537     }
2538
2539   /* Record point, then move (quietly) to the start of the match.  */
2540   if (PT >= search_regs.end[sub])
2541     opoint = PT - ZV;
2542   else if (PT > search_regs.start[sub])
2543     opoint = search_regs.end[sub] - ZV;
2544   else
2545     opoint = PT;
2546
2547   /* If we want non-literal replacement,
2548      perform substitution on the replacement string.  */
2549   if (NILP (literal))
2550     {
2551       int length = SBYTES (newtext);
2552       unsigned char *substed;
2553       int substed_alloc_size, substed_len;
2554       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2555       int str_multibyte = STRING_MULTIBYTE (newtext);
2556       Lisp_Object rev_tbl;
2557       int really_changed = 0;
2558
2559       rev_tbl = Qnil;
2560
2561       substed_alloc_size = length * 2 + 100;
2562       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2563       substed_len = 0;
2564
2565       /* Go thru NEWTEXT, producing the actual text to insert in
2566          SUBSTED while adjusting multibyteness to that of the current
2567          buffer.  */
2568
2569       for (pos_byte = 0, pos = 0; pos_byte < length;)
2570         {
2571           unsigned char str[MAX_MULTIBYTE_LENGTH];
2572           unsigned char *add_stuff = NULL;
2573           int add_len = 0;
2574           int idx = -1;
2575
2576           if (str_multibyte)
2577             {
2578               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2579               if (!buf_multibyte)
2580                 c = multibyte_char_to_unibyte (c, rev_tbl);
2581             }
2582           else
2583             {
2584               /* Note that we don't have to increment POS.  */
2585               c = SREF (newtext, pos_byte++);
2586               if (buf_multibyte)
2587                 c = unibyte_char_to_multibyte (c);
2588             }
2589
2590           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2591              or set IDX to a match index, which means put that part
2592              of the buffer text into SUBSTED.  */
2593
2594           if (c == '\\')
2595             {
2596               really_changed = 1;
2597
2598               if (str_multibyte)
2599                 {
2600                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2601                                                       pos, pos_byte);
2602                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2603                     c = multibyte_char_to_unibyte (c, rev_tbl);
2604                 }
2605               else
2606                 {
2607                   c = SREF (newtext, pos_byte++);
2608                   if (buf_multibyte)
2609                     c = unibyte_char_to_multibyte (c);
2610                 }
2611
2612               if (c == '&')
2613                 idx = sub;
2614               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2615                 {
2616                   if (search_regs.start[c - '0'] >= 1)
2617                     idx = c - '0';
2618                 }
2619               else if (c == '\\')
2620                 add_len = 1, add_stuff = "\\";
2621               else
2622                 {
2623                   xfree (substed);
2624                   error ("Invalid use of `\\' in replacement text");
2625                 }
2626             }
2627           else
2628             {
2629               add_len = CHAR_STRING (c, str);
2630               add_stuff = str;
2631             }
2632
2633           /* If we want to copy part of a previous match,
2634              set up ADD_STUFF and ADD_LEN to point to it.  */
2635           if (idx >= 0)
2636             {
2637               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2638               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2639               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2640                 move_gap (search_regs.start[idx]);
2641               add_stuff = BYTE_POS_ADDR (begbyte);
2642             }
2643
2644           /* Now the stuff we want to add to SUBSTED
2645              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2646
2647           /* Make sure SUBSTED is big enough.  */
2648           if (substed_len + add_len >= substed_alloc_size)
2649             {
2650               substed_alloc_size = substed_len + add_len + 500;
2651               substed = (unsigned char *) xrealloc (substed,
2652                                                     substed_alloc_size + 1);
2653             }
2654
2655           /* Now add to the end of SUBSTED.  */
2656           if (add_stuff)
2657             {
2658               bcopy (add_stuff, substed + substed_len, add_len);
2659               substed_len += add_len;
2660             }
2661         }
2662
2663       if (really_changed)
2664         {
2665           if (buf_multibyte)
2666             {
2667               int nchars = multibyte_chars_in_text (substed, substed_len);
2668
2669               newtext = make_multibyte_string (substed, nchars, substed_len);
2670             }
2671           else
2672             newtext = make_unibyte_string (substed, substed_len);
2673         }
2674       xfree (substed);
2675     }
2676
2677   /* Replace the old text with the new in the cleanest possible way.  */
2678   replace_range (search_regs.start[sub], search_regs.end[sub],
2679                  newtext, 1, 0, 1);
2680   newpoint = search_regs.start[sub] + SCHARS (newtext);
2681
2682   if (case_action == all_caps)
2683     Fupcase_region (make_number (search_regs.start[sub]),
2684                     make_number (newpoint));
2685   else if (case_action == cap_initial)
2686     Fupcase_initials_region (make_number (search_regs.start[sub]),
2687                              make_number (newpoint));
2688
2689   /* Adjust search data for this change.  */
2690   {
2691     int oldend = search_regs.end[sub];
2692     int oldstart = search_regs.start[sub];
2693     int change = newpoint - search_regs.end[sub];
2694     int i;
2695
2696     for (i = 0; i < search_regs.num_regs; i++)
2697       {
2698         if (search_regs.start[i] >= oldend)
2699           search_regs.start[i] += change;
2700         else if (search_regs.start[i] > oldstart)
2701           search_regs.start[i] = oldstart;
2702         if (search_regs.end[i] >= oldend)
2703           search_regs.end[i] += change;
2704         else if (search_regs.end[i] > oldstart)
2705           search_regs.end[i] = oldstart;
2706       }
2707   }
2708
2709   /* Put point back where it was in the text.  */
2710   if (opoint <= 0)
2711     TEMP_SET_PT (opoint + ZV);
2712   else
2713     TEMP_SET_PT (opoint);
2714
2715   /* Now move point "officially" to the start of the inserted replacement.  */
2716   move_if_not_intangible (newpoint);
2717
2718   return Qnil;
2719 }
2720 \f
2721 static Lisp_Object
2722 match_limit (num, beginningp)
2723      Lisp_Object num;
2724      int beginningp;
2725 {
2726   register int n;
2727
2728   CHECK_NUMBER (num);
2729   n = XINT (num);
2730   if (n < 0)
2731     args_out_of_range (num, make_number (0));
2732   if (search_regs.num_regs <= 0)
2733     error ("No match data, because no search succeeded");
2734   if (n >= search_regs.num_regs
2735       || search_regs.start[n] < 0)
2736     return Qnil;
2737   return (make_number ((beginningp) ? search_regs.start[n]
2738                                     : search_regs.end[n]));
2739 }
2740
2741 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2742        doc: /* Return position of start of text matched by last search.
2743 SUBEXP, a number, specifies which parenthesized expression in the last
2744   regexp.
2745 Value is nil if SUBEXPth pair didn't match, or there were less than
2746   SUBEXP pairs.
2747 Zero means the entire text matched by the whole regexp or whole string.  */)
2748      (subexp)
2749      Lisp_Object subexp;
2750 {
2751   return match_limit (subexp, 1);
2752 }
2753
2754 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2755        doc: /* Return position of end of text matched by last search.
2756 SUBEXP, a number, specifies which parenthesized expression in the last
2757   regexp.
2758 Value is nil if SUBEXPth pair didn't match, or there were less than
2759   SUBEXP pairs.
2760 Zero means the entire text matched by the whole regexp or whole string.  */)
2761      (subexp)
2762      Lisp_Object subexp;
2763 {
2764   return match_limit (subexp, 0);
2765 }
2766
2767 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2768        doc: /* Return a list containing all info on what the last search matched.
2769 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2770 All the elements are markers or nil (nil if the Nth pair didn't match)
2771 if the last match was on a buffer; integers or nil if a string was matched.
2772 Use `store-match-data' to reinstate the data in this list.
2773
2774 If INTEGERS (the optional first argument) is non-nil, always use
2775 integers \(rather than markers) to represent buffer positions.  In
2776 this case, and if the last match was in a buffer, the buffer will get
2777 stored as one additional element at the end of the list.
2778
2779 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2780 enough to hold all the values, and if INTEGERS is non-nil, no consing
2781 is done.
2782
2783 If optional third arg RESEAT is non-nil, any previous markers on the
2784 REUSE list will be modified to point to nowhere.
2785
2786 Return value is undefined if the last search failed.  */)
2787   (integers, reuse, reseat)
2788      Lisp_Object integers, reuse, reseat;
2789 {
2790   Lisp_Object tail, prev;
2791   Lisp_Object *data;
2792   int i, len;
2793
2794   if (!NILP (reseat))
2795     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2796       if (MARKERP (XCAR (tail)))
2797         {
2798           unchain_marker (XMARKER (XCAR (tail)));
2799           XSETCAR (tail, Qnil);
2800         }
2801
2802   if (NILP (last_thing_searched))
2803     return Qnil;
2804
2805   prev = Qnil;
2806
2807   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2808                                  * sizeof (Lisp_Object));
2809
2810   len = 0;
2811   for (i = 0; i < search_regs.num_regs; i++)
2812     {
2813       int start = search_regs.start[i];
2814       if (start >= 0)
2815         {
2816           if (EQ (last_thing_searched, Qt)
2817               || ! NILP (integers))
2818             {
2819               XSETFASTINT (data[2 * i], start);
2820               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2821             }
2822           else if (BUFFERP (last_thing_searched))
2823             {
2824               data[2 * i] = Fmake_marker ();
2825               Fset_marker (data[2 * i],
2826                            make_number (start),
2827                            last_thing_searched);
2828               data[2 * i + 1] = Fmake_marker ();
2829               Fset_marker (data[2 * i + 1],
2830                            make_number (search_regs.end[i]),
2831                            last_thing_searched);
2832             }
2833           else
2834             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2835             abort ();
2836
2837           len = 2 * i + 2;
2838         }
2839       else
2840         data[2 * i] = data[2 * i + 1] = Qnil;
2841     }
2842
2843   if (BUFFERP (last_thing_searched) && !NILP (integers))
2844     {
2845       data[len] = last_thing_searched;
2846       len++;
2847     }
2848
2849   /* If REUSE is not usable, cons up the values and return them.  */
2850   if (! CONSP (reuse))
2851     return Flist (len, data);
2852
2853   /* If REUSE is a list, store as many value elements as will fit
2854      into the elements of REUSE.  */
2855   for (i = 0, tail = reuse; CONSP (tail);
2856        i++, tail = XCDR (tail))
2857     {
2858       if (i < len)
2859         XSETCAR (tail, data[i]);
2860       else
2861         XSETCAR (tail, Qnil);
2862       prev = tail;
2863     }
2864
2865   /* If we couldn't fit all value elements into REUSE,
2866      cons up the rest of them and add them to the end of REUSE.  */
2867   if (i < len)
2868     XSETCDR (prev, Flist (len - i, data + i));
2869
2870   return reuse;
2871 }
2872
2873 /* Internal usage only:
2874    If RESEAT is `evaporate', put the markers back on the free list
2875    immediately.  No other references to the markers must exist in this case,
2876    so it is used only internally on the unwind stack and save-match-data from
2877    Lisp.  */
2878
2879 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2880        doc: /* Set internal data on last search match from elements of LIST.
2881 LIST should have been created by calling `match-data' previously.
2882
2883 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2884     (list, reseat)
2885      register Lisp_Object list, reseat;
2886 {
2887   register int i;
2888   register Lisp_Object marker;
2889
2890   if (running_asynch_code)
2891     save_search_regs ();
2892
2893   CHECK_LIST (list);
2894
2895   /* Unless we find a marker with a buffer or an explicit buffer
2896      in LIST, assume that this match data came from a string.  */
2897   last_thing_searched = Qt;
2898
2899   /* Allocate registers if they don't already exist.  */
2900   {
2901     int length = XFASTINT (Flength (list)) / 2;
2902
2903     if (length > search_regs.num_regs)
2904       {
2905         if (search_regs.num_regs == 0)
2906           {
2907             search_regs.start
2908               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2909             search_regs.end
2910               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2911           }
2912         else
2913           {
2914             search_regs.start
2915               = (regoff_t *) xrealloc (search_regs.start,
2916                                        length * sizeof (regoff_t));
2917             search_regs.end
2918               = (regoff_t *) xrealloc (search_regs.end,
2919                                        length * sizeof (regoff_t));
2920           }
2921
2922         for (i = search_regs.num_regs; i < length; i++)
2923           search_regs.start[i] = -1;
2924
2925         search_regs.num_regs = length;
2926       }
2927
2928     for (i = 0; CONSP (list); i++)
2929       {
2930         marker = XCAR (list);
2931         if (BUFFERP (marker))
2932           {
2933             last_thing_searched = marker;
2934             break;
2935           }
2936         if (i >= length)
2937           break;
2938         if (NILP (marker))
2939           {
2940             search_regs.start[i] = -1;
2941             list = XCDR (list);
2942           }
2943         else
2944           {
2945             int from;
2946             Lisp_Object m;
2947
2948             m = marker;
2949             if (MARKERP (marker))
2950               {
2951                 if (XMARKER (marker)->buffer == 0)
2952                   XSETFASTINT (marker, 0);
2953                 else
2954                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2955               }
2956
2957             CHECK_NUMBER_COERCE_MARKER (marker);
2958             from = XINT (marker);
2959
2960             if (!NILP (reseat) && MARKERP (m))
2961               {
2962                 if (EQ (reseat, Qevaporate))
2963                   free_marker (m);
2964                 else
2965                   unchain_marker (XMARKER (m));
2966                 XSETCAR (list, Qnil);
2967               }
2968
2969             if ((list = XCDR (list), !CONSP (list)))
2970               break;
2971
2972             m = marker = XCAR (list);
2973
2974             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2975               XSETFASTINT (marker, 0);
2976
2977             CHECK_NUMBER_COERCE_MARKER (marker);
2978             search_regs.start[i] = from;
2979             search_regs.end[i] = XINT (marker);
2980
2981             if (!NILP (reseat) && MARKERP (m))
2982               {
2983                 if (EQ (reseat, Qevaporate))
2984                   free_marker (m);
2985                 else
2986                   unchain_marker (XMARKER (m));
2987                 XSETCAR (list, Qnil);
2988               }
2989           }
2990         list = XCDR (list);
2991       }
2992
2993     for (; i < search_regs.num_regs; i++)
2994       search_regs.start[i] = -1;
2995   }
2996
2997   return Qnil;
2998 }
2999
3000 /* If non-zero the match data have been saved in saved_search_regs
3001    during the execution of a sentinel or filter. */
3002 static int search_regs_saved;
3003 static struct re_registers saved_search_regs;
3004 static Lisp_Object saved_last_thing_searched;
3005
3006 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3007    if asynchronous code (filter or sentinel) is running. */
3008 static void
3009 save_search_regs ()
3010 {
3011   if (!search_regs_saved)
3012     {
3013       saved_search_regs.num_regs = search_regs.num_regs;
3014       saved_search_regs.start = search_regs.start;
3015       saved_search_regs.end = search_regs.end;
3016       saved_last_thing_searched = last_thing_searched;
3017       last_thing_searched = Qnil;
3018       search_regs.num_regs = 0;
3019       search_regs.start = 0;
3020       search_regs.end = 0;
3021
3022       search_regs_saved = 1;
3023     }
3024 }
3025
3026 /* Called upon exit from filters and sentinels. */
3027 void
3028 restore_search_regs ()
3029 {
3030   if (search_regs_saved)
3031     {
3032       if (search_regs.num_regs > 0)
3033         {
3034           xfree (search_regs.start);
3035           xfree (search_regs.end);
3036         }
3037       search_regs.num_regs = saved_search_regs.num_regs;
3038       search_regs.start = saved_search_regs.start;
3039       search_regs.end = saved_search_regs.end;
3040       last_thing_searched = saved_last_thing_searched;
3041       saved_last_thing_searched = Qnil;
3042       search_regs_saved = 0;
3043     }
3044 }
3045
3046 static Lisp_Object
3047 unwind_set_match_data (list)
3048      Lisp_Object list;
3049 {
3050   /* It is safe to free (evaporate) the markers immediately.  */
3051   return Fset_match_data (list, Qevaporate);
3052 }
3053
3054 /* Called to unwind protect the match data.  */
3055 void
3056 record_unwind_save_match_data ()
3057 {
3058   record_unwind_protect (unwind_set_match_data,
3059                          Fmatch_data (Qnil, Qnil, Qnil));
3060 }
3061
3062 /* Quote a string to inactivate reg-expr chars */
3063
3064 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3065        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3066      (string)
3067      Lisp_Object string;
3068 {
3069   register unsigned char *in, *out, *end;
3070   register unsigned char *temp;
3071   int backslashes_added = 0;
3072
3073   CHECK_STRING (string);
3074
3075   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3076
3077   /* Now copy the data into the new string, inserting escapes. */
3078
3079   in = SDATA (string);
3080   end = in + SBYTES (string);
3081   out = temp;
3082
3083   for (; in != end; in++)
3084     {
3085       if (*in == '['
3086           || *in == '*' || *in == '.' || *in == '\\'
3087           || *in == '?' || *in == '+'
3088           || *in == '^' || *in == '$')
3089         *out++ = '\\', backslashes_added++;
3090       *out++ = *in;
3091     }
3092
3093   return make_specified_string (temp,
3094                                 SCHARS (string) + backslashes_added,
3095                                 out - temp,
3096                                 STRING_MULTIBYTE (string));
3097 }
3098 \f
3099 void
3100 syms_of_search ()
3101 {
3102   register int i;
3103
3104   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3105     {
3106       searchbufs[i].buf.allocated = 100;
3107       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3108       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3109       searchbufs[i].regexp = Qnil;
3110       searchbufs[i].whitespace_regexp = Qnil;
3111       searchbufs[i].syntax_table = Qnil;
3112       staticpro (&searchbufs[i].regexp);
3113       staticpro (&searchbufs[i].whitespace_regexp);
3114       staticpro (&searchbufs[i].syntax_table);
3115       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3116     }
3117   searchbuf_head = &searchbufs[0];
3118
3119   Qsearch_failed = intern ("search-failed");
3120   staticpro (&Qsearch_failed);
3121   Qinvalid_regexp = intern ("invalid-regexp");
3122   staticpro (&Qinvalid_regexp);
3123
3124   Fput (Qsearch_failed, Qerror_conditions,
3125         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3126   Fput (Qsearch_failed, Qerror_message,
3127         build_string ("Search failed"));
3128
3129   Fput (Qinvalid_regexp, Qerror_conditions,
3130         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3131   Fput (Qinvalid_regexp, Qerror_message,
3132         build_string ("Invalid regexp"));
3133
3134   last_thing_searched = Qnil;
3135   staticpro (&last_thing_searched);
3136
3137   saved_last_thing_searched = Qnil;
3138   staticpro (&saved_last_thing_searched);
3139
3140   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3141       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3142 Some commands use this for user-specified regexps.
3143 Spaces that occur inside character classes or repetition operators
3144 or other such regexp constructs are not replaced with this.
3145 A value of nil (which is the normal value) means treat spaces literally.  */);
3146   Vsearch_spaces_regexp = Qnil;
3147
3148   defsubr (&Slooking_at);
3149   defsubr (&Sposix_looking_at);
3150   defsubr (&Sstring_match);
3151   defsubr (&Sposix_string_match);
3152   defsubr (&Ssearch_forward);
3153   defsubr (&Ssearch_backward);
3154   defsubr (&Sword_search_forward);
3155   defsubr (&Sword_search_backward);
3156   defsubr (&Sre_search_forward);
3157   defsubr (&Sre_search_backward);
3158   defsubr (&Sposix_search_forward);
3159   defsubr (&Sposix_search_backward);
3160   defsubr (&Sreplace_match);
3161   defsubr (&Smatch_beginning);
3162   defsubr (&Smatch_end);
3163   defsubr (&Smatch_data);
3164   defsubr (&Sset_match_data);
3165   defsubr (&Sregexp_quote);
3166 }
3167
3168 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3169    (do not change this comment) */