code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
   3                  2003, 2004, 2005, 2006, 2007  Free Software Foundation, Inc.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 3, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  20 Boston, MA 02110-1301, USA.  */
  21
  22
  23 #include <config.h>
  24 #include "lisp.h"
  25 #include "syntax.h"
  26 #include "category.h"
  27 #include "buffer.h"
  28 #include "character.h"
  29 #include "charset.h"
  30 #include "region-cache.h"
  31 #include "commands.h"
  32 #include "blockinput.h"
  33 #include "intervals.h"
  34
  35 #include <sys/types.h>
  36 #include "regex.h"
  37
  38 #define REGEXP_CACHE_SIZE 20
  39
  40 /* If the regexp is non-nil, then the buffer contains the compiled form
  41    of that regexp, suitable for searching.  */
  42 struct regexp_cache
  43 {
  44   struct regexp_cache *next;
  45   Lisp_Object regexp, whitespace_regexp;
  46   /* Syntax table for which the regexp applies.  We need this because
  47      of character classes.  If this is t, then the compiled pattern is valid
  48      for any syntax-table.  */
  49   Lisp_Object syntax_table;
  50   struct re_pattern_buffer buf;
  51   char fastmap[0400];
  52   /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  53   char posix;
  54 };
  55
  56 /* The instances of that struct.  */
  57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
  58
  59 /* The head of the linked list; points to the most recently used buffer.  */
  60 struct regexp_cache *searchbuf_head;
  61
  62
  63 /* Every call to re_match, etc., must pass &search_regs as the regs
  64    argument unless you can show it is unnecessary (i.e., if re_match
  65    is certainly going to be called again before region-around-match
  66    can be called).
  67
  68    Since the registers are now dynamically allocated, we need to make
  69    sure not to refer to the Nth register before checking that it has
  70    been allocated by checking search_regs.num_regs.
  71
  72    The regex code keeps track of whether it has allocated the search
  73    buffer using bits in the re_pattern_buffer.  This means that whenever
  74    you compile a new pattern, it completely forgets whether it has
  75    allocated any registers, and will allocate new registers the next
  76    time you call a searching or matching function.  Therefore, we need
  77    to call re_set_registers after compiling a new pattern or after
  78    setting the match registers, so that the regex functions will be
  79    able to free or re-allocate it properly.  */
  80 static struct re_registers search_regs;
  81
  82 /* The buffer in which the last search was performed, or
  83    Qt if the last search was done in a string;
  84    Qnil if no searching has been done yet.  */
  85 static Lisp_Object last_thing_searched;
  86
  87 /* error condition signaled when regexp compile_pattern fails */
  88
  89 Lisp_Object Qinvalid_regexp;
  90
  91 /* Error condition used for failing searches */
  92 Lisp_Object Qsearch_failed;
  93
  94 Lisp_Object Vsearch_spaces_regexp;
  95
  96 /* If non-nil, the match data will not be changed during call to
  97    searching or matching functions.  This variable is for internal use
  98    only.  */
  99 Lisp_Object Vinhibit_changing_match_data;
 100
 101 static void set_search_regs ();
 102 static void save_search_regs ();
 103 static int simple_search ();
 104 static int boyer_moore ();
 105 static int search_buffer ();
 106 static void matcher_overflow () NO_RETURN;
 107
 108 static void
 109 matcher_overflow ()
 110 {
 111   error ("Stack overflow in regexp matcher");
 112 }
 113
 114 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 115    PATTERN is the pattern to compile.
 116    CP is the place to put the result.
 117    TRANSLATE is a translation table for ignoring case, or nil for none.
 118    REGP is the structure that says where to store the "register"
 119    values that will result from matching this pattern.
 120    If it is 0, we should compile the pattern not to record any
 121    subexpression bounds.
 122    POSIX is nonzero if we want full backtracking (POSIX style)
 123    for this pattern.  0 means backtrack only enough to get a valid match.
 124
 125    The behavior also depends on Vsearch_spaces_regexp.  */
 126
 127 static void
 128 compile_pattern_1 (cp, pattern, translate, regp, posix)
 129      struct regexp_cache *cp;
 130      Lisp_Object pattern;
 131      Lisp_Object translate;
 132      struct re_registers *regp;
 133      int posix;
 134 {
 135   char *val;
 136   reg_syntax_t old;
 137
 138   cp->regexp = Qnil;
 139   cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
 140   cp->posix = posix;
 141   cp->buf.multibyte = STRING_MULTIBYTE (pattern);
 142   cp->buf.charset_unibyte = charset_unibyte;
 143   cp->whitespace_regexp = Vsearch_spaces_regexp;
 144   /* rms: I think BLOCK_INPUT is not needed here any more,
 145      because regex.c defines malloc to call xmalloc.
 146      Using BLOCK_INPUT here means the debugger won't run if an error occurs.
 147      So let's turn it off.  */
 148   /*  BLOCK_INPUT;  */
 149   old = re_set_syntax (RE_SYNTAX_EMACS
 150                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 151   re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
 152                             : SDATA (Vsearch_spaces_regexp));
 153
 154   val = (char *) re_compile_pattern ((char *) SDATA (pattern),
 155                                      SBYTES (pattern), &cp->buf);
 156
 157   /* If the compiled pattern hard codes some of the contents of the
 158      syntax-table, it can only be reused with *this* syntax table.  */
 159   cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
 160
 161   re_set_whitespace_regexp (NULL);
 162
 163   re_set_syntax (old);
 164   /* UNBLOCK_INPUT;  */
 165   if (val)
 166     xsignal1 (Qinvalid_regexp, build_string (val));
 167
 168   cp->regexp = Fcopy_sequence (pattern);
 169 }
 170
 171 /* Shrink each compiled regexp buffer in the cache
 172    to the size actually used right now.
 173    This is called from garbage collection.  */
 174
 175 void
 176 shrink_regexp_cache ()
 177 {
 178   struct regexp_cache *cp;
 179
 180   for (cp = searchbuf_head; cp != 0; cp = cp->next)
 181     {
 182       cp->buf.allocated = cp->buf.used;
 183       cp->buf.buffer
 184         = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
 185     }
 186 }
 187
 188 /* Clear the regexp cache w.r.t. a particular syntax table,
 189    because it was changed.
 190    There is no danger of memory leak here because re_compile_pattern
 191    automagically manages the memory in each re_pattern_buffer struct,
 192    based on its `allocated' and `buffer' values.  */
 193 void
 194 clear_regexp_cache ()
 195 {
 196   int i;
 197
 198   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
 199     /* It's tempting to compare with the syntax-table we've actually changd,
 200        but it's not sufficient because char-table inheritance mewans that
 201        modifying one syntax-table can change others at the same time.  */
 202     if (!EQ (searchbufs[i].syntax_table, Qt))
 203       searchbufs[i].regexp = Qnil;
 204 }
 205
 206 /* Compile a regexp if necessary, but first check to see if there's one in
 207    the cache.
 208    PATTERN is the pattern to compile.
 209    TRANSLATE is a translation table for ignoring case, or nil for none.
 210    REGP is the structure that says where to store the "register"
 211    values that will result from matching this pattern.
 212    If it is 0, we should compile the pattern not to record any
 213    subexpression bounds.
 214    POSIX is nonzero if we want full backtracking (POSIX style)
 215    for this pattern.  0 means backtrack only enough to get a valid match.  */
 216
 217 struct re_pattern_buffer *
 218 compile_pattern (pattern, regp, translate, posix, multibyte)
 219      Lisp_Object pattern;
 220      struct re_registers *regp;
 221      Lisp_Object translate;
 222      int posix, multibyte;
 223 {
 224   struct regexp_cache *cp, **cpp;
 225
 226   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 227     {
 228       cp = *cpp;
 229       /* Entries are initialized to nil, and may be set to nil by
 230          compile_pattern_1 if the pattern isn't valid.  Don't apply
 231          string accessors in those cases.  However, compile_pattern_1
 232          is only applied to the cache entry we pick here to reuse.  So
 233          nil should never appear before a non-nil entry.  */
 234       if (NILP (cp->regexp))
 235         goto compile_it;
 236       if (SCHARS (cp->regexp) == SCHARS (pattern)
 237           && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
 238           && !NILP (Fstring_equal (cp->regexp, pattern))
 239           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
 240           && cp->posix == posix
 241           && (EQ (cp->syntax_table, Qt)
 242               || EQ (cp->syntax_table, current_buffer->syntax_table))
 243           && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
 244           && cp->buf.charset_unibyte == charset_unibyte)
 245         break;
 246
 247       /* If we're at the end of the cache, compile into the nil cell
 248          we found, or the last (least recently used) cell with a
 249          string value.  */
 250       if (cp->next == 0)
 251         {
 252         compile_it:
 253           compile_pattern_1 (cp, pattern, translate, regp, posix);
 254           break;
 255         }
 256     }
 257
 258   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 259      either because we found it in the cache or because we just compiled it.
 260      Move it to the front of the queue to mark it as most recently used.  */
 261   *cpp = cp->next;
 262   cp->next = searchbuf_head;
 263   searchbuf_head = cp;
 264
 265   /* Advise the searching functions about the space we have allocated
 266      for register data.  */
 267   if (regp)
 268     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 269
 270   /* The compiled pattern can be used both for mulitbyte and unibyte
 271      target.  But, we have to tell which the pattern is used for. */
 272   cp->buf.target_multibyte = multibyte;
 273
 274   return &cp->buf;
 275 }
 276
 277 \f
 278 static Lisp_Object
 279 looking_at_1 (string, posix)
 280      Lisp_Object string;
 281      int posix;
 282 {
 283   Lisp_Object val;
 284   unsigned char *p1, *p2;
 285   int s1, s2;
 286   register int i;
 287   struct re_pattern_buffer *bufp;
 288
 289   if (running_asynch_code)
 290     save_search_regs ();
 291
 292   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 293   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 294     = current_buffer->case_eqv_table;
 295
 296   CHECK_STRING (string);
 297   bufp = compile_pattern (string,
 298                           (NILP (Vinhibit_changing_match_data)
 299                            ? &search_regs : NULL),
 300                           (!NILP (current_buffer->case_fold_search)
 301                            ? current_buffer->case_canon_table : Qnil),
 302                           posix,
 303                           !NILP (current_buffer->enable_multibyte_characters));
 304
 305   immediate_quit = 1;
 306   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 307
 308   /* Get pointers and sizes of the two strings
 309      that make up the visible portion of the buffer. */
 310
 311   p1 = BEGV_ADDR;
 312   s1 = GPT_BYTE - BEGV_BYTE;
 313   p2 = GAP_END_ADDR;
 314   s2 = ZV_BYTE - GPT_BYTE;
 315   if (s1 < 0)
 316     {
 317       p2 = p1;
 318       s2 = ZV_BYTE - BEGV_BYTE;
 319       s1 = 0;
 320     }
 321   if (s2 < 0)
 322     {
 323       s1 = ZV_BYTE - BEGV_BYTE;
 324       s2 = 0;
 325     }
 326
 327   re_match_object = Qnil;
 328
 329   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 330                   PT_BYTE - BEGV_BYTE,
 331                   (NILP (Vinhibit_changing_match_data)
 332                    ? &search_regs : NULL),
 333                   ZV_BYTE - BEGV_BYTE);
 334   immediate_quit = 0;
 335
 336   if (i == -2)
 337     matcher_overflow ();
 338
 339   val = (0 <= i ? Qt : Qnil);
 340   if (NILP (Vinhibit_changing_match_data) && i >= 0)
 341     for (i = 0; i < search_regs.num_regs; i++)
 342       if (search_regs.start[i] >= 0)
 343         {
 344           search_regs.start[i]
 345             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 346           search_regs.end[i]
 347             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 348         }
 349
 350   /* Set last_thing_searched only when match data is changed.  */
 351   if (NILP (Vinhibit_changing_match_data))
 352     XSETBUFFER (last_thing_searched, current_buffer);
 353
 354   return val;
 355 }
 356
 357 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 358        doc: /* Return t if text after point matches regular expression REGEXP.
 359 This function modifies the match data that `match-beginning',
 360 `match-end' and `match-data' access; save and restore the match
 361 data if you want to preserve them.  */)
 362      (regexp)
 363      Lisp_Object regexp;
 364 {
 365   return looking_at_1 (regexp, 0);
 366 }
 367
 368 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 369        doc: /* Return t if text after point matches regular expression REGEXP.
 370 Find the longest match, in accord with Posix regular expression rules.
 371 This function modifies the match data that `match-beginning',
 372 `match-end' and `match-data' access; save and restore the match
 373 data if you want to preserve them.  */)
 374      (regexp)
 375      Lisp_Object regexp;
 376 {
 377   return looking_at_1 (regexp, 1);
 378 }
 379 \f
 380 static Lisp_Object
 381 string_match_1 (regexp, string, start, posix)
 382      Lisp_Object regexp, string, start;
 383      int posix;
 384 {
 385   int val;
 386   struct re_pattern_buffer *bufp;
 387   int pos, pos_byte;
 388   int i;
 389
 390   if (running_asynch_code)
 391     save_search_regs ();
 392
 393   CHECK_STRING (regexp);
 394   CHECK_STRING (string);
 395
 396   if (NILP (start))
 397     pos = 0, pos_byte = 0;
 398   else
 399     {
 400       int len = SCHARS (string);
 401
 402       CHECK_NUMBER (start);
 403       pos = XINT (start);
 404       if (pos < 0 && -pos <= len)
 405         pos = len + pos;
 406       else if (0 > pos || pos > len)
 407         args_out_of_range (string, start);
 408       pos_byte = string_char_to_byte (string, pos);
 409     }
 410
 411   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 412   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 413     = current_buffer->case_eqv_table;
 414
 415   bufp = compile_pattern (regexp,
 416                           (NILP (Vinhibit_changing_match_data)
 417                            ? &search_regs : NULL),
 418                           (!NILP (current_buffer->case_fold_search)
 419                            ? current_buffer->case_canon_table : Qnil),
 420                           posix,
 421                           STRING_MULTIBYTE (string));
 422   immediate_quit = 1;
 423   re_match_object = string;
 424
 425   val = re_search (bufp, (char *) SDATA (string),
 426                    SBYTES (string), pos_byte,
 427                    SBYTES (string) - pos_byte,
 428                    (NILP (Vinhibit_changing_match_data)
 429                     ? &search_regs : NULL));
 430   immediate_quit = 0;
 431
 432   /* Set last_thing_searched only when match data is changed.  */
 433   if (NILP (Vinhibit_changing_match_data))
 434     last_thing_searched = Qt;
 435
 436   if (val == -2)
 437     matcher_overflow ();
 438   if (val < 0) return Qnil;
 439
 440   if (NILP (Vinhibit_changing_match_data))
 441     for (i = 0; i < search_regs.num_regs; i++)
 442       if (search_regs.start[i] >= 0)
 443         {
 444           search_regs.start[i]
 445             = string_byte_to_char (string, search_regs.start[i]);
 446           search_regs.end[i]
 447             = string_byte_to_char (string, search_regs.end[i]);
 448         }
 449
 450   return make_number (string_byte_to_char (string, val));
 451 }
 452
 453 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 454        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 455 Matching ignores case if `case-fold-search' is non-nil.
 456 If third arg START is non-nil, start search at that index in STRING.
 457 For index of first char beyond the match, do (match-end 0).
 458 `match-end' and `match-beginning' also give indices of substrings
 459 matched by parenthesis constructs in the pattern.
 460
 461 You can use the function `match-string' to extract the substrings
 462 matched by the parenthesis constructions in REGEXP. */)
 463      (regexp, string, start)
 464      Lisp_Object regexp, string, start;
 465 {
 466   return string_match_1 (regexp, string, start, 0);
 467 }
 468
 469 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 470        doc: /* Return index of start of first match for REGEXP in STRING, or nil.
 471 Find the longest match, in accord with Posix regular expression rules.
 472 Case is ignored if `case-fold-search' is non-nil in the current buffer.
 473 If third arg START is non-nil, start search at that index in STRING.
 474 For index of first char beyond the match, do (match-end 0).
 475 `match-end' and `match-beginning' also give indices of substrings
 476 matched by parenthesis constructs in the pattern.  */)
 477      (regexp, string, start)
 478      Lisp_Object regexp, string, start;
 479 {
 480   return string_match_1 (regexp, string, start, 1);
 481 }
 482
 483 /* Match REGEXP against STRING, searching all of STRING,
 484    and return the index of the match, or negative on failure.
 485    This does not clobber the match data.  */
 486
 487 int
 488 fast_string_match (regexp, string)
 489      Lisp_Object regexp, string;
 490 {
 491   int val;
 492   struct re_pattern_buffer *bufp;
 493
 494   bufp = compile_pattern (regexp, 0, Qnil,
 495                           0, STRING_MULTIBYTE (string));
 496   immediate_quit = 1;
 497   re_match_object = string;
 498
 499   val = re_search (bufp, (char *) SDATA (string),
 500                    SBYTES (string), 0,
 501                    SBYTES (string), 0);
 502   immediate_quit = 0;
 503   return val;
 504 }
 505
 506 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 507    and return the index of the match, or negative on failure.
 508    This does not clobber the match data.
 509    We assume that STRING contains single-byte characters.  */
 510
 511 extern Lisp_Object Vascii_downcase_table;
 512
 513 int
 514 fast_c_string_match_ignore_case (regexp, string)
 515      Lisp_Object regexp;
 516      const char *string;
 517 {
 518   int val;
 519   struct re_pattern_buffer *bufp;
 520   int len = strlen (string);
 521
 522   regexp = string_make_unibyte (regexp);
 523   re_match_object = Qt;
 524   bufp = compile_pattern (regexp, 0,
 525                           Vascii_canon_table, 0,
 526                           0);
 527   immediate_quit = 1;
 528   val = re_search (bufp, string, len, 0, len, 0);
 529   immediate_quit = 0;
 530   return val;
 531 }
 532
 533 /* Like fast_string_match but ignore case.  */
 534
 535 int
 536 fast_string_match_ignore_case (regexp, string)
 537      Lisp_Object regexp, string;
 538 {
 539   int val;
 540   struct re_pattern_buffer *bufp;
 541
 542   bufp = compile_pattern (regexp, 0, Vascii_canon_table,
 543                           0, STRING_MULTIBYTE (string));
 544   immediate_quit = 1;
 545   re_match_object = string;
 546
 547   val = re_search (bufp, (char *) SDATA (string),
 548                    SBYTES (string), 0,
 549                    SBYTES (string), 0);
 550   immediate_quit = 0;
 551   return val;
 552 }
 553 \f
 554 /* The newline cache: remembering which sections of text have no newlines.  */
 555
 556 /* If the user has requested newline caching, make sure it's on.
 557    Otherwise, make sure it's off.
 558    This is our cheezy way of associating an action with the change of
 559    state of a buffer-local variable.  */
 560 static void
 561 newline_cache_on_off (buf)
 562      struct buffer *buf;
 563 {
 564   if (NILP (buf->cache_long_line_scans))
 565     {
 566       /* It should be off.  */
 567       if (buf->newline_cache)
 568         {
 569           free_region_cache (buf->newline_cache);
 570           buf->newline_cache = 0;
 571         }
 572     }
 573   else
 574     {
 575       /* It should be on.  */
 576       if (buf->newline_cache == 0)
 577         buf->newline_cache = new_region_cache ();
 578     }
 579 }
 580
 581 \f
 582 /* Search for COUNT instances of the character TARGET between START and END.
 583
 584    If COUNT is positive, search forwards; END must be >= START.
 585    If COUNT is negative, search backwards for the -COUNTth instance;
 586       END must be <= START.
 587    If COUNT is zero, do anything you please; run rogue, for all I care.
 588
 589    If END is zero, use BEGV or ZV instead, as appropriate for the
 590    direction indicated by COUNT.
 591
 592    If we find COUNT instances, set *SHORTAGE to zero, and return the
 593    position past the COUNTth match.  Note that for reverse motion
 594    this is not the same as the usual convention for Emacs motion commands.
 595
 596    If we don't find COUNT instances before reaching END, set *SHORTAGE
 597    to the number of TARGETs left unfound, and return END.
 598
 599    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 600    except when inside redisplay.  */
 601
 602 int
 603 scan_buffer (target, start, end, count, shortage, allow_quit)
 604      register int target;
 605      int start, end;
 606      int count;
 607      int *shortage;
 608      int allow_quit;
 609 {
 610   struct region_cache *newline_cache;
 611   int direction;
 612
 613   if (count > 0)
 614     {
 615       direction = 1;
 616       if (! end) end = ZV;
 617     }
 618   else
 619     {
 620       direction = -1;
 621       if (! end) end = BEGV;
 622     }
 623
 624   newline_cache_on_off (current_buffer);
 625   newline_cache = current_buffer->newline_cache;
 626
 627   if (shortage != 0)
 628     *shortage = 0;
 629
 630   immediate_quit = allow_quit;
 631
 632   if (count > 0)
 633     while (start != end)
 634       {
 635         /* Our innermost scanning loop is very simple; it doesn't know
 636            about gaps, buffer ends, or the newline cache.  ceiling is
 637            the position of the last character before the next such
 638            obstacle --- the last character the dumb search loop should
 639            examine.  */
 640         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 641         int start_byte = CHAR_TO_BYTE (start);
 642         int tem;
 643
 644         /* If we're looking for a newline, consult the newline cache
 645            to see where we can avoid some scanning.  */
 646         if (target == '\n' && newline_cache)
 647           {
 648             int next_change;
 649             immediate_quit = 0;
 650             while (region_cache_forward
 651                    (current_buffer, newline_cache, start_byte, &next_change))
 652               start_byte = next_change;
 653             immediate_quit = allow_quit;
 654
 655             /* START should never be after END.  */
 656             if (start_byte > ceiling_byte)
 657               start_byte = ceiling_byte;
 658
 659             /* Now the text after start is an unknown region, and
 660                next_change is the position of the next known region. */
 661             ceiling_byte = min (next_change - 1, ceiling_byte);
 662           }
 663
 664         /* The dumb loop can only scan text stored in contiguous
 665            bytes. BUFFER_CEILING_OF returns the last character
 666            position that is contiguous, so the ceiling is the
 667            position after that.  */
 668         tem = BUFFER_CEILING_OF (start_byte);
 669         ceiling_byte = min (tem, ceiling_byte);
 670
 671         {
 672           /* The termination address of the dumb loop.  */
 673           register unsigned char *ceiling_addr
 674             = BYTE_POS_ADDR (ceiling_byte) + 1;
 675           register unsigned char *cursor
 676             = BYTE_POS_ADDR (start_byte);
 677           unsigned char *base = cursor;
 678
 679           while (cursor < ceiling_addr)
 680             {
 681               unsigned char *scan_start = cursor;
 682
 683               /* The dumb loop.  */
 684               while (*cursor != target && ++cursor < ceiling_addr)
 685                 ;
 686
 687               /* If we're looking for newlines, cache the fact that
 688                  the region from start to cursor is free of them. */
 689               if (target == '\n' && newline_cache)
 690                 know_region_cache (current_buffer, newline_cache,
 691                                    start_byte + scan_start - base,
 692                                    start_byte + cursor - base);
 693
 694               /* Did we find the target character?  */
 695               if (cursor < ceiling_addr)
 696                 {
 697                   if (--count == 0)
 698                     {
 699                       immediate_quit = 0;
 700                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 701                     }
 702                   cursor++;
 703                 }
 704             }
 705
 706           start = BYTE_TO_CHAR (start_byte + cursor - base);
 707         }
 708       }
 709   else
 710     while (start > end)
 711       {
 712         /* The last character to check before the next obstacle.  */
 713         int ceiling_byte = CHAR_TO_BYTE (end);
 714         int start_byte = CHAR_TO_BYTE (start);
 715         int tem;
 716
 717         /* Consult the newline cache, if appropriate.  */
 718         if (target == '\n' && newline_cache)
 719           {
 720             int next_change;
 721             immediate_quit = 0;
 722             while (region_cache_backward
 723                    (current_buffer, newline_cache, start_byte, &next_change))
 724               start_byte = next_change;
 725             immediate_quit = allow_quit;
 726
 727             /* Start should never be at or before end.  */
 728             if (start_byte <= ceiling_byte)
 729               start_byte = ceiling_byte + 1;
 730
 731             /* Now the text before start is an unknown region, and
 732                next_change is the position of the next known region. */
 733             ceiling_byte = max (next_change, ceiling_byte);
 734           }
 735
 736         /* Stop scanning before the gap.  */
 737         tem = BUFFER_FLOOR_OF (start_byte - 1);
 738         ceiling_byte = max (tem, ceiling_byte);
 739
 740         {
 741           /* The termination address of the dumb loop.  */
 742           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 743           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 744           unsigned char *base = cursor;
 745
 746           while (cursor >= ceiling_addr)
 747             {
 748               unsigned char *scan_start = cursor;
 749
 750               while (*cursor != target && --cursor >= ceiling_addr)
 751                 ;
 752
 753               /* If we're looking for newlines, cache the fact that
 754                  the region from after the cursor to start is free of them.  */
 755               if (target == '\n' && newline_cache)
 756                 know_region_cache (current_buffer, newline_cache,
 757                                    start_byte + cursor - base,
 758                                    start_byte + scan_start - base);
 759
 760               /* Did we find the target character?  */
 761               if (cursor >= ceiling_addr)
 762                 {
 763                   if (++count >= 0)
 764                     {
 765                       immediate_quit = 0;
 766                       return BYTE_TO_CHAR (start_byte + cursor - base);
 767                     }
 768                   cursor--;
 769                 }
 770             }
 771
 772           start = BYTE_TO_CHAR (start_byte + cursor - base);
 773         }
 774       }
 775
 776   immediate_quit = 0;
 777   if (shortage != 0)
 778     *shortage = count * direction;
 779   return start;
 780 }
 781 \f
 782 /* Search for COUNT instances of a line boundary, which means either a
 783    newline or (if selective display enabled) a carriage return.
 784    Start at START.  If COUNT is negative, search backwards.
 785
 786    We report the resulting position by calling TEMP_SET_PT_BOTH.
 787
 788    If we find COUNT instances. we position after (always after,
 789    even if scanning backwards) the COUNTth match, and return 0.
 790
 791    If we don't find COUNT instances before reaching the end of the
 792    buffer (or the beginning, if scanning backwards), we return
 793    the number of line boundaries left unfound, and position at
 794    the limit we bumped up against.
 795
 796    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 797    except in special cases.  */
 798
 799 int
 800 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 801      int start, start_byte;
 802      int limit, limit_byte;
 803      register int count;
 804      int allow_quit;
 805 {
 806   int direction = ((count > 0) ? 1 : -1);
 807
 808   register unsigned char *cursor;
 809   unsigned char *base;
 810
 811   register int ceiling;
 812   register unsigned char *ceiling_addr;
 813
 814   int old_immediate_quit = immediate_quit;
 815
 816   /* The code that follows is like scan_buffer
 817      but checks for either newline or carriage return.  */
 818
 819   if (allow_quit)
 820     immediate_quit++;
 821
 822   start_byte = CHAR_TO_BYTE (start);
 823
 824   if (count > 0)
 825     {
 826       while (start_byte < limit_byte)
 827         {
 828           ceiling =  BUFFER_CEILING_OF (start_byte);
 829           ceiling = min (limit_byte - 1, ceiling);
 830           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 831           base = (cursor = BYTE_POS_ADDR (start_byte));
 832           while (1)
 833             {
 834               while (*cursor != '\n' && ++cursor != ceiling_addr)
 835                 ;
 836
 837               if (cursor != ceiling_addr)
 838                 {
 839                   if (--count == 0)
 840                     {
 841                       immediate_quit = old_immediate_quit;
 842                       start_byte = start_byte + cursor - base + 1;
 843                       start = BYTE_TO_CHAR (start_byte);
 844                       TEMP_SET_PT_BOTH (start, start_byte);
 845                       return 0;
 846                     }
 847                   else
 848                     if (++cursor == ceiling_addr)
 849                       break;
 850                 }
 851               else
 852                 break;
 853             }
 854           start_byte += cursor - base;
 855         }
 856     }
 857   else
 858     {
 859       while (start_byte > limit_byte)
 860         {
 861           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 862           ceiling = max (limit_byte, ceiling);
 863           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 864           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 865           while (1)
 866             {
 867               while (--cursor != ceiling_addr && *cursor != '\n')
 868                 ;
 869
 870               if (cursor != ceiling_addr)
 871                 {
 872                   if (++count == 0)
 873                     {
 874                       immediate_quit = old_immediate_quit;
 875                       /* Return the position AFTER the match we found.  */
 876                       start_byte = start_byte + cursor - base + 1;
 877                       start = BYTE_TO_CHAR (start_byte);
 878                       TEMP_SET_PT_BOTH (start, start_byte);
 879                       return 0;
 880                     }
 881                 }
 882               else
 883                 break;
 884             }
 885           /* Here we add 1 to compensate for the last decrement
 886              of CURSOR, which took it past the valid range.  */
 887           start_byte += cursor - base + 1;
 888         }
 889     }
 890
 891   TEMP_SET_PT_BOTH (limit, limit_byte);
 892   immediate_quit = old_immediate_quit;
 893
 894   return count * direction;
 895 }
 896
 897 int
 898 find_next_newline_no_quit (from, cnt)
 899      register int from, cnt;
 900 {
 901   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 902 }
 903
 904 /* Like find_next_newline, but returns position before the newline,
 905    not after, and only search up to TO.  This isn't just
 906    find_next_newline (...)-1, because you might hit TO.  */
 907
 908 int
 909 find_before_next_newline (from, to, cnt)
 910      int from, to, cnt;
 911 {
 912   int shortage;
 913   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 914
 915   if (shortage == 0)
 916     pos--;
 917
 918   return pos;
 919 }
 920 \f
 921 /* Subroutines of Lisp buffer search functions. */
 922
 923 static Lisp_Object
 924 search_command (string, bound, noerror, count, direction, RE, posix)
 925      Lisp_Object string, bound, noerror, count;
 926      int direction;
 927      int RE;
 928      int posix;
 929 {
 930   register int np;
 931   int lim, lim_byte;
 932   int n = direction;
 933
 934   if (!NILP (count))
 935     {
 936       CHECK_NUMBER (count);
 937       n *= XINT (count);
 938     }
 939
 940   CHECK_STRING (string);
 941   if (NILP (bound))
 942     {
 943       if (n > 0)
 944         lim = ZV, lim_byte = ZV_BYTE;
 945       else
 946         lim = BEGV, lim_byte = BEGV_BYTE;
 947     }
 948   else
 949     {
 950       CHECK_NUMBER_COERCE_MARKER (bound);
 951       lim = XINT (bound);
 952       if (n > 0 ? lim < PT : lim > PT)
 953         error ("Invalid search bound (wrong side of point)");
 954       if (lim > ZV)
 955         lim = ZV, lim_byte = ZV_BYTE;
 956       else if (lim < BEGV)
 957         lim = BEGV, lim_byte = BEGV_BYTE;
 958       else
 959         lim_byte = CHAR_TO_BYTE (lim);
 960     }
 961
 962   /* This is so set_image_of_range_1 in regex.c can find the EQV table.  */
 963   XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
 964     = current_buffer->case_eqv_table;
 965
 966   np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
 967                       (!NILP (current_buffer->case_fold_search)
 968                        ? current_buffer->case_canon_table
 969                        : Qnil),
 970                       (!NILP (current_buffer->case_fold_search)
 971                        ? current_buffer->case_eqv_table
 972                        : Qnil),
 973                       posix);
 974   if (np <= 0)
 975     {
 976       if (NILP (noerror))
 977         xsignal1 (Qsearch_failed, string);
 978
 979       if (!EQ (noerror, Qt))
 980         {
 981           if (lim < BEGV || lim > ZV)
 982             abort ();
 983           SET_PT_BOTH (lim, lim_byte);
 984           return Qnil;
 985 #if 0 /* This would be clean, but maybe programs depend on
 986          a value of nil here.  */
 987           np = lim;
 988 #endif
 989         }
 990       else
 991         return Qnil;
 992     }
 993
 994   if (np < BEGV || np > ZV)
 995     abort ();
 996
 997   SET_PT (np);
 998
 999   return make_number (np);
1000 }
1001 \f
1002 /* Return 1 if REGEXP it matches just one constant string.  */
1003
1004 static int
1005 trivial_regexp_p (regexp)
1006      Lisp_Object regexp;
1007 {
1008   int len = SBYTES (regexp);
1009   unsigned char *s = SDATA (regexp);
1010   while (--len >= 0)
1011     {
1012       switch (*s++)
1013         {
1014         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1015           return 0;
1016         case '\\':
1017           if (--len < 0)
1018             return 0;
1019           switch (*s++)
1020             {
1021             case '|': case '(': case ')': case '`': case '\'': case 'b':
1022             case 'B': case '<': case '>': case 'w': case 'W': case 's':
1023             case 'S': case '=': case '{': case '}': case '_':
1024             case 'c': case 'C': /* for categoryspec and notcategoryspec */
1025             case '1': case '2': case '3': case '4': case '5':
1026             case '6': case '7': case '8': case '9':
1027               return 0;
1028             }
1029         }
1030     }
1031   return 1;
1032 }
1033
1034 /* Search for the n'th occurrence of STRING in the current buffer,
1035    starting at position POS and stopping at position LIM,
1036    treating STRING as a literal string if RE is false or as
1037    a regular expression if RE is true.
1038
1039    If N is positive, searching is forward and LIM must be greater than POS.
1040    If N is negative, searching is backward and LIM must be less than POS.
1041
1042    Returns -x if x occurrences remain to be found (x > 0),
1043    or else the position at the beginning of the Nth occurrence
1044    (if searching backward) or the end (if searching forward).
1045
1046    POSIX is nonzero if we want full backtracking (POSIX style)
1047    for this pattern.  0 means backtrack only enough to get a valid match.  */
1048
1049 #define TRANSLATE(out, trt, d)                  \
1050 do                                              \
1051   {                                             \
1052     if (! NILP (trt))                           \
1053       {                                         \
1054         Lisp_Object temp;                       \
1055         temp = Faref (trt, make_number (d));    \
1056         if (INTEGERP (temp))                    \
1057           out = XINT (temp);                    \
1058         else                                    \
1059           out = d;                              \
1060       }                                         \
1061     else                                        \
1062       out = d;                                  \
1063   }                                             \
1064 while (0)
1065
1066 /* Only used in search_buffer, to record the end position of the match
1067    when searching regexps and SEARCH_REGS should not be changed
1068    (i.e. Vinhibit_changing_match_data is non-nil).  */
1069 static struct re_registers search_regs_1;
1070
1071 static int
1072 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1073                RE, trt, inverse_trt, posix)
1074      Lisp_Object string;
1075      int pos;
1076      int pos_byte;
1077      int lim;
1078      int lim_byte;
1079      int n;
1080      int RE;
1081      Lisp_Object trt;
1082      Lisp_Object inverse_trt;
1083      int posix;
1084 {
1085   int len = SCHARS (string);
1086   int len_byte = SBYTES (string);
1087   register int i;
1088
1089   if (running_asynch_code)
1090     save_search_regs ();
1091
1092   /* Searching 0 times means don't move.  */
1093   /* Null string is found at starting position.  */
1094   if (len == 0 || n == 0)
1095     {
1096       set_search_regs (pos_byte, 0);
1097       return pos;
1098     }
1099
1100   if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1101     {
1102       unsigned char *p1, *p2;
1103       int s1, s2;
1104       struct re_pattern_buffer *bufp;
1105
1106       bufp = compile_pattern (string,
1107                               (NILP (Vinhibit_changing_match_data)
1108                                ? &search_regs : &search_regs_1),
1109                               trt, posix,
1110                               !NILP (current_buffer->enable_multibyte_characters));
1111
1112       immediate_quit = 1;       /* Quit immediately if user types ^G,
1113                                    because letting this function finish
1114                                    can take too long. */
1115       QUIT;                     /* Do a pending quit right away,
1116                                    to avoid paradoxical behavior */
1117       /* Get pointers and sizes of the two strings
1118          that make up the visible portion of the buffer. */
1119
1120       p1 = BEGV_ADDR;
1121       s1 = GPT_BYTE - BEGV_BYTE;
1122       p2 = GAP_END_ADDR;
1123       s2 = ZV_BYTE - GPT_BYTE;
1124       if (s1 < 0)
1125         {
1126           p2 = p1;
1127           s2 = ZV_BYTE - BEGV_BYTE;
1128           s1 = 0;
1129         }
1130       if (s2 < 0)
1131         {
1132           s1 = ZV_BYTE - BEGV_BYTE;
1133           s2 = 0;
1134         }
1135       re_match_object = Qnil;
1136
1137       while (n < 0)
1138         {
1139           int val;
1140           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1141                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1142                              (NILP (Vinhibit_changing_match_data)
1143                               ? &search_regs : &search_regs_1),
1144                              /* Don't allow match past current point */
1145                              pos_byte - BEGV_BYTE);
1146           if (val == -2)
1147             {
1148               matcher_overflow ();
1149             }
1150           if (val >= 0)
1151             {
1152               if (NILP (Vinhibit_changing_match_data))
1153                 {
1154                   pos_byte = search_regs.start[0] + BEGV_BYTE;
1155                   for (i = 0; i < search_regs.num_regs; i++)
1156                     if (search_regs.start[i] >= 0)
1157                       {
1158                         search_regs.start[i]
1159                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1160                         search_regs.end[i]
1161                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1162                       }
1163                   XSETBUFFER (last_thing_searched, current_buffer);
1164                   /* Set pos to the new position. */
1165                   pos = search_regs.start[0];
1166                 }
1167               else
1168                 {
1169                   pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1170                   /* Set pos to the new position.  */
1171                   pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1172                 }
1173             }
1174           else
1175             {
1176               immediate_quit = 0;
1177               return (n);
1178             }
1179           n++;
1180         }
1181       while (n > 0)
1182         {
1183           int val;
1184           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1185                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1186                              (NILP (Vinhibit_changing_match_data)
1187                               ? &search_regs : &search_regs_1),
1188                              lim_byte - BEGV_BYTE);
1189           if (val == -2)
1190             {
1191               matcher_overflow ();
1192             }
1193           if (val >= 0)
1194             {
1195               if (NILP (Vinhibit_changing_match_data))
1196                 {
1197                   pos_byte = search_regs.end[0] + BEGV_BYTE;
1198                   for (i = 0; i < search_regs.num_regs; i++)
1199                     if (search_regs.start[i] >= 0)
1200                       {
1201                         search_regs.start[i]
1202                           = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1203                         search_regs.end[i]
1204                           = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1205                       }
1206                   XSETBUFFER (last_thing_searched, current_buffer);
1207                   pos = search_regs.end[0];
1208                 }
1209               else
1210                 {
1211                   pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1212                   pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1213                 }
1214             }
1215           else
1216             {
1217               immediate_quit = 0;
1218               return (0 - n);
1219             }
1220           n--;
1221         }
1222       immediate_quit = 0;
1223       return (pos);
1224     }
1225   else                          /* non-RE case */
1226     {
1227       unsigned char *raw_pattern, *pat;
1228       int raw_pattern_size;
1229       int raw_pattern_size_byte;
1230       unsigned char *patbuf;
1231       int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1232       unsigned char *base_pat;
1233       /* Set to positive if we find a non-ASCII char that need
1234          translation.  Otherwise set to zero later.  */
1235       int char_base = -1;
1236       int boyer_moore_ok = 1;
1237
1238       /* MULTIBYTE says whether the text to be searched is multibyte.
1239          We must convert PATTERN to match that, or we will not really
1240          find things right.  */
1241
1242       if (multibyte == STRING_MULTIBYTE (string))
1243         {
1244           raw_pattern = (unsigned char *) SDATA (string);
1245           raw_pattern_size = SCHARS (string);
1246           raw_pattern_size_byte = SBYTES (string);
1247         }
1248       else if (multibyte)
1249         {
1250           raw_pattern_size = SCHARS (string);
1251           raw_pattern_size_byte
1252             = count_size_as_multibyte (SDATA (string),
1253                                        raw_pattern_size);
1254           raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1255           copy_text (SDATA (string), raw_pattern,
1256                      SCHARS (string), 0, 1);
1257         }
1258       else
1259         {
1260           /* Converting multibyte to single-byte.
1261
1262              ??? Perhaps this conversion should be done in a special way
1263              by subtracting nonascii-insert-offset from each non-ASCII char,
1264              so that only the multibyte chars which really correspond to
1265              the chosen single-byte character set can possibly match.  */
1266           raw_pattern_size = SCHARS (string);
1267           raw_pattern_size_byte = SCHARS (string);
1268           raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1269           copy_text (SDATA (string), raw_pattern,
1270                      SBYTES (string), 1, 0);
1271         }
1272
1273       /* Copy and optionally translate the pattern.  */
1274       len = raw_pattern_size;
1275       len_byte = raw_pattern_size_byte;
1276       patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1277       pat = patbuf;
1278       base_pat = raw_pattern;
1279       if (multibyte)
1280         {
1281           /* Fill patbuf by translated characters in STRING while
1282              checking if we can use boyer-moore search.  If TRT is
1283              non-nil, we can use boyer-moore search only if TRT can be
1284              represented by the byte array of 256 elements.  For that,
1285              all non-ASCII case-equivalents of all case-senstive
1286              characters in STRING must belong to the same charset and
1287              row.  */
1288
1289           while (--len >= 0)
1290             {
1291               unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1292               int c, translated, inverse;
1293               int in_charlen, charlen;
1294
1295               /* If we got here and the RE flag is set, it's because we're
1296                  dealing with a regexp known to be trivial, so the backslash
1297                  just quotes the next character.  */
1298               if (RE && *base_pat == '\\')
1299                 {
1300                   len--;
1301                   raw_pattern_size--;
1302                   len_byte--;
1303                   base_pat++;
1304                 }
1305
1306               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1307
1308               if (NILP (trt))
1309                 {
1310                   str = base_pat;
1311                   charlen = in_charlen;
1312                 }
1313               else
1314                 {
1315                   /* Translate the character.  */
1316                   TRANSLATE (translated, trt, c);
1317                   charlen = CHAR_STRING (translated, str_base);
1318                   str = str_base;
1319
1320                   /* Check if C has any other case-equivalents.  */
1321                   TRANSLATE (inverse, inverse_trt, c);
1322                   /* If so, check if we can use boyer-moore.  */
1323                   if (c != inverse && boyer_moore_ok)
1324                     {
1325                       /* Check if all equivalents belong to the same
1326                          group of characters.  Note that the check of C
1327                          itself is done by the last iteration.  */
1328                       int this_char_base = -1;
1329
1330                       while (boyer_moore_ok)
1331                         {
1332                           if (ASCII_BYTE_P (inverse))
1333                             {
1334                               if (this_char_base > 0)
1335                                 boyer_moore_ok = 0;
1336                               else
1337                                 {
1338                                   this_char_base = 0;
1339                                   if (char_base < 0)
1340                                     char_base = this_char_base;
1341                                 }
1342                             }
1343                           else if (CHAR_BYTE8_P (inverse))
1344                             /* Boyer-moore search can't handle a
1345                                translation of an eight-bit
1346                                character.  */
1347                             boyer_moore_ok = 0;
1348                           else if (this_char_base < 0)
1349                             {
1350                               this_char_base = inverse & ~0x3F;
1351                               if (char_base < 0)
1352                                 char_base = this_char_base;
1353                               else if (char_base > 0
1354                                        && this_char_base != char_base)
1355                                 boyer_moore_ok = 0;
1356                             }
1357                           else if ((inverse & ~0x3F) != this_char_base)
1358                             boyer_moore_ok = 0;
1359                           if (c == inverse)
1360                             break;
1361                           TRANSLATE (inverse, inverse_trt, inverse);
1362                         }
1363                     }
1364                 }
1365               if (char_base < 0)
1366                 char_base = 0;
1367
1368               /* Store this character into the translated pattern.  */
1369               bcopy (str, pat, charlen);
1370               pat += charlen;
1371               base_pat += in_charlen;
1372               len_byte -= in_charlen;
1373             }
1374         }
1375       else
1376         {
1377           /* Unibyte buffer.  */
1378           char_base = 0;
1379           while (--len >= 0)
1380             {
1381               int c, translated;
1382
1383               /* If we got here and the RE flag is set, it's because we're
1384                  dealing with a regexp known to be trivial, so the backslash
1385                  just quotes the next character.  */
1386               if (RE && *base_pat == '\\')
1387                 {
1388                   len--;
1389                   raw_pattern_size--;
1390                   base_pat++;
1391                 }
1392               c = *base_pat++;
1393               TRANSLATE (translated, trt, c);
1394               *pat++ = translated;
1395             }
1396         }
1397
1398       len_byte = pat - patbuf;
1399       len = raw_pattern_size;
1400       pat = base_pat = patbuf;
1401
1402       if (boyer_moore_ok)
1403         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1404                             pos, pos_byte, lim, lim_byte,
1405                             char_base);
1406       else
1407         return simple_search (n, pat, len, len_byte, trt,
1408                               pos, pos_byte, lim, lim_byte);
1409     }
1410 }
1411 \f
1412 /* Do a simple string search N times for the string PAT,
1413    whose length is LEN/LEN_BYTE,
1414    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1415    TRT is the translation table.
1416
1417    Return the character position where the match is found.
1418    Otherwise, if M matches remained to be found, return -M.
1419
1420    This kind of search works regardless of what is in PAT and
1421    regardless of what is in TRT.  It is used in cases where
1422    boyer_moore cannot work.  */
1423
1424 static int
1425 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1426      int n;
1427      unsigned char *pat;
1428      int len, len_byte;
1429      Lisp_Object trt;
1430      int pos, pos_byte;
1431      int lim, lim_byte;
1432 {
1433   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1434   int forward = n > 0;
1435   /* Number of buffer bytes matched.  Note that this may be different
1436      from len_byte in a multibyte buffer.  */
1437   int match_byte;
1438
1439   if (lim > pos && multibyte)
1440     while (n > 0)
1441       {
1442         while (1)
1443           {
1444             /* Try matching at position POS.  */
1445             int this_pos = pos;
1446             int this_pos_byte = pos_byte;
1447             int this_len = len;
1448             int this_len_byte = len_byte;
1449             unsigned char *p = pat;
1450             if (pos + len > lim || pos_byte + len_byte > lim_byte)
1451               goto stop;
1452
1453             while (this_len > 0)
1454               {
1455                 int charlen, buf_charlen;
1456                 int pat_ch, buf_ch;
1457
1458                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1459                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1460                                                  ZV_BYTE - this_pos_byte,
1461                                                  buf_charlen);
1462                 TRANSLATE (buf_ch, trt, buf_ch);
1463
1464                 if (buf_ch != pat_ch)
1465                   break;
1466
1467                 this_len_byte -= charlen;
1468                 this_len--;
1469                 p += charlen;
1470
1471                 this_pos_byte += buf_charlen;
1472                 this_pos++;
1473               }
1474
1475             if (this_len == 0)
1476               {
1477                 match_byte = this_pos_byte - pos_byte;
1478                 pos += len;
1479                 pos_byte += match_byte;
1480                 break;
1481               }
1482
1483             INC_BOTH (pos, pos_byte);
1484           }
1485
1486         n--;
1487       }
1488   else if (lim > pos)
1489     while (n > 0)
1490       {
1491         while (1)
1492           {
1493             /* Try matching at position POS.  */
1494             int this_pos = pos;
1495             int this_len = len;
1496             unsigned char *p = pat;
1497
1498             if (pos + len > lim)
1499               goto stop;
1500
1501             while (this_len > 0)
1502               {
1503                 int pat_ch = *p++;
1504                 int buf_ch = FETCH_BYTE (this_pos);
1505                 TRANSLATE (buf_ch, trt, buf_ch);
1506
1507                 if (buf_ch != pat_ch)
1508                   break;
1509
1510                 this_len--;
1511                 this_pos++;
1512               }
1513
1514             if (this_len == 0)
1515               {
1516                 match_byte = len;
1517                 pos += len;
1518                 break;
1519               }
1520
1521             pos++;
1522           }
1523
1524         n--;
1525       }
1526   /* Backwards search.  */
1527   else if (lim < pos && multibyte)
1528     while (n < 0)
1529       {
1530         while (1)
1531           {
1532             /* Try matching at position POS.  */
1533             int this_pos = pos - len;
1534             int this_pos_byte;
1535             int this_len = len;
1536             int this_len_byte = len_byte;
1537             unsigned char *p = pat;
1538
1539             if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1540               goto stop;
1541             this_pos_byte = CHAR_TO_BYTE (this_pos);
1542             match_byte = pos_byte - this_pos_byte;
1543
1544             while (this_len > 0)
1545               {
1546                 int charlen, buf_charlen;
1547                 int pat_ch, buf_ch;
1548
1549                 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1550                 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1551                                                  ZV_BYTE - this_pos_byte,
1552                                                  buf_charlen);
1553                 TRANSLATE (buf_ch, trt, buf_ch);
1554
1555                 if (buf_ch != pat_ch)
1556                   break;
1557
1558                 this_len_byte -= charlen;
1559                 this_len--;
1560                 p += charlen;
1561                 this_pos_byte += buf_charlen;
1562                 this_pos++;
1563               }
1564
1565             if (this_len == 0)
1566               {
1567                 pos -= len;
1568                 pos_byte -= match_byte;
1569                 break;
1570               }
1571
1572             DEC_BOTH (pos, pos_byte);
1573           }
1574
1575         n++;
1576       }
1577   else if (lim < pos)
1578     while (n < 0)
1579       {
1580         while (1)
1581           {
1582             /* Try matching at position POS.  */
1583             int this_pos = pos - len;
1584             int this_len = len;
1585             unsigned char *p = pat;
1586
1587             if (this_pos < lim)
1588               goto stop;
1589
1590             while (this_len > 0)
1591               {
1592                 int pat_ch = *p++;
1593                 int buf_ch = FETCH_BYTE (this_pos);
1594                 TRANSLATE (buf_ch, trt, buf_ch);
1595
1596                 if (buf_ch != pat_ch)
1597                   break;
1598                 this_len--;
1599                 this_pos++;
1600               }
1601
1602             if (this_len == 0)
1603               {
1604                 match_byte = len;
1605                 pos -= len;
1606                 break;
1607               }
1608
1609             pos--;
1610           }
1611
1612         n++;
1613       }
1614
1615  stop:
1616   if (n == 0)
1617     {
1618       if (forward)
1619         set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1620       else
1621         set_search_regs (multibyte ? pos_byte : pos, match_byte);
1622
1623       return pos;
1624     }
1625   else if (n > 0)
1626     return -n;
1627   else
1628     return n;
1629 }
1630 \f
1631 /* Do Boyer-Moore search N times for the string BASE_PAT,
1632    whose length is LEN/LEN_BYTE,
1633    from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1634    DIRECTION says which direction we search in.
1635    TRT and INVERSE_TRT are translation tables.
1636    Characters in PAT are already translated by TRT.
1637
1638    This kind of search works if all the characters in BASE_PAT that
1639    have nontrivial translation are the same aside from the last byte.
1640    This makes it possible to translate just the last byte of a
1641    character, and do so after just a simple test of the context.
1642    CHAR_BASE is nonzero if there is such a non-ASCII character.
1643
1644    If that criterion is not satisfied, do not call this function.  */
1645
1646 static int
1647 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1648              pos, pos_byte, lim, lim_byte, char_base)
1649      int n;
1650      unsigned char *base_pat;
1651      int len, len_byte;
1652      Lisp_Object trt;
1653      Lisp_Object inverse_trt;
1654      int pos, pos_byte;
1655      int lim, lim_byte;
1656      int char_base;
1657 {
1658   int direction = ((n > 0) ? 1 : -1);
1659   register int dirlen;
1660   int infinity, limit, stride_for_teases = 0;
1661   register int *BM_tab;
1662   int *BM_tab_base;
1663   register unsigned char *cursor, *p_limit;
1664   register int i, j;
1665   unsigned char *pat, *pat_end;
1666   int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1667
1668   unsigned char simple_translate[0400];
1669   /* These are set to the preceding bytes of a byte to be translated
1670      if char_base is nonzero.  As the maximum byte length of a
1671      multibyte character is 5, we have to check at most four previous
1672      bytes.  */
1673   int translate_prev_byte1 = 0;
1674   int translate_prev_byte2 = 0;
1675   int translate_prev_byte3 = 0;
1676   int translate_prev_byte4 = 0;
1677
1678   BM_tab = (int *) alloca (0400 * sizeof (int));
1679
1680   /* The general approach is that we are going to maintain that we know */
1681   /* the first (closest to the present position, in whatever direction */
1682   /* we're searching) character that could possibly be the last */
1683   /* (furthest from present position) character of a valid match.  We */
1684   /* advance the state of our knowledge by looking at that character */
1685   /* and seeing whether it indeed matches the last character of the */
1686   /* pattern.  If it does, we take a closer look.  If it does not, we */
1687   /* move our pointer (to putative last characters) as far as is */
1688   /* logically possible.  This amount of movement, which I call a */
1689   /* stride, will be the length of the pattern if the actual character */
1690   /* appears nowhere in the pattern, otherwise it will be the distance */
1691   /* from the last occurrence of that character to the end of the */
1692   /* pattern. */
1693   /* As a coding trick, an enormous stride is coded into the table for */
1694   /* characters that match the last character.  This allows use of only */
1695   /* a single test, a test for having gone past the end of the */
1696   /* permissible match region, to test for both possible matches (when */
1697   /* the stride goes past the end immediately) and failure to */
1698   /* match (where you get nudged past the end one stride at a time). */
1699
1700   /* Here we make a "mickey mouse" BM table.  The stride of the search */
1701   /* is determined only by the last character of the putative match. */
1702   /* If that character does not match, we will stride the proper */
1703   /* distance to propose a match that superimposes it on the last */
1704   /* instance of a character that matches it (per trt), or misses */
1705   /* it entirely if there is none. */
1706
1707   dirlen = len_byte * direction;
1708   infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1709
1710   /* Record position after the end of the pattern.  */
1711   pat_end = base_pat + len_byte;
1712   /* BASE_PAT points to a character that we start scanning from.
1713      It is the first character in a forward search,
1714      the last character in a backward search.  */
1715   if (direction < 0)
1716     base_pat = pat_end - 1;
1717
1718   BM_tab_base = BM_tab;
1719   BM_tab += 0400;
1720   j = dirlen;           /* to get it in a register */
1721   /* A character that does not appear in the pattern induces a */
1722   /* stride equal to the pattern length. */
1723   while (BM_tab_base != BM_tab)
1724     {
1725       *--BM_tab = j;
1726       *--BM_tab = j;
1727       *--BM_tab = j;
1728       *--BM_tab = j;
1729     }
1730
1731   /* We use this for translation, instead of TRT itself.
1732      We fill this in to handle the characters that actually
1733      occur in the pattern.  Others don't matter anyway!  */
1734   bzero (simple_translate, sizeof simple_translate);
1735   for (i = 0; i < 0400; i++)
1736     simple_translate[i] = i;
1737
1738   if (char_base)
1739     {
1740       /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE.  Only a
1741          byte following them are the target of translation.  */
1742       unsigned char str[MAX_MULTIBYTE_LENGTH];
1743       int len = CHAR_STRING (char_base, str);
1744
1745       translate_prev_byte1 = str[len - 2];
1746       if (len > 2)
1747         {
1748           translate_prev_byte2 = str[len - 3];
1749           if (len > 3)
1750             {
1751               translate_prev_byte3 = str[len - 4];
1752               if (len > 4)
1753                 translate_prev_byte4 = str[len - 5];
1754             }
1755         }
1756     }
1757
1758   i = 0;
1759   while (i != infinity)
1760     {
1761       unsigned char *ptr = base_pat + i;
1762       i += direction;
1763       if (i == dirlen)
1764         i = infinity;
1765       if (! NILP (trt))
1766         {
1767           /* If the byte currently looking at is the last of a
1768              character to check case-equivalents, set CH to that
1769              character.  An ASCII character and a non-ASCII character
1770              matching with CHAR_BASE are to be checked.  */
1771           int ch = -1;
1772
1773           if (ASCII_BYTE_P (*ptr) || ! multibyte)
1774             ch = *ptr;
1775           else if (char_base
1776                    && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1777             {
1778               unsigned char *charstart = ptr - 1;
1779
1780               while (! (CHAR_HEAD_P (*charstart)))
1781                 charstart--;
1782               ch = STRING_CHAR (charstart, ptr - charstart + 1);
1783               if (char_base != (ch & ~0x3F))
1784                 ch = -1;
1785             }
1786
1787           if (ch >= 0400)
1788             j = (ch & 0x3F) | 0200;
1789           else
1790             j = *ptr;
1791
1792           if (i == infinity)
1793             stride_for_teases = BM_tab[j];
1794
1795           BM_tab[j] = dirlen - i;
1796           /* A translation table is accompanied by its inverse -- see */
1797           /* comment following downcase_table for details */
1798           if (ch >= 0)
1799             {
1800               int starting_ch = ch;
1801               int starting_j = j;
1802
1803               while (1)
1804                 {
1805                   TRANSLATE (ch, inverse_trt, ch);
1806                   if (ch >= 0400)
1807                     j = (ch & 0x3F) | 0200;
1808                   else
1809                     j = ch;
1810
1811                   /* For all the characters that map into CH,
1812                      set up simple_translate to map the last byte
1813                      into STARTING_J.  */
1814                   simple_translate[j] = starting_j;
1815                   if (ch == starting_ch)
1816                     break;
1817                   BM_tab[j] = dirlen - i;
1818                 }
1819             }
1820         }
1821       else
1822         {
1823           j = *ptr;
1824
1825           if (i == infinity)
1826             stride_for_teases = BM_tab[j];
1827           BM_tab[j] = dirlen - i;
1828         }
1829       /* stride_for_teases tells how much to stride if we get a */
1830       /* match on the far character but are subsequently */
1831       /* disappointed, by recording what the stride would have been */
1832       /* for that character if the last character had been */
1833       /* different. */
1834     }
1835   infinity = dirlen - infinity;
1836   pos_byte += dirlen - ((direction > 0) ? direction : 0);
1837   /* loop invariant - POS_BYTE points at where last char (first
1838      char if reverse) of pattern would align in a possible match.  */
1839   while (n != 0)
1840     {
1841       int tail_end;
1842       unsigned char *tail_end_ptr;
1843
1844       /* It's been reported that some (broken) compiler thinks that
1845          Boolean expressions in an arithmetic context are unsigned.
1846          Using an explicit ?1:0 prevents this.  */
1847       if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1848           < 0)
1849         return (n * (0 - direction));
1850       /* First we do the part we can by pointers (maybe nothing) */
1851       QUIT;
1852       pat = base_pat;
1853       limit = pos_byte - dirlen + direction;
1854       if (direction > 0)
1855         {
1856           limit = BUFFER_CEILING_OF (limit);
1857           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1858              can take on without hitting edge of buffer or the gap.  */
1859           limit = min (limit, pos_byte + 20000);
1860           limit = min (limit, lim_byte - 1);
1861         }
1862       else
1863         {
1864           limit = BUFFER_FLOOR_OF (limit);
1865           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1866              can take on without hitting edge of buffer or the gap.  */
1867           limit = max (limit, pos_byte - 20000);
1868           limit = max (limit, lim_byte);
1869         }
1870       tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1871       tail_end_ptr = BYTE_POS_ADDR (tail_end);
1872
1873       if ((limit - pos_byte) * direction > 20)
1874         {
1875           unsigned char *p2;
1876
1877           p_limit = BYTE_POS_ADDR (limit);
1878           p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1879           /* In this loop, pos + cursor - p2 is the surrogate for pos */
1880           while (1)             /* use one cursor setting as long as i can */
1881             {
1882               if (direction > 0) /* worth duplicating */
1883                 {
1884                   /* Use signed comparison if appropriate
1885                      to make cursor+infinity sure to be > p_limit.
1886                      Assuming that the buffer lies in a range of addresses
1887                      that are all "positive" (as ints) or all "negative",
1888                      either kind of comparison will work as long
1889                      as we don't step by infinity.  So pick the kind
1890                      that works when we do step by infinity.  */
1891                   if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1892                     while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1893                       cursor += BM_tab[*cursor];
1894                   else
1895                     while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1896                       cursor += BM_tab[*cursor];
1897                 }
1898               else
1899                 {
1900                   if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1901                     while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1902                       cursor += BM_tab[*cursor];
1903                   else
1904                     while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1905                       cursor += BM_tab[*cursor];
1906                 }
1907 /* If you are here, cursor is beyond the end of the searched region. */
1908 /* This can happen if you match on the far character of the pattern, */
1909 /* because the "stride" of that character is infinity, a number able */
1910 /* to throw you well beyond the end of the search.  It can also */
1911 /* happen if you fail to match within the permitted region and would */
1912 /* otherwise try a character beyond that region */
1913               if ((cursor - p_limit) * direction <= len_byte)
1914                 break;  /* a small overrun is genuine */
1915               cursor -= infinity; /* large overrun = hit */
1916               i = dirlen - direction;
1917               if (! NILP (trt))
1918                 {
1919                   while ((i -= direction) + direction != 0)
1920                     {
1921                       int ch;
1922                       cursor -= direction;
1923                       /* Translate only the last byte of a character.  */
1924                       if (! multibyte
1925                           || ((cursor == tail_end_ptr
1926                                || CHAR_HEAD_P (cursor[1]))
1927                               && (CHAR_HEAD_P (cursor[0])
1928                                   /* Check if this is the last byte of
1929                                      a translable character.  */
1930                                   || (translate_prev_byte1 == cursor[-1]
1931                                       && (CHAR_HEAD_P (translate_prev_byte1)
1932                                           || (translate_prev_byte2 == cursor[-2]
1933                                               && (CHAR_HEAD_P (translate_prev_byte2)
1934                                                   || (translate_prev_byte3 == cursor[-3]))))))))
1935                         ch = simple_translate[*cursor];
1936                       else
1937                         ch = *cursor;
1938                       if (pat[i] != ch)
1939                         break;
1940                     }
1941                 }
1942               else
1943                 {
1944                   while ((i -= direction) + direction != 0)
1945                     {
1946                       cursor -= direction;
1947                       if (pat[i] != *cursor)
1948                         break;
1949                     }
1950                 }
1951               cursor += dirlen - i - direction; /* fix cursor */
1952               if (i + direction == 0)
1953                 {
1954                   int position, start, end;
1955
1956                   cursor -= direction;
1957
1958                   position = pos_byte + cursor - p2 + ((direction > 0)
1959                                                        ? 1 - len_byte : 0);
1960                   set_search_regs (position, len_byte);
1961
1962                   if (NILP (Vinhibit_changing_match_data))
1963                     {
1964                       start = search_regs.start[0];
1965                       end = search_regs.end[0];
1966                     }
1967                   else
1968                     /* If Vinhibit_changing_match_data is non-nil,
1969                        search_regs will not be changed.  So let's
1970                        compute start and end here.  */
1971                     {
1972                       start = BYTE_TO_CHAR (position);
1973                       end = BYTE_TO_CHAR (position + len_byte);
1974                     }
1975
1976                   if ((n -= direction) != 0)
1977                     cursor += dirlen; /* to resume search */
1978                   else
1979                     return direction > 0 ? end : start;
1980                 }
1981               else
1982                 cursor += stride_for_teases; /* <sigh> we lose -  */
1983             }
1984           pos_byte += cursor - p2;
1985         }
1986       else
1987         /* Now we'll pick up a clump that has to be done the hard */
1988         /* way because it covers a discontinuity */
1989         {
1990           limit = ((direction > 0)
1991                    ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1992                    : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1993           limit = ((direction > 0)
1994                    ? min (limit + len_byte, lim_byte - 1)
1995                    : max (limit - len_byte, lim_byte));
1996           /* LIMIT is now the last value POS_BYTE can have
1997              and still be valid for a possible match.  */
1998           while (1)
1999             {
2000               /* This loop can be coded for space rather than */
2001               /* speed because it will usually run only once. */
2002               /* (the reach is at most len + 21, and typically */
2003               /* does not exceed len) */
2004               while ((limit - pos_byte) * direction >= 0)
2005                 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2006               /* now run the same tests to distinguish going off the */
2007               /* end, a match or a phony match. */
2008               if ((pos_byte - limit) * direction <= len_byte)
2009                 break;  /* ran off the end */
2010               /* Found what might be a match.
2011                  Set POS_BYTE back to last (first if reverse) pos.  */
2012               pos_byte -= infinity;
2013               i = dirlen - direction;
2014               while ((i -= direction) + direction != 0)
2015                 {
2016                   int ch;
2017                   unsigned char *ptr;
2018                   pos_byte -= direction;
2019                   ptr = BYTE_POS_ADDR (pos_byte);
2020                   /* Translate only the last byte of a character.  */
2021                   if (! multibyte
2022                       || ((ptr == tail_end_ptr
2023                            || CHAR_HEAD_P (ptr[1]))
2024                           && (CHAR_HEAD_P (ptr[0])
2025                               /* Check if this is the last byte of a
2026                                  translable character.  */
2027                               || (translate_prev_byte1 == ptr[-1]
2028                                   && (CHAR_HEAD_P (translate_prev_byte1)
2029                                       || (translate_prev_byte2 == ptr[-2]
2030                                           && (CHAR_HEAD_P (translate_prev_byte2)
2031                                               || translate_prev_byte3 == ptr[-3])))))))
2032                     ch = simple_translate[*ptr];
2033                   else
2034                     ch = *ptr;
2035                   if (pat[i] != ch)
2036                     break;
2037                 }
2038               /* Above loop has moved POS_BYTE part or all the way
2039                  back to the first pos (last pos if reverse).
2040                  Set it once again at the last (first if reverse) char.  */
2041               pos_byte += dirlen - i- direction;
2042               if (i + direction == 0)
2043                 {
2044                   int position, start, end;
2045                   pos_byte -= direction;
2046
2047                   position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2048                   set_search_regs (position, len_byte);
2049
2050                   if (NILP (Vinhibit_changing_match_data))
2051                     {
2052                       start = search_regs.start[0];
2053                       end = search_regs.end[0];
2054                     }
2055                   else
2056                     /* If Vinhibit_changing_match_data is non-nil,
2057                        search_regs will not be changed.  So let's
2058                        compute start and end here.  */
2059                     {
2060                       start = BYTE_TO_CHAR (position);
2061                       end = BYTE_TO_CHAR (position + len_byte);
2062                     }
2063
2064                   if ((n -= direction) != 0)
2065                     pos_byte += dirlen; /* to resume search */
2066                   else
2067                     return direction > 0 ? end : start;
2068                 }
2069               else
2070                 pos_byte += stride_for_teases;
2071             }
2072           }
2073       /* We have done one clump.  Can we continue? */
2074       if ((lim_byte - pos_byte) * direction < 0)
2075         return ((0 - n) * direction);
2076     }
2077   return BYTE_TO_CHAR (pos_byte);
2078 }
2079
2080 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2081    for the overall match just found in the current buffer.
2082    Also clear out the match data for registers 1 and up.  */
2083
2084 static void
2085 set_search_regs (beg_byte, nbytes)
2086      int beg_byte, nbytes;
2087 {
2088   int i;
2089
2090   if (!NILP (Vinhibit_changing_match_data))
2091     return;
2092
2093   /* Make sure we have registers in which to store
2094      the match position.  */
2095   if (search_regs.num_regs == 0)
2096     {
2097       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2098       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2099       search_regs.num_regs = 2;
2100     }
2101
2102   /* Clear out the other registers.  */
2103   for (i = 1; i < search_regs.num_regs; i++)
2104     {
2105       search_regs.start[i] = -1;
2106       search_regs.end[i] = -1;
2107     }
2108
2109   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2110   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2111   XSETBUFFER (last_thing_searched, current_buffer);
2112 }
2113 \f
2114 /* Given a string of words separated by word delimiters,
2115   compute a regexp that matches those exact words
2116   separated by arbitrary punctuation.  */
2117
2118 static Lisp_Object
2119 wordify (string)
2120      Lisp_Object string;
2121 {
2122   register unsigned char *p, *o;
2123   register int i, i_byte, len, punct_count = 0, word_count = 0;
2124   Lisp_Object val;
2125   int prev_c = 0;
2126   int adjust;
2127
2128   CHECK_STRING (string);
2129   p = SDATA (string);
2130   len = SCHARS (string);
2131
2132   for (i = 0, i_byte = 0; i < len; )
2133     {
2134       int c;
2135
2136       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2137
2138       if (SYNTAX (c) != Sword)
2139         {
2140           punct_count++;
2141           if (i > 0 && SYNTAX (prev_c) == Sword)
2142             word_count++;
2143         }
2144
2145       prev_c = c;
2146     }
2147
2148   if (SYNTAX (prev_c) == Sword)
2149     word_count++;
2150   if (!word_count)
2151     return empty_unibyte_string;
2152
2153   adjust = - punct_count + 5 * (word_count - 1) + 4;
2154   if (STRING_MULTIBYTE (string))
2155     val = make_uninit_multibyte_string (len + adjust,
2156                                         SBYTES (string)
2157                                         + adjust);
2158   else
2159     val = make_uninit_string (len + adjust);
2160
2161   o = SDATA (val);
2162   *o++ = '\\';
2163   *o++ = 'b';
2164   prev_c = 0;
2165
2166   for (i = 0, i_byte = 0; i < len; )
2167     {
2168       int c;
2169       int i_byte_orig = i_byte;
2170
2171       FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2172
2173       if (SYNTAX (c) == Sword)
2174         {
2175           bcopy (SDATA (string) + i_byte_orig, o,
2176                  i_byte - i_byte_orig);
2177           o += i_byte - i_byte_orig;
2178         }
2179       else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2180         {
2181           *o++ = '\\';
2182           *o++ = 'W';
2183           *o++ = '\\';
2184           *o++ = 'W';
2185           *o++ = '*';
2186         }
2187
2188       prev_c = c;
2189     }
2190
2191   *o++ = '\\';
2192   *o++ = 'b';
2193
2194   return val;
2195 }
2196 \f
2197 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2198        "MSearch backward: ",
2199        doc: /* Search backward from point for STRING.
2200 Set point to the beginning of the occurrence found, and return point.
2201 An optional second argument bounds the search; it is a buffer position.
2202 The match found must not extend before that position.
2203 Optional third argument, if t, means if fail just return nil (no error).
2204  If not nil and not t, position at limit of search and return nil.
2205 Optional fourth argument is repeat count--search for successive occurrences.
2206
2207 Search case-sensitivity is determined by the value of the variable
2208 `case-fold-search', which see.
2209
2210 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2211      (string, bound, noerror, count)
2212      Lisp_Object string, bound, noerror, count;
2213 {
2214   return search_command (string, bound, noerror, count, -1, 0, 0);
2215 }
2216
2217 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2218        doc: /* Search forward from point for STRING.
2219 Set point to the end of the occurrence found, and return point.
2220 An optional second argument bounds the search; it is a buffer position.
2221 The match found must not extend after that position.  A value of nil is
2222   equivalent to (point-max).
2223 Optional third argument, if t, means if fail just return nil (no error).
2224   If not nil and not t, move to limit of search and return nil.
2225 Optional fourth argument is repeat count--search for successive occurrences.
2226
2227 Search case-sensitivity is determined by the value of the variable
2228 `case-fold-search', which see.
2229
2230 See also the functions `match-beginning', `match-end' and `replace-match'.  */)
2231      (string, bound, noerror, count)
2232      Lisp_Object string, bound, noerror, count;
2233 {
2234   return search_command (string, bound, noerror, count, 1, 0, 0);
2235 }
2236
2237 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2238        "sWord search backward: ",
2239        doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2240 Set point to the beginning of the occurrence found, and return point.
2241 An optional second argument bounds the search; it is a buffer position.
2242 The match found must not extend before that position.
2243 Optional third argument, if t, means if fail just return nil (no error).
2244   If not nil and not t, move to limit of search and return nil.
2245 Optional fourth argument is repeat count--search for successive occurrences.  */)
2246      (string, bound, noerror, count)
2247      Lisp_Object string, bound, noerror, count;
2248 {
2249   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2250 }
2251
2252 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2253        "sWord search: ",
2254        doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2255 Set point to the end of the occurrence found, and return point.
2256 An optional second argument bounds the search; it is a buffer position.
2257 The match found must not extend after that position.
2258 Optional third argument, if t, means if fail just return nil (no error).
2259   If not nil and not t, move to limit of search and return nil.
2260 Optional fourth argument is repeat count--search for successive occurrences.  */)
2261      (string, bound, noerror, count)
2262      Lisp_Object string, bound, noerror, count;
2263 {
2264   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2265 }
2266
2267 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2268        "sRE search backward: ",
2269        doc: /* Search backward from point for match for regular expression REGEXP.
2270 Set point to the beginning of the match, and return point.
2271 The match found is the one starting last in the buffer
2272 and yet ending before the origin of the search.
2273 An optional second argument bounds the search; it is a buffer position.
2274 The match found must start at or after that position.
2275 Optional third argument, if t, means if fail just return nil (no error).
2276   If not nil and not t, move to limit of search and return nil.
2277 Optional fourth argument is repeat count--search for successive occurrences.
2278 See also the functions `match-beginning', `match-end', `match-string',
2279 and `replace-match'.  */)
2280      (regexp, bound, noerror, count)
2281      Lisp_Object regexp, bound, noerror, count;
2282 {
2283   return search_command (regexp, bound, noerror, count, -1, 1, 0);
2284 }
2285
2286 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2287        "sRE search: ",
2288        doc: /* Search forward from point for regular expression REGEXP.
2289 Set point to the end of the occurrence found, and return point.
2290 An optional second argument bounds the search; it is a buffer position.
2291 The match found must not extend after that position.
2292 Optional third argument, if t, means if fail just return nil (no error).
2293   If not nil and not t, move to limit of search and return nil.
2294 Optional fourth argument is repeat count--search for successive occurrences.
2295 See also the functions `match-beginning', `match-end', `match-string',
2296 and `replace-match'.  */)
2297      (regexp, bound, noerror, count)
2298      Lisp_Object regexp, bound, noerror, count;
2299 {
2300   return search_command (regexp, bound, noerror, count, 1, 1, 0);
2301 }
2302
2303 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2304        "sPosix search backward: ",
2305        doc: /* Search backward from point for match for regular expression REGEXP.
2306 Find the longest match in accord with Posix regular expression rules.
2307 Set point to the beginning of the match, and return point.
2308 The match found is the one starting last in the buffer
2309 and yet ending before the origin of the search.
2310 An optional second argument bounds the search; it is a buffer position.
2311 The match found must start at or after that position.
2312 Optional third argument, if t, means if fail just return nil (no error).
2313   If not nil and not t, move to limit of search and return nil.
2314 Optional fourth argument is repeat count--search for successive occurrences.
2315 See also the functions `match-beginning', `match-end', `match-string',
2316 and `replace-match'.  */)
2317      (regexp, bound, noerror, count)
2318      Lisp_Object regexp, bound, noerror, count;
2319 {
2320   return search_command (regexp, bound, noerror, count, -1, 1, 1);
2321 }
2322
2323 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2324        "sPosix search: ",
2325        doc: /* Search forward from point for regular expression REGEXP.
2326 Find the longest match in accord with Posix regular expression rules.
2327 Set point to the end of the occurrence found, and return point.
2328 An optional second argument bounds the search; it is a buffer position.
2329 The match found must not extend after that position.
2330 Optional third argument, if t, means if fail just return nil (no error).
2331   If not nil and not t, move to limit of search and return nil.
2332 Optional fourth argument is repeat count--search for successive occurrences.
2333 See also the functions `match-beginning', `match-end', `match-string',
2334 and `replace-match'.  */)
2335      (regexp, bound, noerror, count)
2336      Lisp_Object regexp, bound, noerror, count;
2337 {
2338   return search_command (regexp, bound, noerror, count, 1, 1, 1);
2339 }
2340 \f
2341 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2342        doc: /* Replace text matched by last search with NEWTEXT.
2343 Leave point at the end of the replacement text.
2344
2345 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2346 Otherwise maybe capitalize the whole text, or maybe just word initials,
2347 based on the replaced text.
2348 If the replaced text has only capital letters
2349 and has at least one multiletter word, convert NEWTEXT to all caps.
2350 Otherwise if all words are capitalized in the replaced text,
2351 capitalize each word in NEWTEXT.
2352
2353 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2354 Otherwise treat `\\' as special:
2355   `\\&' in NEWTEXT means substitute original matched text.
2356   `\\N' means substitute what matched the Nth `\\(...\\)'.
2357        If Nth parens didn't match, substitute nothing.
2358   `\\\\' means insert one `\\'.
2359 Case conversion does not apply to these substitutions.
2360
2361 FIXEDCASE and LITERAL are optional arguments.
2362
2363 The optional fourth argument STRING can be a string to modify.
2364 This is meaningful when the previous match was done against STRING,
2365 using `string-match'.  When used this way, `replace-match'
2366 creates and returns a new string made by copying STRING and replacing
2367 the part of STRING that was matched.
2368
2369 The optional fifth argument SUBEXP specifies a subexpression;
2370 it says to replace just that subexpression with NEWTEXT,
2371 rather than replacing the entire matched text.
2372 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2373 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2374 NEWTEXT in place of subexp N.
2375 This is useful only after a regular expression search or match,
2376 since only regular expressions have distinguished subexpressions.  */)
2377      (newtext, fixedcase, literal, string, subexp)
2378      Lisp_Object newtext, fixedcase, literal, string, subexp;
2379 {
2380   enum { nochange, all_caps, cap_initial } case_action;
2381   register int pos, pos_byte;
2382   int some_multiletter_word;
2383   int some_lowercase;
2384   int some_uppercase;
2385   int some_nonuppercase_initial;
2386   register int c, prevc;
2387   int sub;
2388   int opoint, newpoint;
2389
2390   CHECK_STRING (newtext);
2391
2392   if (! NILP (string))
2393     CHECK_STRING (string);
2394
2395   case_action = nochange;       /* We tried an initialization */
2396                                 /* but some C compilers blew it */
2397
2398   if (search_regs.num_regs <= 0)
2399     error ("`replace-match' called before any match found");
2400
2401   if (NILP (subexp))
2402     sub = 0;
2403   else
2404     {
2405       CHECK_NUMBER (subexp);
2406       sub = XINT (subexp);
2407       if (sub < 0 || sub >= search_regs.num_regs)
2408         args_out_of_range (subexp, make_number (search_regs.num_regs));
2409     }
2410
2411   if (NILP (string))
2412     {
2413       if (search_regs.start[sub] < BEGV
2414           || search_regs.start[sub] > search_regs.end[sub]
2415           || search_regs.end[sub] > ZV)
2416         args_out_of_range (make_number (search_regs.start[sub]),
2417                            make_number (search_regs.end[sub]));
2418     }
2419   else
2420     {
2421       if (search_regs.start[sub] < 0
2422           || search_regs.start[sub] > search_regs.end[sub]
2423           || search_regs.end[sub] > SCHARS (string))
2424         args_out_of_range (make_number (search_regs.start[sub]),
2425                            make_number (search_regs.end[sub]));
2426     }
2427
2428   if (NILP (fixedcase))
2429     {
2430       /* Decide how to casify by examining the matched text. */
2431       int last;
2432
2433       pos = search_regs.start[sub];
2434       last = search_regs.end[sub];
2435
2436       if (NILP (string))
2437         pos_byte = CHAR_TO_BYTE (pos);
2438       else
2439         pos_byte = string_char_to_byte (string, pos);
2440
2441       prevc = '\n';
2442       case_action = all_caps;
2443
2444       /* some_multiletter_word is set nonzero if any original word
2445          is more than one letter long. */
2446       some_multiletter_word = 0;
2447       some_lowercase = 0;
2448       some_nonuppercase_initial = 0;
2449       some_uppercase = 0;
2450
2451       while (pos < last)
2452         {
2453           if (NILP (string))
2454             {
2455               c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2456               INC_BOTH (pos, pos_byte);
2457             }
2458           else
2459             FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2460
2461           if (LOWERCASEP (c))
2462             {
2463               /* Cannot be all caps if any original char is lower case */
2464
2465               some_lowercase = 1;
2466               if (SYNTAX (prevc) != Sword)
2467                 some_nonuppercase_initial = 1;
2468               else
2469                 some_multiletter_word = 1;
2470             }
2471           else if (UPPERCASEP (c))
2472             {
2473               some_uppercase = 1;
2474               if (SYNTAX (prevc) != Sword)
2475                 ;
2476               else
2477                 some_multiletter_word = 1;
2478             }
2479           else
2480             {
2481               /* If the initial is a caseless word constituent,
2482                  treat that like a lowercase initial.  */
2483               if (SYNTAX (prevc) != Sword)
2484                 some_nonuppercase_initial = 1;
2485             }
2486
2487           prevc = c;
2488         }
2489
2490       /* Convert to all caps if the old text is all caps
2491          and has at least one multiletter word.  */
2492       if (! some_lowercase && some_multiletter_word)
2493         case_action = all_caps;
2494       /* Capitalize each word, if the old text has all capitalized words.  */
2495       else if (!some_nonuppercase_initial && some_multiletter_word)
2496         case_action = cap_initial;
2497       else if (!some_nonuppercase_initial && some_uppercase)
2498         /* Should x -> yz, operating on X, give Yz or YZ?
2499            We'll assume the latter.  */
2500         case_action = all_caps;
2501       else
2502         case_action = nochange;
2503     }
2504
2505   /* Do replacement in a string.  */
2506   if (!NILP (string))
2507     {
2508       Lisp_Object before, after;
2509
2510       before = Fsubstring (string, make_number (0),
2511                            make_number (search_regs.start[sub]));
2512       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2513
2514       /* Substitute parts of the match into NEWTEXT
2515          if desired.  */
2516       if (NILP (literal))
2517         {
2518           int lastpos = 0;
2519           int lastpos_byte = 0;
2520           /* We build up the substituted string in ACCUM.  */
2521           Lisp_Object accum;
2522           Lisp_Object middle;
2523           int length = SBYTES (newtext);
2524
2525           accum = Qnil;
2526
2527           for (pos_byte = 0, pos = 0; pos_byte < length;)
2528             {
2529               int substart = -1;
2530               int subend = 0;
2531               int delbackslash = 0;
2532
2533               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2534
2535               if (c == '\\')
2536                 {
2537                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2538
2539                   if (c == '&')
2540                     {
2541                       substart = search_regs.start[sub];
2542                       subend = search_regs.end[sub];
2543                     }
2544                   else if (c >= '1' && c <= '9')
2545                     {
2546                       if (search_regs.start[c - '0'] >= 0
2547                           && c <= search_regs.num_regs + '0')
2548                         {
2549                           substart = search_regs.start[c - '0'];
2550                           subend = search_regs.end[c - '0'];
2551                         }
2552                       else
2553                         {
2554                           /* If that subexp did not match,
2555                              replace \\N with nothing.  */
2556                           substart = 0;
2557                           subend = 0;
2558                         }
2559                     }
2560                   else if (c == '\\')
2561                     delbackslash = 1;
2562                   else
2563                     error ("Invalid use of `\\' in replacement text");
2564                 }
2565               if (substart >= 0)
2566                 {
2567                   if (pos - 2 != lastpos)
2568                     middle = substring_both (newtext, lastpos,
2569                                              lastpos_byte,
2570                                              pos - 2, pos_byte - 2);
2571                   else
2572                     middle = Qnil;
2573                   accum = concat3 (accum, middle,
2574                                    Fsubstring (string,
2575                                                make_number (substart),
2576                                                make_number (subend)));
2577                   lastpos = pos;
2578                   lastpos_byte = pos_byte;
2579                 }
2580               else if (delbackslash)
2581                 {
2582                   middle = substring_both (newtext, lastpos,
2583                                            lastpos_byte,
2584                                            pos - 1, pos_byte - 1);
2585
2586                   accum = concat2 (accum, middle);
2587                   lastpos = pos;
2588                   lastpos_byte = pos_byte;
2589                 }
2590             }
2591
2592           if (pos != lastpos)
2593             middle = substring_both (newtext, lastpos,
2594                                      lastpos_byte,
2595                                      pos, pos_byte);
2596           else
2597             middle = Qnil;
2598
2599           newtext = concat2 (accum, middle);
2600         }
2601
2602       /* Do case substitution in NEWTEXT if desired.  */
2603       if (case_action == all_caps)
2604         newtext = Fupcase (newtext);
2605       else if (case_action == cap_initial)
2606         newtext = Fupcase_initials (newtext);
2607
2608       return concat3 (before, newtext, after);
2609     }
2610
2611   /* Record point, then move (quietly) to the start of the match.  */
2612   if (PT >= search_regs.end[sub])
2613     opoint = PT - ZV;
2614   else if (PT > search_regs.start[sub])
2615     opoint = search_regs.end[sub] - ZV;
2616   else
2617     opoint = PT;
2618
2619   /* If we want non-literal replacement,
2620      perform substitution on the replacement string.  */
2621   if (NILP (literal))
2622     {
2623       int length = SBYTES (newtext);
2624       unsigned char *substed;
2625       int substed_alloc_size, substed_len;
2626       int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2627       int str_multibyte = STRING_MULTIBYTE (newtext);
2628       Lisp_Object rev_tbl;
2629       int really_changed = 0;
2630
2631       rev_tbl = Qnil;
2632
2633       substed_alloc_size = length * 2 + 100;
2634       substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2635       substed_len = 0;
2636
2637       /* Go thru NEWTEXT, producing the actual text to insert in
2638          SUBSTED while adjusting multibyteness to that of the current
2639          buffer.  */
2640
2641       for (pos_byte = 0, pos = 0; pos_byte < length;)
2642         {
2643           unsigned char str[MAX_MULTIBYTE_LENGTH];
2644           unsigned char *add_stuff = NULL;
2645           int add_len = 0;
2646           int idx = -1;
2647
2648           if (str_multibyte)
2649             {
2650               FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2651               if (!buf_multibyte)
2652                 c = multibyte_char_to_unibyte (c, rev_tbl);
2653             }
2654           else
2655             {
2656               /* Note that we don't have to increment POS.  */
2657               c = SREF (newtext, pos_byte++);
2658               if (buf_multibyte)
2659                 c = unibyte_char_to_multibyte (c);
2660             }
2661
2662           /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2663              or set IDX to a match index, which means put that part
2664              of the buffer text into SUBSTED.  */
2665
2666           if (c == '\\')
2667             {
2668               really_changed = 1;
2669
2670               if (str_multibyte)
2671                 {
2672                   FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2673                                                       pos, pos_byte);
2674                   if (!buf_multibyte && !ASCII_CHAR_P (c))
2675                     c = multibyte_char_to_unibyte (c, rev_tbl);
2676                 }
2677               else
2678                 {
2679                   c = SREF (newtext, pos_byte++);
2680                   if (buf_multibyte)
2681                     c = unibyte_char_to_multibyte (c);
2682                 }
2683
2684               if (c == '&')
2685                 idx = sub;
2686               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2687                 {
2688                   if (search_regs.start[c - '0'] >= 1)
2689                     idx = c - '0';
2690                 }
2691               else if (c == '\\')
2692                 add_len = 1, add_stuff = "\\";
2693               else
2694                 {
2695                   xfree (substed);
2696                   error ("Invalid use of `\\' in replacement text");
2697                 }
2698             }
2699           else
2700             {
2701               add_len = CHAR_STRING (c, str);
2702               add_stuff = str;
2703             }
2704
2705           /* If we want to copy part of a previous match,
2706              set up ADD_STUFF and ADD_LEN to point to it.  */
2707           if (idx >= 0)
2708             {
2709               int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2710               add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2711               if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2712                 move_gap (search_regs.start[idx]);
2713               add_stuff = BYTE_POS_ADDR (begbyte);
2714             }
2715
2716           /* Now the stuff we want to add to SUBSTED
2717              is invariably ADD_LEN bytes starting at ADD_STUFF.  */
2718
2719           /* Make sure SUBSTED is big enough.  */
2720           if (substed_len + add_len >= substed_alloc_size)
2721             {
2722               substed_alloc_size = substed_len + add_len + 500;
2723               substed = (unsigned char *) xrealloc (substed,
2724                                                     substed_alloc_size + 1);
2725             }
2726
2727           /* Now add to the end of SUBSTED.  */
2728           if (add_stuff)
2729             {
2730               bcopy (add_stuff, substed + substed_len, add_len);
2731               substed_len += add_len;
2732             }
2733         }
2734
2735       if (really_changed)
2736         {
2737           if (buf_multibyte)
2738             {
2739               int nchars = multibyte_chars_in_text (substed, substed_len);
2740
2741               newtext = make_multibyte_string (substed, nchars, substed_len);
2742             }
2743           else
2744             newtext = make_unibyte_string (substed, substed_len);
2745         }
2746       xfree (substed);
2747     }
2748
2749   /* Replace the old text with the new in the cleanest possible way.  */
2750   replace_range (search_regs.start[sub], search_regs.end[sub],
2751                  newtext, 1, 0, 1);
2752   newpoint = search_regs.start[sub] + SCHARS (newtext);
2753
2754   if (case_action == all_caps)
2755     Fupcase_region (make_number (search_regs.start[sub]),
2756                     make_number (newpoint));
2757   else if (case_action == cap_initial)
2758     Fupcase_initials_region (make_number (search_regs.start[sub]),
2759                              make_number (newpoint));
2760
2761   /* Adjust search data for this change.  */
2762   {
2763     int oldend = search_regs.end[sub];
2764     int oldstart = search_regs.start[sub];
2765     int change = newpoint - search_regs.end[sub];
2766     int i;
2767
2768     for (i = 0; i < search_regs.num_regs; i++)
2769       {
2770         if (search_regs.start[i] >= oldend)
2771           search_regs.start[i] += change;
2772         else if (search_regs.start[i] > oldstart)
2773           search_regs.start[i] = oldstart;
2774         if (search_regs.end[i] >= oldend)
2775           search_regs.end[i] += change;
2776         else if (search_regs.end[i] > oldstart)
2777           search_regs.end[i] = oldstart;
2778       }
2779   }
2780
2781   /* Put point back where it was in the text.  */
2782   if (opoint <= 0)
2783     TEMP_SET_PT (opoint + ZV);
2784   else
2785     TEMP_SET_PT (opoint);
2786
2787   /* Now move point "officially" to the start of the inserted replacement.  */
2788   move_if_not_intangible (newpoint);
2789
2790   return Qnil;
2791 }
2792 \f
2793 static Lisp_Object
2794 match_limit (num, beginningp)
2795      Lisp_Object num;
2796      int beginningp;
2797 {
2798   register int n;
2799
2800   CHECK_NUMBER (num);
2801   n = XINT (num);
2802   if (n < 0)
2803     args_out_of_range (num, make_number (0));
2804   if (search_regs.num_regs <= 0)
2805     error ("No match data, because no search succeeded");
2806   if (n >= search_regs.num_regs
2807       || search_regs.start[n] < 0)
2808     return Qnil;
2809   return (make_number ((beginningp) ? search_regs.start[n]
2810                                     : search_regs.end[n]));
2811 }
2812
2813 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2814        doc: /* Return position of start of text matched by last search.
2815 SUBEXP, a number, specifies which parenthesized expression in the last
2816   regexp.
2817 Value is nil if SUBEXPth pair didn't match, or there were less than
2818   SUBEXP pairs.
2819 Zero means the entire text matched by the whole regexp or whole string.  */)
2820      (subexp)
2821      Lisp_Object subexp;
2822 {
2823   return match_limit (subexp, 1);
2824 }
2825
2826 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2827        doc: /* Return position of end of text matched by last search.
2828 SUBEXP, a number, specifies which parenthesized expression in the last
2829   regexp.
2830 Value is nil if SUBEXPth pair didn't match, or there were less than
2831   SUBEXP pairs.
2832 Zero means the entire text matched by the whole regexp or whole string.  */)
2833      (subexp)
2834      Lisp_Object subexp;
2835 {
2836   return match_limit (subexp, 0);
2837 }
2838
2839 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2840        doc: /* Return a list containing all info on what the last search matched.
2841 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2842 All the elements are markers or nil (nil if the Nth pair didn't match)
2843 if the last match was on a buffer; integers or nil if a string was matched.
2844 Use `store-match-data' to reinstate the data in this list.
2845
2846 If INTEGERS (the optional first argument) is non-nil, always use
2847 integers \(rather than markers) to represent buffer positions.  In
2848 this case, and if the last match was in a buffer, the buffer will get
2849 stored as one additional element at the end of the list.
2850
2851 If REUSE is a list, reuse it as part of the value.  If REUSE is long
2852 enough to hold all the values, and if INTEGERS is non-nil, no consing
2853 is done.
2854
2855 If optional third arg RESEAT is non-nil, any previous markers on the
2856 REUSE list will be modified to point to nowhere.
2857
2858 Return value is undefined if the last search failed.  */)
2859   (integers, reuse, reseat)
2860      Lisp_Object integers, reuse, reseat;
2861 {
2862   Lisp_Object tail, prev;
2863   Lisp_Object *data;
2864   int i, len;
2865
2866   if (!NILP (reseat))
2867     for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2868       if (MARKERP (XCAR (tail)))
2869         {
2870           unchain_marker (XMARKER (XCAR (tail)));
2871           XSETCAR (tail, Qnil);
2872         }
2873
2874   if (NILP (last_thing_searched))
2875     return Qnil;
2876
2877   prev = Qnil;
2878
2879   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2880                                  * sizeof (Lisp_Object));
2881
2882   len = 0;
2883   for (i = 0; i < search_regs.num_regs; i++)
2884     {
2885       int start = search_regs.start[i];
2886       if (start >= 0)
2887         {
2888           if (EQ (last_thing_searched, Qt)
2889               || ! NILP (integers))
2890             {
2891               XSETFASTINT (data[2 * i], start);
2892               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2893             }
2894           else if (BUFFERP (last_thing_searched))
2895             {
2896               data[2 * i] = Fmake_marker ();
2897               Fset_marker (data[2 * i],
2898                            make_number (start),
2899                            last_thing_searched);
2900               data[2 * i + 1] = Fmake_marker ();
2901               Fset_marker (data[2 * i + 1],
2902                            make_number (search_regs.end[i]),
2903                            last_thing_searched);
2904             }
2905           else
2906             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2907             abort ();
2908
2909           len = 2 * i + 2;
2910         }
2911       else
2912         data[2 * i] = data[2 * i + 1] = Qnil;
2913     }
2914
2915   if (BUFFERP (last_thing_searched) && !NILP (integers))
2916     {
2917       data[len] = last_thing_searched;
2918       len++;
2919     }
2920
2921   /* If REUSE is not usable, cons up the values and return them.  */
2922   if (! CONSP (reuse))
2923     return Flist (len, data);
2924
2925   /* If REUSE is a list, store as many value elements as will fit
2926      into the elements of REUSE.  */
2927   for (i = 0, tail = reuse; CONSP (tail);
2928        i++, tail = XCDR (tail))
2929     {
2930       if (i < len)
2931         XSETCAR (tail, data[i]);
2932       else
2933         XSETCAR (tail, Qnil);
2934       prev = tail;
2935     }
2936
2937   /* If we couldn't fit all value elements into REUSE,
2938      cons up the rest of them and add them to the end of REUSE.  */
2939   if (i < len)
2940     XSETCDR (prev, Flist (len - i, data + i));
2941
2942   return reuse;
2943 }
2944
2945 /* We used to have an internal use variant of `reseat' described as:
2946
2947       If RESEAT is `evaporate', put the markers back on the free list
2948       immediately.  No other references to the markers must exist in this
2949       case, so it is used only internally on the unwind stack and
2950       save-match-data from Lisp.
2951
2952    But it was ill-conceived: those supposedly-internal markers get exposed via
2953    the undo-list, so freeing them here is unsafe.  */
2954
2955 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2956        doc: /* Set internal data on last search match from elements of LIST.
2957 LIST should have been created by calling `match-data' previously.
2958
2959 If optional arg RESEAT is non-nil, make markers on LIST point nowhere.  */)
2960     (list, reseat)
2961      register Lisp_Object list, reseat;
2962 {
2963   register int i;
2964   register Lisp_Object marker;
2965
2966   if (running_asynch_code)
2967     save_search_regs ();
2968
2969   CHECK_LIST (list);
2970
2971   /* Unless we find a marker with a buffer or an explicit buffer
2972      in LIST, assume that this match data came from a string.  */
2973   last_thing_searched = Qt;
2974
2975   /* Allocate registers if they don't already exist.  */
2976   {
2977     int length = XFASTINT (Flength (list)) / 2;
2978
2979     if (length > search_regs.num_regs)
2980       {
2981         if (search_regs.num_regs == 0)
2982           {
2983             search_regs.start
2984               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2985             search_regs.end
2986               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2987           }
2988         else
2989           {
2990             search_regs.start
2991               = (regoff_t *) xrealloc (search_regs.start,
2992                                        length * sizeof (regoff_t));
2993             search_regs.end
2994               = (regoff_t *) xrealloc (search_regs.end,
2995                                        length * sizeof (regoff_t));
2996           }
2997
2998         for (i = search_regs.num_regs; i < length; i++)
2999           search_regs.start[i] = -1;
3000
3001         search_regs.num_regs = length;
3002       }
3003
3004     for (i = 0; CONSP (list); i++)
3005       {
3006         marker = XCAR (list);
3007         if (BUFFERP (marker))
3008           {
3009             last_thing_searched = marker;
3010             break;
3011           }
3012         if (i >= length)
3013           break;
3014         if (NILP (marker))
3015           {
3016             search_regs.start[i] = -1;
3017             list = XCDR (list);
3018           }
3019         else
3020           {
3021             int from;
3022             Lisp_Object m;
3023
3024             m = marker;
3025             if (MARKERP (marker))
3026               {
3027                 if (XMARKER (marker)->buffer == 0)
3028                   XSETFASTINT (marker, 0);
3029                 else
3030                   XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3031               }
3032
3033             CHECK_NUMBER_COERCE_MARKER (marker);
3034             from = XINT (marker);
3035
3036             if (!NILP (reseat) && MARKERP (m))
3037               {
3038                 unchain_marker (XMARKER (m));
3039                 XSETCAR (list, Qnil);
3040               }
3041
3042             if ((list = XCDR (list), !CONSP (list)))
3043               break;
3044
3045             m = marker = XCAR (list);
3046
3047             if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3048               XSETFASTINT (marker, 0);
3049
3050             CHECK_NUMBER_COERCE_MARKER (marker);
3051             search_regs.start[i] = from;
3052             search_regs.end[i] = XINT (marker);
3053
3054             if (!NILP (reseat) && MARKERP (m))
3055               {
3056                 unchain_marker (XMARKER (m));
3057                 XSETCAR (list, Qnil);
3058               }
3059           }
3060         list = XCDR (list);
3061       }
3062
3063     for (; i < search_regs.num_regs; i++)
3064       search_regs.start[i] = -1;
3065   }
3066
3067   return Qnil;
3068 }
3069
3070 /* If non-zero the match data have been saved in saved_search_regs
3071    during the execution of a sentinel or filter. */
3072 static int search_regs_saved;
3073 static struct re_registers saved_search_regs;
3074 static Lisp_Object saved_last_thing_searched;
3075
3076 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3077    if asynchronous code (filter or sentinel) is running. */
3078 static void
3079 save_search_regs ()
3080 {
3081   if (!search_regs_saved)
3082     {
3083       saved_search_regs.num_regs = search_regs.num_regs;
3084       saved_search_regs.start = search_regs.start;
3085       saved_search_regs.end = search_regs.end;
3086       saved_last_thing_searched = last_thing_searched;
3087       last_thing_searched = Qnil;
3088       search_regs.num_regs = 0;
3089       search_regs.start = 0;
3090       search_regs.end = 0;
3091
3092       search_regs_saved = 1;
3093     }
3094 }
3095
3096 /* Called upon exit from filters and sentinels. */
3097 void
3098 restore_search_regs ()
3099 {
3100   if (search_regs_saved)
3101     {
3102       if (search_regs.num_regs > 0)
3103         {
3104           xfree (search_regs.start);
3105           xfree (search_regs.end);
3106         }
3107       search_regs.num_regs = saved_search_regs.num_regs;
3108       search_regs.start = saved_search_regs.start;
3109       search_regs.end = saved_search_regs.end;
3110       last_thing_searched = saved_last_thing_searched;
3111       saved_last_thing_searched = Qnil;
3112       search_regs_saved = 0;
3113     }
3114 }
3115
3116 static Lisp_Object
3117 unwind_set_match_data (list)
3118      Lisp_Object list;
3119 {
3120   /* It is NOT ALWAYS safe to free (evaporate) the markers immediately.  */
3121   return Fset_match_data (list, Qt);
3122 }
3123
3124 /* Called to unwind protect the match data.  */
3125 void
3126 record_unwind_save_match_data ()
3127 {
3128   record_unwind_protect (unwind_set_match_data,
3129                          Fmatch_data (Qnil, Qnil, Qnil));
3130 }
3131
3132 /* Quote a string to inactivate reg-expr chars */
3133
3134 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3135        doc: /* Return a regexp string which matches exactly STRING and nothing else.  */)
3136      (string)
3137      Lisp_Object string;
3138 {
3139   register unsigned char *in, *out, *end;
3140   register unsigned char *temp;
3141   int backslashes_added = 0;
3142
3143   CHECK_STRING (string);
3144
3145   temp = (unsigned char *) alloca (SBYTES (string) * 2);
3146
3147   /* Now copy the data into the new string, inserting escapes. */
3148
3149   in = SDATA (string);
3150   end = in + SBYTES (string);
3151   out = temp;
3152
3153   for (; in != end; in++)
3154     {
3155       if (*in == '['
3156           || *in == '*' || *in == '.' || *in == '\\'
3157           || *in == '?' || *in == '+'
3158           || *in == '^' || *in == '$')
3159         *out++ = '\\', backslashes_added++;
3160       *out++ = *in;
3161     }
3162
3163   return make_specified_string (temp,
3164                                 SCHARS (string) + backslashes_added,
3165                                 out - temp,
3166                                 STRING_MULTIBYTE (string));
3167 }
3168 \f
3169 void
3170 syms_of_search ()
3171 {
3172   register int i;
3173
3174   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3175     {
3176       searchbufs[i].buf.allocated = 100;
3177       searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3178       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3179       searchbufs[i].regexp = Qnil;
3180       searchbufs[i].whitespace_regexp = Qnil;
3181       searchbufs[i].syntax_table = Qnil;
3182       staticpro (&searchbufs[i].regexp);
3183       staticpro (&searchbufs[i].whitespace_regexp);
3184       staticpro (&searchbufs[i].syntax_table);
3185       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3186     }
3187   searchbuf_head = &searchbufs[0];
3188
3189   Qsearch_failed = intern ("search-failed");
3190   staticpro (&Qsearch_failed);
3191   Qinvalid_regexp = intern ("invalid-regexp");
3192   staticpro (&Qinvalid_regexp);
3193
3194   Fput (Qsearch_failed, Qerror_conditions,
3195         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3196   Fput (Qsearch_failed, Qerror_message,
3197         build_string ("Search failed"));
3198
3199   Fput (Qinvalid_regexp, Qerror_conditions,
3200         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3201   Fput (Qinvalid_regexp, Qerror_message,
3202         build_string ("Invalid regexp"));
3203
3204   last_thing_searched = Qnil;
3205   staticpro (&last_thing_searched);
3206
3207   saved_last_thing_searched = Qnil;
3208   staticpro (&saved_last_thing_searched);
3209
3210   DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3211       doc: /* Regexp to substitute for bunches of spaces in regexp search.
3212 Some commands use this for user-specified regexps.
3213 Spaces that occur inside character classes or repetition operators
3214 or other such regexp constructs are not replaced with this.
3215 A value of nil (which is the normal value) means treat spaces literally.  */);
3216   Vsearch_spaces_regexp = Qnil;
3217
3218   DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3219       doc: /* Internal use only.
3220 If non-nil, the primitive searching and matching functions
3221 such as `looking-at', `string-match', `re-search-forward', etc.,
3222 do not set the match data.  The proper way to use this variable
3223 is to bind it with `let' around a small expression.  */);
3224   Vinhibit_changing_match_data = Qnil;
3225
3226   defsubr (&Slooking_at);
3227   defsubr (&Sposix_looking_at);
3228   defsubr (&Sstring_match);
3229   defsubr (&Sposix_string_match);
3230   defsubr (&Ssearch_forward);
3231   defsubr (&Ssearch_backward);
3232   defsubr (&Sword_search_forward);
3233   defsubr (&Sword_search_backward);
3234   defsubr (&Sre_search_forward);
3235   defsubr (&Sre_search_backward);
3236   defsubr (&Sposix_search_forward);
3237   defsubr (&Sposix_search_backward);
3238   defsubr (&Sreplace_match);
3239   defsubr (&Smatch_beginning);
3240   defsubr (&Smatch_end);
3241   defsubr (&Smatch_data);
3242   defsubr (&Sset_match_data);
3243   defsubr (&Sregexp_quote);
3244 }
3245
3246 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3247    (do not change this comment) */