code.delx.au - gnu-emacs/blob - src/search.c

   1 /* String search routines for GNU Emacs.
   2    Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
   3
   4 This file is part of GNU Emacs.
   5
   6 GNU Emacs is free software; you can redistribute it and/or modify
   7 it under the terms of the GNU General Public License as published by
   8 the Free Software Foundation; either version 2, or (at your option)
   9 any later version.
  10
  11 GNU Emacs is distributed in the hope that it will be useful,
  12 but WITHOUT ANY WARRANTY; without even the implied warranty of
  13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  14 GNU General Public License for more details.
  15
  16 You should have received a copy of the GNU General Public License
  17 along with GNU Emacs; see the file COPYING.  If not, write to
  18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  19 Boston, MA 02111-1307, USA.  */
  20
  21
  22 #include <config.h>
  23 #include "lisp.h"
  24 #include "syntax.h"
  25 #include "category.h"
  26 #include "buffer.h"
  27 #include "charset.h"
  28 #include "region-cache.h"
  29 #include "commands.h"
  30 #include "blockinput.h"
  31 #include "intervals.h"
  32
  33 #include <sys/types.h>
  34 #include "regex.h"
  35
/* Number of cells in the compiled-regexp cache; this is the length of
   the searchbufs array below.  */
#define REGEXP_CACHE_SIZE 20

/* If the regexp is non-nil, then the buffer contains the compiled form
   of that regexp, suitable for searching.  */
struct regexp_cache
{
  /* Next cell in the list headed by searchbuf_head.  compile_pattern
     treats a null link as the end of the cache.  */
  struct regexp_cache *next;
  /* The pattern this cell was compiled from (compile_pattern_1 stores
     a copy of it), or nil.  */
  Lisp_Object regexp;
  /* The compiled pattern.  Its translate and multibyte members record
     the settings the pattern was compiled with.  */
  struct re_pattern_buffer buf;
  /* NOTE(review): presumably the fastmap storage used by BUF; the code
     that attaches it is not in the part of the file seen here --
     confirm.  */
  char fastmap[0400];
  /* Nonzero means regexp was compiled to do full POSIX backtracking.  */
  char posix;
};

/* The instances of that struct.  */
struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];

/* The head of the linked list; points to the most recently used buffer.  */
struct regexp_cache *searchbuf_head;
  55
  56
/* Every call to re_match, etc., must pass &search_regs as the regs
   argument unless you can show it is unnecessary (i.e., if re_match
   is certainly going to be called again before region-around-match
   can be called).

   Since the registers are now dynamically allocated, we need to make
   sure not to refer to the Nth register before checking that it has
   been allocated by checking search_regs.num_regs.

   The regex code keeps track of whether it has allocated the search
   buffer using bits in the re_pattern_buffer.  This means that whenever
   you compile a new pattern, it completely forgets whether it has
   allocated any registers, and will allocate new registers the next
   time you call a searching or matching function.  Therefore, we need
   to call re_set_registers after compiling a new pattern or after
   setting the match registers, so that the regex functions will be
   able to free or re-allocate it properly.  */
static struct re_registers search_regs;

/* The buffer in which the last search was performed, or
   Qt if the last search was done in a string;
   Qnil if no searching has been done yet.  */
static Lisp_Object last_thing_searched;

/* Error condition signaled by compile_pattern_1 when a regexp fails
   to compile.  */

Lisp_Object Qinvalid_regexp;

/* Forward declarations (old-style, without parameter lists) of static
   functions that are used before their definitions.  */
static void set_search_regs ();
static void save_search_regs ();

static int search_buffer ();
  89
/* Signal a Lisp error reporting that the regexp matcher overflowed its
   stack.  The callers in this file invoke it when re_match_2 or
   re_search returns -2.  */
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}
  95
/* CONST expands to the `const' qualifier when the compiler defines
   __STDC__, and to nothing otherwise.  */
#ifdef __STDC__
#define CONST const
#else
#define CONST
#endif
 101
 102 /* Compile a regexp and signal a Lisp error if anything goes wrong.
 103    PATTERN is the pattern to compile.
 104    CP is the place to put the result.
 105    TRANSLATE is a translation table for ignoring case, or NULL for none.
 106    REGP is the structure that says where to store the "register"
 107    values that will result from matching this pattern.
 108    If it is 0, we should compile the pattern not to record any
 109    subexpression bounds.
 110    POSIX is nonzero if we want full backtracking (POSIX style)
 111    for this pattern.  0 means backtrack only enough to get a valid match.
 112    MULTIBYTE is nonzero if we want to handle multibyte characters in
 113    PATTERN.  0 means all multibyte characters are recognized just as
 114    sequences of binary data.  */
 115
 116 static void
 117 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
 118      struct regexp_cache *cp;
 119      Lisp_Object pattern;
 120      Lisp_Object *translate;
 121      struct re_registers *regp;
 122      int posix;
 123      int multibyte;
 124 {
 125   char *raw_pattern;
 126   int raw_pattern_size;
 127   char *val;
 128   reg_syntax_t old;
 129
 130   /* MULTIBYTE says whether the text to be searched is multibyte.
 131      We must convert PATTERN to match that, or we will not really
 132      find things right.  */
 133
 134   if (multibyte == STRING_MULTIBYTE (pattern))
 135     {
 136       raw_pattern = (char *) XSTRING (pattern)->data;
 137       raw_pattern_size = XSTRING (pattern)->size_byte;
 138     }
 139   else if (multibyte)
 140     {
 141       raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
 142                                                   XSTRING (pattern)->size);
 143       raw_pattern = (char *) alloca (raw_pattern_size + 1);
 144       copy_text (XSTRING (pattern)->data, raw_pattern,
 145                  XSTRING (pattern)->size, 0, 1);
 146     }
 147   else
 148     {
 149       /* Converting multibyte to single-byte.
 150
 151          ??? Perhaps this conversion should be done in a special way
 152          by subtracting nonascii-insert-offset from each non-ASCII char,
 153          so that only the multibyte chars which really correspond to
 154          the chosen single-byte character set can possibly match.  */
 155       raw_pattern_size = XSTRING (pattern)->size;
 156       raw_pattern = (char *) alloca (raw_pattern_size + 1);
 157       copy_text (XSTRING (pattern)->data, raw_pattern,
 158                  XSTRING (pattern)->size, 1, 0);
 159     }
 160
 161   cp->regexp = Qnil;
 162   cp->buf.translate = translate;
 163   cp->posix = posix;
 164   cp->buf.multibyte = multibyte;
 165   BLOCK_INPUT;
 166   old = re_set_syntax (RE_SYNTAX_EMACS
 167                        | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
 168   val = (char *) re_compile_pattern (raw_pattern, raw_pattern_size, &cp->buf);
 169   re_set_syntax (old);
 170   UNBLOCK_INPUT;
 171   if (val)
 172     Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
 173
 174   cp->regexp = Fcopy_sequence (pattern);
 175 }
 176
 177 /* Compile a regexp if necessary, but first check to see if there's one in
 178    the cache.
 179    PATTERN is the pattern to compile.
 180    TRANSLATE is a translation table for ignoring case, or NULL for none.
 181    REGP is the structure that says where to store the "register"
 182    values that will result from matching this pattern.
 183    If it is 0, we should compile the pattern not to record any
 184    subexpression bounds.
 185    POSIX is nonzero if we want full backtracking (POSIX style)
 186    for this pattern.  0 means backtrack only enough to get a valid match.  */
 187
 188 struct re_pattern_buffer *
 189 compile_pattern (pattern, regp, translate, posix, multibyte)
 190      Lisp_Object pattern;
 191      struct re_registers *regp;
 192      Lisp_Object *translate;
 193      int posix, multibyte;
 194 {
 195   struct regexp_cache *cp, **cpp;
 196
 197   for (cpp = &searchbuf_head; ; cpp = &cp->next)
 198     {
 199       cp = *cpp;
 200       if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
 201           && !NILP (Fstring_equal (cp->regexp, pattern))
 202           && cp->buf.translate == translate
 203           && cp->posix == posix
 204           && cp->buf.multibyte == multibyte)
 205         break;
 206
 207       /* If we're at the end of the cache, compile into the last cell.  */
 208       if (cp->next == 0)
 209         {
 210           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
 211           break;
 212         }
 213     }
 214
 215   /* When we get here, cp (aka *cpp) contains the compiled pattern,
 216      either because we found it in the cache or because we just compiled it.
 217      Move it to the front of the queue to mark it as most recently used.  */
 218   *cpp = cp->next;
 219   cp->next = searchbuf_head;
 220   searchbuf_head = cp;
 221
 222   /* Advise the searching functions about the space we have allocated
 223      for register data.  */
 224   if (regp)
 225     re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
 226
 227   return &cp->buf;
 228 }
 229
 230 /* Error condition used for failing searches */
 231 Lisp_Object Qsearch_failed;
 232
 233 Lisp_Object
 234 signal_failure (arg)
 235      Lisp_Object arg;
 236 {
 237   Fsignal (Qsearch_failed, Fcons (arg, Qnil));
 238   return Qnil;
 239 }
 240 \f
 241 static Lisp_Object
 242 looking_at_1 (string, posix)
 243      Lisp_Object string;
 244      int posix;
 245 {
 246   Lisp_Object val;
 247   unsigned char *p1, *p2;
 248   int s1, s2;
 249   register int i;
 250   struct re_pattern_buffer *bufp;
 251
 252   if (running_asynch_code)
 253     save_search_regs ();
 254
 255   CHECK_STRING (string, 0);
 256   bufp = compile_pattern (string, &search_regs,
 257                           (!NILP (current_buffer->case_fold_search)
 258                            ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
 259                           posix,
 260                           !NILP (current_buffer->enable_multibyte_characters));
 261
 262   immediate_quit = 1;
 263   QUIT;                 /* Do a pending quit right away, to avoid paradoxical behavior */
 264
 265   /* Get pointers and sizes of the two strings
 266      that make up the visible portion of the buffer. */
 267
 268   p1 = BEGV_ADDR;
 269   s1 = GPT_BYTE - BEGV_BYTE;
 270   p2 = GAP_END_ADDR;
 271   s2 = ZV_BYTE - GPT_BYTE;
 272   if (s1 < 0)
 273     {
 274       p2 = p1;
 275       s2 = ZV_BYTE - BEGV_BYTE;
 276       s1 = 0;
 277     }
 278   if (s2 < 0)
 279     {
 280       s1 = ZV_BYTE - BEGV_BYTE;
 281       s2 = 0;
 282     }
 283
 284   re_match_object = Qnil;
 285
 286   i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
 287                   PT_BYTE - BEGV_BYTE, &search_regs,
 288                   ZV_BYTE - BEGV_BYTE);
 289   if (i == -2)
 290     matcher_overflow ();
 291
 292   val = (0 <= i ? Qt : Qnil);
 293   if (i >= 0)
 294     for (i = 0; i < search_regs.num_regs; i++)
 295       if (search_regs.start[i] >= 0)
 296         {
 297           search_regs.start[i]
 298             = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
 299           search_regs.end[i]
 300             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
 301         }
 302   XSETBUFFER (last_thing_searched, current_buffer);
 303   immediate_quit = 0;
 304   return val;
 305 }
 306
 307 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
 308   "Return t if text after point matches regular expression REGEXP.\n\
 309 This function modifies the match data that `match-beginning',\n\
 310 `match-end' and `match-data' access; save and restore the match\n\
 311 data if you want to preserve them.")
 312   (regexp)
 313      Lisp_Object regexp;
 314 {
 315   return looking_at_1 (regexp, 0);
 316 }
 317
 318 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
 319   "Return t if text after point matches regular expression REGEXP.\n\
 320 Find the longest match, in accord with Posix regular expression rules.\n\
 321 This function modifies the match data that `match-beginning',\n\
 322 `match-end' and `match-data' access; save and restore the match\n\
 323 data if you want to preserve them.")
 324   (regexp)
 325      Lisp_Object regexp;
 326 {
 327   return looking_at_1 (regexp, 1);
 328 }
 329 \f
 330 static Lisp_Object
 331 string_match_1 (regexp, string, start, posix)
 332      Lisp_Object regexp, string, start;
 333      int posix;
 334 {
 335   int val;
 336   struct re_pattern_buffer *bufp;
 337   int pos, pos_byte;
 338   int i;
 339
 340   if (running_asynch_code)
 341     save_search_regs ();
 342
 343   CHECK_STRING (regexp, 0);
 344   CHECK_STRING (string, 1);
 345
 346   if (NILP (start))
 347     pos = 0, pos_byte = 0;
 348   else
 349     {
 350       int len = XSTRING (string)->size;
 351
 352       CHECK_NUMBER (start, 2);
 353       pos = XINT (start);
 354       if (pos < 0 && -pos <= len)
 355         pos = len + pos;
 356       else if (0 > pos || pos > len)
 357         args_out_of_range (string, start);
 358       pos_byte = string_char_to_byte (string, pos);
 359     }
 360
 361   bufp = compile_pattern (regexp, &search_regs,
 362                           (!NILP (current_buffer->case_fold_search)
 363                            ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
 364                           posix,
 365                           STRING_MULTIBYTE (string));
 366   immediate_quit = 1;
 367   re_match_object = string;
 368
 369   val = re_search (bufp, (char *) XSTRING (string)->data,
 370                    XSTRING (string)->size_byte, pos_byte,
 371                    XSTRING (string)->size_byte - pos_byte,
 372                    &search_regs);
 373   immediate_quit = 0;
 374   last_thing_searched = Qt;
 375   if (val == -2)
 376     matcher_overflow ();
 377   if (val < 0) return Qnil;
 378
 379   for (i = 0; i < search_regs.num_regs; i++)
 380     if (search_regs.start[i] >= 0)
 381       {
 382         search_regs.start[i]
 383           = string_byte_to_char (string, search_regs.start[i]);
 384         search_regs.end[i]
 385           = string_byte_to_char (string, search_regs.end[i]);
 386       }
 387
 388   return make_number (string_byte_to_char (string, val));
 389 }
 390
 391 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
 392   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 393 If third arg START is non-nil, start search at that index in STRING.\n\
 394 For index of first char beyond the match, do (match-end 0).\n\
 395 `match-end' and `match-beginning' also give indices of substrings\n\
 396 matched by parenthesis constructs in the pattern.")
 397   (regexp, string, start)
 398      Lisp_Object regexp, string, start;
 399 {
 400   return string_match_1 (regexp, string, start, 0);
 401 }
 402
 403 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
 404   "Return index of start of first match for REGEXP in STRING, or nil.\n\
 405 Find the longest match, in accord with Posix regular expression rules.\n\
 406 If third arg START is non-nil, start search at that index in STRING.\n\
 407 For index of first char beyond the match, do (match-end 0).\n\
 408 `match-end' and `match-beginning' also give indices of substrings\n\
 409 matched by parenthesis constructs in the pattern.")
 410   (regexp, string, start)
 411      Lisp_Object regexp, string, start;
 412 {
 413   return string_match_1 (regexp, string, start, 1);
 414 }
 415
 416 /* Match REGEXP against STRING, searching all of STRING,
 417    and return the index of the match, or negative on failure.
 418    This does not clobber the match data.  */
 419
 420 int
 421 fast_string_match (regexp, string)
 422      Lisp_Object regexp, string;
 423 {
 424   int val;
 425   struct re_pattern_buffer *bufp;
 426
 427   bufp = compile_pattern (regexp, 0, 0, 0, STRING_MULTIBYTE (string));
 428   immediate_quit = 1;
 429   re_match_object = string;
 430
 431   val = re_search (bufp, (char *) XSTRING (string)->data,
 432                    XSTRING (string)->size_byte, 0, XSTRING (string)->size_byte,
 433                    0);
 434   immediate_quit = 0;
 435   return val;
 436 }
 437
 438 /* Match REGEXP against STRING, searching all of STRING ignoring case,
 439    and return the index of the match, or negative on failure.
 440    This does not clobber the match data.
 441    We assume that STRING contains single-byte characters.  */
 442
 443 extern Lisp_Object Vascii_downcase_table;
 444
 445 int
 446 fast_c_string_match_ignore_case (regexp, string)
 447      Lisp_Object regexp;
 448      char *string;
 449 {
 450   int val;
 451   struct re_pattern_buffer *bufp;
 452   int len = strlen (string);
 453
 454   regexp = string_make_unibyte (regexp);
 455   re_match_object = Qt;
 456   bufp = compile_pattern (regexp, 0,
 457                           XCHAR_TABLE (Vascii_downcase_table)->contents, 0,
 458                           0);
 459   immediate_quit = 1;
 460   val = re_search (bufp, string, len, 0, len, 0);
 461   immediate_quit = 0;
 462   return val;
 463 }
 464 \f
 465 /* max and min.  */
 466
 467 static int
 468 max (a, b)
 469      int a, b;
 470 {
 471   return ((a > b) ? a : b);
 472 }
 473
 474 static int
 475 min (a, b)
 476      int a, b;
 477 {
 478   return ((a < b) ? a : b);
 479 }
 480
 481 \f
 482 /* The newline cache: remembering which sections of text have no newlines.  */
 483
 484 /* If the user has requested newline caching, make sure it's on.
 485    Otherwise, make sure it's off.
 486    This is our cheezy way of associating an action with the change of
 487    state of a buffer-local variable.  */
 488 static void
 489 newline_cache_on_off (buf)
 490      struct buffer *buf;
 491 {
 492   if (NILP (buf->cache_long_line_scans))
 493     {
 494       /* It should be off.  */
 495       if (buf->newline_cache)
 496         {
 497           free_region_cache (buf->newline_cache);
 498           buf->newline_cache = 0;
 499         }
 500     }
 501   else
 502     {
 503       /* It should be on.  */
 504       if (buf->newline_cache == 0)
 505         buf->newline_cache = new_region_cache ();
 506     }
 507 }
 508
 509 \f
 510 /* Search for COUNT instances of the character TARGET between START and END.
 511
 512    If COUNT is positive, search forwards; END must be >= START.
 513    If COUNT is negative, search backwards for the -COUNTth instance;
 514       END must be <= START.
 515    If COUNT is zero, do anything you please; run rogue, for all I care.
 516
 517    If END is zero, use BEGV or ZV instead, as appropriate for the
 518    direction indicated by COUNT.
 519
 520    If we find COUNT instances, set *SHORTAGE to zero, and return the
 521    position after the COUNTth match.  Note that for reverse motion
 522    this is not the same as the usual convention for Emacs motion commands.
 523
 524    If we don't find COUNT instances before reaching END, set *SHORTAGE
 525    to the number of TARGETs left unfound, and return END.
 526
 527    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 528    except when inside redisplay.  */
 529
 530 scan_buffer (target, start, end, count, shortage, allow_quit)
 531      register int target;
 532      int start, end;
 533      int count;
 534      int *shortage;
 535      int allow_quit;
 536 {
 537   struct region_cache *newline_cache;
 538   int direction;
 539
 540   if (count > 0)
 541     {
 542       direction = 1;
 543       if (! end) end = ZV;
 544     }
 545   else
 546     {
 547       direction = -1;
 548       if (! end) end = BEGV;
 549     }
 550
 551   newline_cache_on_off (current_buffer);
 552   newline_cache = current_buffer->newline_cache;
 553
 554   if (shortage != 0)
 555     *shortage = 0;
 556
 557   immediate_quit = allow_quit;
 558
 559   if (count > 0)
 560     while (start != end)
 561       {
 562         /* Our innermost scanning loop is very simple; it doesn't know
 563            about gaps, buffer ends, or the newline cache.  ceiling is
 564            the position of the last character before the next such
 565            obstacle --- the last character the dumb search loop should
 566            examine.  */
 567         int ceiling_byte = CHAR_TO_BYTE (end) - 1;
 568         int start_byte = CHAR_TO_BYTE (start);
 569
 570         /* If we're looking for a newline, consult the newline cache
 571            to see where we can avoid some scanning.  */
 572         if (target == '\n' && newline_cache)
 573           {
 574             int next_change;
 575             immediate_quit = 0;
 576             while (region_cache_forward
 577                    (current_buffer, newline_cache, start_byte, &next_change))
 578               start_byte = next_change;
 579             immediate_quit = allow_quit;
 580
 581             /* START should never be after END.  */
 582             if (start_byte > ceiling_byte)
 583               start_byte = ceiling_byte;
 584
 585             /* Now the text after start is an unknown region, and
 586                next_change is the position of the next known region. */
 587             ceiling_byte = min (next_change - 1, ceiling_byte);
 588           }
 589
 590         /* The dumb loop can only scan text stored in contiguous
 591            bytes. BUFFER_CEILING_OF returns the last character
 592            position that is contiguous, so the ceiling is the
 593            position after that.  */
 594         ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte);
 595
 596         {
 597           /* The termination address of the dumb loop.  */
 598           register unsigned char *ceiling_addr
 599             = BYTE_POS_ADDR (ceiling_byte) + 1;
 600           register unsigned char *cursor
 601             = BYTE_POS_ADDR (start_byte);
 602           unsigned char *base = cursor;
 603
 604           while (cursor < ceiling_addr)
 605             {
 606               unsigned char *scan_start = cursor;
 607
 608               /* The dumb loop.  */
 609               while (*cursor != target && ++cursor < ceiling_addr)
 610                 ;
 611
 612               /* If we're looking for newlines, cache the fact that
 613                  the region from start to cursor is free of them. */
 614               if (target == '\n' && newline_cache)
 615                 know_region_cache (current_buffer, newline_cache,
 616                                    start_byte + scan_start - base,
 617                                    start_byte + cursor - base);
 618
 619               /* Did we find the target character?  */
 620               if (cursor < ceiling_addr)
 621                 {
 622                   if (--count == 0)
 623                     {
 624                       immediate_quit = 0;
 625                       return BYTE_TO_CHAR (start_byte + cursor - base + 1);
 626                     }
 627                   cursor++;
 628                 }
 629             }
 630
 631           start = BYTE_TO_CHAR (start_byte + cursor - base);
 632         }
 633       }
 634   else
 635     while (start > end)
 636       {
 637         /* The last character to check before the next obstacle.  */
 638         int ceiling_byte = CHAR_TO_BYTE (end);
 639         int start_byte = CHAR_TO_BYTE (start);
 640
 641         /* Consult the newline cache, if appropriate.  */
 642         if (target == '\n' && newline_cache)
 643           {
 644             int next_change;
 645             immediate_quit = 0;
 646             while (region_cache_backward
 647                    (current_buffer, newline_cache, start_byte, &next_change))
 648               start_byte = next_change;
 649             immediate_quit = allow_quit;
 650
 651             /* Start should never be at or before end.  */
 652             if (start_byte <= ceiling_byte)
 653               start_byte = ceiling_byte + 1;
 654
 655             /* Now the text before start is an unknown region, and
 656                next_change is the position of the next known region. */
 657             ceiling_byte = max (next_change, ceiling_byte);
 658           }
 659
 660         /* Stop scanning before the gap.  */
 661         ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte);
 662
 663         {
 664           /* The termination address of the dumb loop.  */
 665           register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
 666           register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
 667           unsigned char *base = cursor;
 668
 669           while (cursor >= ceiling_addr)
 670             {
 671               unsigned char *scan_start = cursor;
 672
 673               while (*cursor != target && --cursor >= ceiling_addr)
 674                 ;
 675
 676               /* If we're looking for newlines, cache the fact that
 677                  the region from after the cursor to start is free of them.  */
 678               if (target == '\n' && newline_cache)
 679                 know_region_cache (current_buffer, newline_cache,
 680                                    start_byte + cursor - base,
 681                                    start_byte + scan_start - base);
 682
 683               /* Did we find the target character?  */
 684               if (cursor >= ceiling_addr)
 685                 {
 686                   if (++count >= 0)
 687                     {
 688                       immediate_quit = 0;
 689                       return BYTE_TO_CHAR (start_byte + cursor - base);
 690                     }
 691                   cursor--;
 692                 }
 693             }
 694
 695           start = BYTE_TO_CHAR (start_byte + cursor - base);
 696         }
 697       }
 698
 699   immediate_quit = 0;
 700   if (shortage != 0)
 701     *shortage = count * direction;
 702   return start;
 703 }
 704 \f
 705 /* Search for COUNT instances of a line boundary, which means either a
 706    newline or (if selective display enabled) a carriage return.
 707    Start at START.  If COUNT is negative, search backwards.
 708
 709    We report the resulting position by calling TEMP_SET_PT_BOTH.
 710
 711    If we find COUNT instances. we position after (always after,
 712    even if scanning backwards) the COUNTth match, and return 0.
 713
 714    If we don't find COUNT instances before reaching the end of the
 715    buffer (or the beginning, if scanning backwards), we return
 716    the number of line boundaries left unfound, and position at
 717    the limit we bumped up against.
 718
 719    If ALLOW_QUIT is non-zero, set immediate_quit.  That's good to do
 720    except in special cases.  */
 721
 722 int
 723 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
 724      int start, start_byte;
 725      int limit, limit_byte;
 726      register int count;
 727      int allow_quit;
 728 {
 729   int direction = ((count > 0) ? 1 : -1);
 730
 731   register unsigned char *cursor;
 732   unsigned char *base;
 733
 734   register int ceiling;
 735   register unsigned char *ceiling_addr;
 736
 737   int old_immediate_quit = immediate_quit;
 738
 739   /* If we are not in selective display mode,
 740      check only for newlines.  */
 741   int selective_display = (!NILP (current_buffer->selective_display)
 742                            && !INTEGERP (current_buffer->selective_display));
 743
 744   /* The code that follows is like scan_buffer
 745      but checks for either newline or carriage return.  */
 746
 747   if (allow_quit)
 748     immediate_quit++;
 749
 750   start_byte = CHAR_TO_BYTE (start);
 751
 752   if (count > 0)
 753     {
 754       while (start_byte < limit_byte)
 755         {
 756           ceiling =  BUFFER_CEILING_OF (start_byte);
 757           ceiling = min (limit_byte - 1, ceiling);
 758           ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
 759           base = (cursor = BYTE_POS_ADDR (start_byte));
 760           while (1)
 761             {
 762               while (*cursor != '\n' && ++cursor != ceiling_addr)
 763                 ;
 764
 765               if (cursor != ceiling_addr)
 766                 {
 767                   if (--count == 0)
 768                     {
 769                       immediate_quit = old_immediate_quit;
 770                       start_byte = start_byte + cursor - base + 1;
 771                       start = BYTE_TO_CHAR (start_byte);
 772                       TEMP_SET_PT_BOTH (start, start_byte);
 773                       return 0;
 774                     }
 775                   else
 776                     if (++cursor == ceiling_addr)
 777                       break;
 778                 }
 779               else
 780                 break;
 781             }
 782           start_byte += cursor - base;
 783         }
 784     }
 785   else
 786     {
 787       int start_byte = CHAR_TO_BYTE (start);
 788       while (start_byte > limit_byte)
 789         {
 790           ceiling = BUFFER_FLOOR_OF (start_byte - 1);
 791           ceiling = max (limit_byte, ceiling);
 792           ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
 793           base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
 794           while (1)
 795             {
 796               while (--cursor != ceiling_addr && *cursor != '\n')
 797                 ;
 798
 799               if (cursor != ceiling_addr)
 800                 {
 801                   if (++count == 0)
 802                     {
 803                       immediate_quit = old_immediate_quit;
 804                       /* Return the position AFTER the match we found.  */
 805                       start_byte = start_byte + cursor - base + 1;
 806                       start = BYTE_TO_CHAR (start_byte);
 807                       TEMP_SET_PT_BOTH (start, start_byte);
 808                       return 0;
 809                     }
 810                 }
 811               else
 812                 break;
 813             }
 814           /* Here we add 1 to compensate for the last decrement
 815              of CURSOR, which took it past the valid range.  */
 816           start_byte += cursor - base + 1;
 817         }
 818     }
 819
 820   TEMP_SET_PT_BOTH (limit, limit_byte);
 821   immediate_quit = old_immediate_quit;
 822
 823   return count * direction;
 824 }
 825
 826 int
 827 find_next_newline_no_quit (from, cnt)
 828      register int from, cnt;
 829 {
 830   return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
 831 }
 832
 833 /* Like find_next_newline, but returns position before the newline,
 834    not after, and only search up to TO.  This isn't just
 835    find_next_newline (...)-1, because you might hit TO.  */
 836
 837 int
 838 find_before_next_newline (from, to, cnt)
 839      int from, to, cnt;
 840 {
 841   int shortage;
 842   int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
 843
 844   if (shortage == 0)
 845     pos--;
 846
 847   return pos;
 848 }
 849 \f
 850 /* Subroutines of Lisp buffer search functions. */
 851
 852 static Lisp_Object
 853 search_command (string, bound, noerror, count, direction, RE, posix)
 854      Lisp_Object string, bound, noerror, count;
 855      int direction;
 856      int RE;
 857      int posix;
 858 {
 859   register int np;
 860   int lim;
 861   int n = direction;
 862
 863   if (!NILP (count))
 864     {
 865       CHECK_NUMBER (count, 3);
 866       n *= XINT (count);
 867     }
 868
 869   CHECK_STRING (string, 0);
 870   if (NILP (bound))
 871     lim = n > 0 ? ZV : BEGV;
 872   else
 873     {
 874       CHECK_NUMBER_COERCE_MARKER (bound, 1);
 875       lim = XINT (bound);
 876       if (n > 0 ? lim < PT : lim > PT)
 877         error ("Invalid search bound (wrong side of point)");
 878       if (lim > ZV)
 879         lim = ZV;
 880       if (lim < BEGV)
 881         lim = BEGV;
 882     }
 883
 884   np = search_buffer (string, PT, lim, n, RE,
 885                       (!NILP (current_buffer->case_fold_search)
 886                        ? XCHAR_TABLE (current_buffer->case_canon_table)->contents
 887                        : 0),
 888                       (!NILP (current_buffer->case_fold_search)
 889                        ? XCHAR_TABLE (current_buffer->case_eqv_table)->contents
 890                        : 0),
 891                       posix);
 892   if (np <= 0)
 893     {
 894       if (NILP (noerror))
 895         return signal_failure (string);
 896       if (!EQ (noerror, Qt))
 897         {
 898           if (lim < BEGV || lim > ZV)
 899             abort ();
 900           SET_PT (lim);
 901           return Qnil;
 902 #if 0 /* This would be clean, but maybe programs depend on
 903          a value of nil here.  */
 904           np = lim;
 905 #endif
 906         }
 907       else
 908         return Qnil;
 909     }
 910
 911   if (np < BEGV || np > ZV)
 912     abort ();
 913
 914   SET_PT (np);
 915
 916   return make_number (np);
 917 }
 918 \f
 919 /* Return 1 if REGEXP it matches just one constant string.  */
 920
 921 static int
 922 trivial_regexp_p (regexp)
 923      Lisp_Object regexp;
 924 {
 925   int len = XSTRING (regexp)->size_byte;
 926   unsigned char *s = XSTRING (regexp)->data;
 927   unsigned char c;
 928   while (--len >= 0)
 929     {
 930       switch (*s++)
 931         {
 932         case '.': case '*': case '+': case '?': case '[': case '^': case '$':
 933           return 0;
 934         case '\\':
 935           if (--len < 0)
 936             return 0;
 937           switch (*s++)
 938             {
 939             case '|': case '(': case ')': case '`': case '\'': case 'b':
 940             case 'B': case '<': case '>': case 'w': case 'W': case 's':
 941             case 'S': case '=':
 942             case 'c': case 'C': /* for categoryspec and notcategoryspec */
 943             case '1': case '2': case '3': case '4': case '5':
 944             case '6': case '7': case '8': case '9':
 945               return 0;
 946             }
 947         }
 948     }
 949   return 1;
 950 }
 951
 952 /* Search for the n'th occurrence of STRING in the current buffer,
 953    starting at position POS and stopping at position LIM,
 954    treating STRING as a literal string if RE is false or as
 955    a regular expression if RE is true.
 956
 957    If N is positive, searching is forward and LIM must be greater than POS.
 958    If N is negative, searching is backward and LIM must be less than POS.
 959
 960    Returns -x if only N-x occurrences found (x > 0),
 961    or else the position at the beginning of the Nth occurrence
 962    (if searching backward) or the end (if searching forward).
 963
 964    POSIX is nonzero if we want full backtracking (POSIX style)
 965    for this pattern.  0 means backtrack only enough to get a valid match.  */
 966
 967 static int
 968 search_buffer (string, pos, lim, n, RE, trt, inverse_trt, posix)
 969      Lisp_Object string;
 970      int pos;
 971      int lim;
 972      int n;
 973      int RE;
 974      Lisp_Object *trt;
 975      Lisp_Object *inverse_trt;
 976      int posix;
 977 {
 978   int len = XSTRING (string)->size;
 979   int len_byte = XSTRING (string)->size_byte;
 980   unsigned char *base_pat = XSTRING (string)->data;
 981   register int *BM_tab;
 982   int *BM_tab_base;
 983   register int direction = ((n > 0) ? 1 : -1);
 984   register int dirlen;
 985   int infinity, limit, k, stride_for_teases;
 986   register unsigned char *pat, *cursor, *p_limit;
 987   register int i, j;
 988   unsigned char *p1, *p2;
 989   int s1, s2;
 990
 991   if (running_asynch_code)
 992     save_search_regs ();
 993
 994   /* Null string is found at starting position.  */
 995   if (len == 0)
 996     {
 997       set_search_regs (pos, 0);
 998       return pos;
 999     }
1000
1001   /* Searching 0 times means don't move.  */
1002   if (n == 0)
1003     return pos;
1004
1005   if (RE && !trivial_regexp_p (string))
1006     {
1007       struct re_pattern_buffer *bufp;
1008       int pos_byte = CHAR_TO_BYTE (pos);
1009       int lim_byte = CHAR_TO_BYTE (lim);
1010
1011       bufp = compile_pattern (string, &search_regs, trt, posix,
1012                               !NILP (current_buffer->enable_multibyte_characters));
1013
1014       immediate_quit = 1;       /* Quit immediately if user types ^G,
1015                                    because letting this function finish
1016                                    can take too long. */
1017       QUIT;                     /* Do a pending quit right away,
1018                                    to avoid paradoxical behavior */
1019       /* Get pointers and sizes of the two strings
1020          that make up the visible portion of the buffer. */
1021
1022       p1 = BEGV_ADDR;
1023       s1 = GPT_BYTE - BEGV_BYTE;
1024       p2 = GAP_END_ADDR;
1025       s2 = ZV_BYTE - GPT_BYTE;
1026       if (s1 < 0)
1027         {
1028           p2 = p1;
1029           s2 = ZV_BYTE - BEGV_BYTE;
1030           s1 = 0;
1031         }
1032       if (s2 < 0)
1033         {
1034           s1 = ZV_BYTE - BEGV_BYTE;
1035           s2 = 0;
1036         }
1037       re_match_object = Qnil;
1038
1039       while (n < 0)
1040         {
1041           int val;
1042           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1043                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1044                              &search_regs,
1045                              /* Don't allow match past current point */
1046                              pos_byte - BEGV_BYTE);
1047           if (val == -2)
1048             {
1049               matcher_overflow ();
1050             }
1051           if (val >= 0)
1052             {
1053               for (i = 0; i < search_regs.num_regs; i++)
1054                 if (search_regs.start[i] >= 0)
1055                   {
1056                     search_regs.start[i]
1057                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1058                     search_regs.end[i]
1059                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1060                   }
1061               XSETBUFFER (last_thing_searched, current_buffer);
1062               /* Set pos to the new position. */
1063               pos = search_regs.start[0];
1064             }
1065           else
1066             {
1067               immediate_quit = 0;
1068               return (n);
1069             }
1070           n++;
1071         }
1072       while (n > 0)
1073         {
1074           int val;
1075           val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1076                              pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1077                              &search_regs,
1078                              lim_byte - BEGV_BYTE);
1079           if (val == -2)
1080             {
1081               matcher_overflow ();
1082             }
1083           if (val >= 0)
1084             {
1085               for (i = 0; i < search_regs.num_regs; i++)
1086                 if (search_regs.start[i] >= 0)
1087                   {
1088                     search_regs.start[i]
1089                       = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1090                     search_regs.end[i]
1091                       = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1092                   }
1093               XSETBUFFER (last_thing_searched, current_buffer);
1094               pos = search_regs.end[0];
1095             }
1096           else
1097             {
1098               immediate_quit = 0;
1099               return (0 - n);
1100             }
1101           n--;
1102         }
1103       immediate_quit = 0;
1104       return (pos);
1105     }
1106   else                          /* non-RE case */
1107     {
1108       int pos_byte = CHAR_TO_BYTE (pos);
1109       int lim_byte = CHAR_TO_BYTE (lim);
1110 #ifdef C_ALLOCA
1111       int BM_tab_space[0400];
1112       BM_tab = &BM_tab_space[0];
1113 #else
1114       BM_tab = (int *) alloca (0400 * sizeof (int));
1115 #endif
1116       {
1117         unsigned char *raw_pattern;
1118         int raw_pattern_size;
1119         unsigned char *patbuf;
1120         int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1121
1122         /* MULTIBYTE says whether the text to be searched is multibyte.
1123            We must convert PATTERN to match that, or we will not really
1124            find things right.  */
1125
1126         if (multibyte == STRING_MULTIBYTE (string))
1127           {
1128             raw_pattern = (char *) XSTRING (string)->data;
1129             raw_pattern_size = XSTRING (string)->size_byte;
1130           }
1131         else if (multibyte)
1132           {
1133             raw_pattern_size = count_size_as_multibyte (XSTRING (string)->data,
1134                                                         XSTRING (string)->size);
1135             raw_pattern = (char *) alloca (raw_pattern_size + 1);
1136             copy_text (XSTRING (string)->data, raw_pattern,
1137                        XSTRING (string)->size, 0, 1);
1138           }
1139         else
1140           {
1141             /* Converting multibyte to single-byte.
1142
1143                ??? Perhaps this conversion should be done in a special way
1144                by subtracting nonascii-insert-offset from each non-ASCII char,
1145                so that only the multibyte chars which really correspond to
1146                the chosen single-byte character set can possibly match.  */
1147             raw_pattern_size = XSTRING (string)->size;
1148             raw_pattern = (char *) alloca (raw_pattern_size + 1);
1149             copy_text (XSTRING (string)->data, raw_pattern,
1150                        XSTRING (string)->size, 1, 0);
1151           }
1152
1153         len_byte = raw_pattern_size;
1154         patbuf = (unsigned char *) alloca (len_byte);
1155         pat = patbuf;
1156         base_pat = raw_pattern;
1157         while (--len_byte >= 0)
1158           {
1159             /* If we got here and the RE flag is set, it's because we're
1160                dealing with a regexp known to be trivial, so the backslash
1161                just quotes the next character.  */
1162             if (RE && *base_pat == '\\')
1163               {
1164                 len_byte--;
1165                 base_pat++;
1166               }
1167             *pat++ = (trt ? XINT (trt[*base_pat++]) : *base_pat++);
1168           }
1169         len_byte = pat - patbuf;
1170         pat = base_pat = patbuf;
1171       }
1172       /* The general approach is that we are going to maintain that we know */
1173       /* the first (closest to the present position, in whatever direction */
1174       /* we're searching) character that could possibly be the last */
1175       /* (furthest from present position) character of a valid match.  We */
1176       /* advance the state of our knowledge by looking at that character */
1177       /* and seeing whether it indeed matches the last character of the */
1178       /* pattern.  If it does, we take a closer look.  If it does not, we */
1179       /* move our pointer (to putative last characters) as far as is */
1180       /* logically possible.  This amount of movement, which I call a */
1181       /* stride, will be the length of the pattern if the actual character */
1182       /* appears nowhere in the pattern, otherwise it will be the distance */
1183       /* from the last occurrence of that character to the end of the */
1184       /* pattern. */
1185       /* As a coding trick, an enormous stride is coded into the table for */
1186       /* characters that match the last character.  This allows use of only */
1187       /* a single test, a test for having gone past the end of the */
1188       /* permissible match region, to test for both possible matches (when */
1189       /* the stride goes past the end immediately) and failure to */
1190       /* match (where you get nudged past the end one stride at a time). */
1191
1192       /* Here we make a "mickey mouse" BM table.  The stride of the search */
1193       /* is determined only by the last character of the putative match. */
1194       /* If that character does not match, we will stride the proper */
1195       /* distance to propose a match that superimposes it on the last */
1196       /* instance of a character that matches it (per trt), or misses */
1197       /* it entirely if there is none. */
1198
1199       dirlen = len_byte * direction;
1200       infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1201       if (direction < 0)
1202         pat = (base_pat += len_byte - 1);
1203       BM_tab_base = BM_tab;
1204       BM_tab += 0400;
1205       j = dirlen;               /* to get it in a register */
1206       /* A character that does not appear in the pattern induces a */
1207       /* stride equal to the pattern length. */
1208       while (BM_tab_base != BM_tab)
1209         {
1210           *--BM_tab = j;
1211           *--BM_tab = j;
1212           *--BM_tab = j;
1213           *--BM_tab = j;
1214         }
1215       i = 0;
1216       while (i != infinity)
1217         {
1218           j = pat[i]; i += direction;
1219           if (i == dirlen) i = infinity;
1220           if (trt != 0)
1221             {
1222               k = (j = XINT (trt[j]));
1223               if (i == infinity)
1224                 stride_for_teases = BM_tab[j];
1225               BM_tab[j] = dirlen - i;
1226               /* A translation table is accompanied by its inverse -- see */
1227               /* comment following downcase_table for details */
1228               while ((j = (unsigned char) XINT (inverse_trt[j])) != k)
1229                 BM_tab[j] = dirlen - i;
1230             }
1231           else
1232             {
1233               if (i == infinity)
1234                 stride_for_teases = BM_tab[j];
1235               BM_tab[j] = dirlen - i;
1236             }
1237           /* stride_for_teases tells how much to stride if we get a */
1238           /* match on the far character but are subsequently */
1239           /* disappointed, by recording what the stride would have been */
1240           /* for that character if the last character had been */
1241           /* different. */
1242         }
1243       infinity = dirlen - infinity;
1244       pos_byte += dirlen - ((direction > 0) ? direction : 0);
1245       /* loop invariant - POS_BYTE points at where last char (first
1246          char if reverse) of pattern would align in a possible match.  */
1247       while (n != 0)
1248         {
1249           /* It's been reported that some (broken) compiler thinks that
1250              Boolean expressions in an arithmetic context are unsigned.
1251              Using an explicit ?1:0 prevents this.  */
1252           if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1253               < 0)
1254             return (n * (0 - direction));
1255           /* First we do the part we can by pointers (maybe nothing) */
1256           QUIT;
1257           pat = base_pat;
1258           limit = pos_byte - dirlen + direction;
1259           limit = ((direction > 0)
1260                    ? BUFFER_CEILING_OF (limit)
1261                    : BUFFER_FLOOR_OF (limit));
1262           /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1263              can take on without hitting edge of buffer or the gap.  */
1264           limit = ((direction > 0)
1265                    ? min (lim_byte - 1, min (limit, pos_byte + 20000))
1266                    : max (lim_byte, max (limit, pos_byte - 20000)));
1267           if ((limit - pos_byte) * direction > 20)
1268             {
1269               p_limit = BYTE_POS_ADDR (limit);
1270               p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1271               /* In this loop, pos + cursor - p2 is the surrogate for pos */
1272               while (1)         /* use one cursor setting as long as i can */
1273                 {
1274                   if (direction > 0) /* worth duplicating */
1275                     {
1276                       /* Use signed comparison if appropriate
1277                          to make cursor+infinity sure to be > p_limit.
1278                          Assuming that the buffer lies in a range of addresses
1279                          that are all "positive" (as ints) or all "negative",
1280                          either kind of comparison will work as long
1281                          as we don't step by infinity.  So pick the kind
1282                          that works when we do step by infinity.  */
1283                       if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1284                         while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1285                           cursor += BM_tab[*cursor];
1286                       else
1287                         while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1288                           cursor += BM_tab[*cursor];
1289                     }
1290                   else
1291                     {
1292                       if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1293                         while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1294                           cursor += BM_tab[*cursor];
1295                       else
1296                         while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1297                           cursor += BM_tab[*cursor];
1298                     }
1299 /* If you are here, cursor is beyond the end of the searched region. */
1300  /* This can happen if you match on the far character of the pattern, */
1301  /* because the "stride" of that character is infinity, a number able */
1302  /* to throw you well beyond the end of the search.  It can also */
1303  /* happen if you fail to match within the permitted region and would */
1304  /* otherwise try a character beyond that region */
1305                   if ((cursor - p_limit) * direction <= len_byte)
1306                     break;      /* a small overrun is genuine */
1307                   cursor -= infinity; /* large overrun = hit */
1308                   i = dirlen - direction;
1309                   if (trt != 0)
1310                     {
1311                       while ((i -= direction) + direction != 0)
1312                         if (pat[i] != XINT (trt[*(cursor -= direction)]))
1313                           break;
1314                     }
1315                   else
1316                     {
1317                       while ((i -= direction) + direction != 0)
1318                         if (pat[i] != *(cursor -= direction))
1319                           break;
1320                     }
1321                   cursor += dirlen - i - direction;     /* fix cursor */
1322                   if (i + direction == 0)
1323                     {
1324                       int position;
1325
1326                       cursor -= direction;
1327
1328                       position = pos_byte + cursor - p2 + ((direction > 0)
1329                                                            ? 1 - len_byte : 0);
1330                       set_search_regs (position, len_byte);
1331
1332                       if ((n -= direction) != 0)
1333                         cursor += dirlen; /* to resume search */
1334                       else
1335                         return ((direction > 0)
1336                                 ? search_regs.end[0] : search_regs.start[0]);
1337                     }
1338                   else
1339                     cursor += stride_for_teases; /* <sigh> we lose -  */
1340                 }
1341               pos_byte += cursor - p2;
1342             }
1343           else
1344             /* Now we'll pick up a clump that has to be done the hard */
1345             /* way because it covers a discontinuity */
1346             {
1347               limit = ((direction > 0)
1348                        ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1349                        : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1350               limit = ((direction > 0)
1351                        ? min (limit + len_byte, lim_byte - 1)
1352                        : max (limit - len_byte, lim_byte));
1353               /* LIMIT is now the last value POS_BYTE can have
1354                  and still be valid for a possible match.  */
1355               while (1)
1356                 {
1357                   /* This loop can be coded for space rather than */
1358                   /* speed because it will usually run only once. */
1359                   /* (the reach is at most len + 21, and typically */
1360                   /* does not exceed len) */
1361                   while ((limit - pos_byte) * direction >= 0)
1362                     pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1363                   /* now run the same tests to distinguish going off the */
1364                   /* end, a match or a phony match. */
1365                   if ((pos_byte - limit) * direction <= len_byte)
1366                     break;      /* ran off the end */
1367                   /* Found what might be a match.
1368                      Set POS_BYTE back to last (first if reverse) pos.  */
1369                   pos_byte -= infinity;
1370                   i = dirlen - direction;
1371                   while ((i -= direction) + direction != 0)
1372                     {
1373                       pos_byte -= direction;
1374                       if (pat[i] != (trt != 0
1375                                      ? XINT (trt[FETCH_BYTE (pos_byte)])
1376                                      : FETCH_BYTE (pos_byte)))
1377                         break;
1378                     }
1379                   /* Above loop has moved POS_BYTE part or all the way
1380                      back to the first pos (last pos if reverse).
1381                      Set it once again at the last (first if reverse) char.  */
1382                   pos_byte += dirlen - i- direction;
1383                   if (i + direction == 0)
1384                     {
1385                       int position;
1386                       pos_byte -= direction;
1387
1388                       position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1389
1390                       set_search_regs (position, len_byte);
1391
1392                       if ((n -= direction) != 0)
1393                         pos_byte += dirlen; /* to resume search */
1394                       else
1395                         return ((direction > 0)
1396                                 ? search_regs.end[0] : search_regs.start[0]);
1397                     }
1398                   else
1399                     pos_byte += stride_for_teases;
1400                 }
1401               }
1402           /* We have done one clump.  Can we continue? */
1403           if ((lim_byte - pos_byte) * direction < 0)
1404             return ((0 - n) * direction);
1405         }
1406       return BYTE_TO_CHAR (pos_byte);
1407     }
1408 }
1409
1410 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1411    for a match just found in the current buffer.  */
1412
1413 static void
1414 set_search_regs (beg_byte, nbytes)
1415      int beg_byte, nbytes;
1416 {
1417   /* Make sure we have registers in which to store
1418      the match position.  */
1419   if (search_regs.num_regs == 0)
1420     {
1421       search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1422       search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1423       search_regs.num_regs = 2;
1424     }
1425
1426   search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1427   search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1428   XSETBUFFER (last_thing_searched, current_buffer);
1429 }
1430 \f
1431 /* Given a string of words separated by word delimiters,
1432   compute a regexp that matches those exact words
1433   separated by arbitrary punctuation.  */
1434
1435 static Lisp_Object
1436 wordify (string)
1437      Lisp_Object string;
1438 {
1439   register unsigned char *p, *o;
1440   register int i, i_byte, len, punct_count = 0, word_count = 0;
1441   Lisp_Object val;
1442   int prev_c = 0;
1443   int adjust;
1444
1445   CHECK_STRING (string, 0);
1446   p = XSTRING (string)->data;
1447   len = XSTRING (string)->size;
1448
1449   for (i = 0, i_byte = 0; i < len; )
1450     {
1451       int c;
1452
1453       if (STRING_MULTIBYTE (string))
1454         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1455       else
1456         c = XSTRING (string)->data[i++];
1457
1458       if (SYNTAX (c) != Sword)
1459         {
1460           punct_count++;
1461           if (i > 0 && SYNTAX (prev_c) == Sword)
1462             word_count++;
1463         }
1464
1465       prev_c = c;
1466     }
1467
1468   if (SYNTAX (prev_c) == Sword)
1469     word_count++;
1470   if (!word_count)
1471     return build_string ("");
1472
1473   adjust = - punct_count + 5 * (word_count - 1) + 4;
1474   val = make_uninit_multibyte_string (len + adjust,
1475                                       XSTRING (string)->size_byte + adjust);
1476
1477   o = XSTRING (val)->data;
1478   *o++ = '\\';
1479   *o++ = 'b';
1480
1481   for (i = 0; i < XSTRING (val)->size_byte; i++)
1482     if (SYNTAX (p[i]) == Sword)
1483       *o++ = p[i];
1484     else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1485       {
1486         *o++ = '\\';
1487         *o++ = 'W';
1488         *o++ = '\\';
1489         *o++ = 'W';
1490         *o++ = '*';
1491       }
1492
1493   *o++ = '\\';
1494   *o++ = 'b';
1495
1496   return val;
1497 }
1498 \f
1499 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1500   "MSearch backward: ",
1501   "Search backward from point for STRING.\n\
1502 Set point to the beginning of the occurrence found, and return point.\n\
1503 An optional second argument bounds the search; it is a buffer position.\n\
1504 The match found must not extend before that position.\n\
1505 Optional third argument, if t, means if fail just return nil (no error).\n\
1506  If not nil and not t, position at limit of search and return nil.\n\
1507 Optional fourth argument is repeat count--search for successive occurrences.\n\
1508 See also the functions `match-beginning', `match-end' and `replace-match'.")
1509   (string, bound, noerror, count)
1510      Lisp_Object string, bound, noerror, count;
1511 {
1512   return search_command (string, bound, noerror, count, -1, 0, 0);
1513 }
1514
1515 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
1516   "Search forward from point for STRING.\n\
1517 Set point to the end of the occurrence found, and return point.\n\
1518 An optional second argument bounds the search; it is a buffer position.\n\
1519 The match found must not extend after that position.  nil is equivalent\n\
1520   to (point-max).\n\
1521 Optional third argument, if t, means if fail just return nil (no error).\n\
1522   If not nil and not t, move to limit of search and return nil.\n\
1523 Optional fourth argument is repeat count--search for successive occurrences.\n\
1524 See also the functions `match-beginning', `match-end' and `replace-match'.")
1525   (string, bound, noerror, count)
1526      Lisp_Object string, bound, noerror, count;
1527 {
1528   return search_command (string, bound, noerror, count, 1, 0, 0);
1529 }
1530
1531 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1532   "sWord search backward: ",
1533   "Search backward from point for STRING, ignoring differences in punctuation.\n\
1534 Set point to the beginning of the occurrence found, and return point.\n\
1535 An optional second argument bounds the search; it is a buffer position.\n\
1536 The match found must not extend before that position.\n\
1537 Optional third argument, if t, means if fail just return nil (no error).\n\
1538   If not nil and not t, move to limit of search and return nil.\n\
1539 Optional fourth argument is repeat count--search for successive occurrences.")
1540   (string, bound, noerror, count)
1541      Lisp_Object string, bound, noerror, count;
1542 {
1543   return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
1544 }
1545
1546 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1547   "sWord search: ",
1548   "Search forward from point for STRING, ignoring differences in punctuation.\n\
1549 Set point to the end of the occurrence found, and return point.\n\
1550 An optional second argument bounds the search; it is a buffer position.\n\
1551 The match found must not extend after that position.\n\
1552 Optional third argument, if t, means if fail just return nil (no error).\n\
1553   If not nil and not t, move to limit of search and return nil.\n\
1554 Optional fourth argument is repeat count--search for successive occurrences.")
1555   (string, bound, noerror, count)
1556      Lisp_Object string, bound, noerror, count;
1557 {
1558   return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
1559 }
1560
1561 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1562   "sRE search backward: ",
1563   "Search backward from point for match for regular expression REGEXP.\n\
1564 Set point to the beginning of the match, and return point.\n\
1565 The match found is the one starting last in the buffer\n\
1566 and yet ending before the origin of the search.\n\
1567 An optional second argument bounds the search; it is a buffer position.\n\
1568 The match found must start at or after that position.\n\
1569 Optional third argument, if t, means if fail just return nil (no error).\n\
1570   If not nil and not t, move to limit of search and return nil.\n\
1571 Optional fourth argument is repeat count--search for successive occurrences.\n\
1572 See also the functions `match-beginning', `match-end' and `replace-match'.")
1573   (regexp, bound, noerror, count)
1574      Lisp_Object regexp, bound, noerror, count;
1575 {
1576   return search_command (regexp, bound, noerror, count, -1, 1, 0);
1577 }
1578
1579 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1580   "sRE search: ",
1581   "Search forward from point for regular expression REGEXP.\n\
1582 Set point to the end of the occurrence found, and return point.\n\
1583 An optional second argument bounds the search; it is a buffer position.\n\
1584 The match found must not extend after that position.\n\
1585 Optional third argument, if t, means if fail just return nil (no error).\n\
1586   If not nil and not t, move to limit of search and return nil.\n\
1587 Optional fourth argument is repeat count--search for successive occurrences.\n\
1588 See also the functions `match-beginning', `match-end' and `replace-match'.")
1589   (regexp, bound, noerror, count)
1590      Lisp_Object regexp, bound, noerror, count;
1591 {
1592   return search_command (regexp, bound, noerror, count, 1, 1, 0);
1593 }
1594
1595 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
1596   "sPosix search backward: ",
1597   "Search backward from point for match for regular expression REGEXP.\n\
1598 Find the longest match in accord with Posix regular expression rules.\n\
1599 Set point to the beginning of the match, and return point.\n\
1600 The match found is the one starting last in the buffer\n\
1601 and yet ending before the origin of the search.\n\
1602 An optional second argument bounds the search; it is a buffer position.\n\
1603 The match found must start at or after that position.\n\
1604 Optional third argument, if t, means if fail just return nil (no error).\n\
1605   If not nil and not t, move to limit of search and return nil.\n\
1606 Optional fourth argument is repeat count--search for successive occurrences.\n\
1607 See also the functions `match-beginning', `match-end' and `replace-match'.")
1608   (regexp, bound, noerror, count)
1609      Lisp_Object regexp, bound, noerror, count;
1610 {
1611   return search_command (regexp, bound, noerror, count, -1, 1, 1);
1612 }
1613
1614 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
1615   "sPosix search: ",
1616   "Search forward from point for regular expression REGEXP.\n\
1617 Find the longest match in accord with Posix regular expression rules.\n\
1618 Set point to the end of the occurrence found, and return point.\n\
1619 An optional second argument bounds the search; it is a buffer position.\n\
1620 The match found must not extend after that position.\n\
1621 Optional third argument, if t, means if fail just return nil (no error).\n\
1622   If not nil and not t, move to limit of search and return nil.\n\
1623 Optional fourth argument is repeat count--search for successive occurrences.\n\
1624 See also the functions `match-beginning', `match-end' and `replace-match'.")
1625   (regexp, bound, noerror, count)
1626      Lisp_Object regexp, bound, noerror, count;
1627 {
1628   return search_command (regexp, bound, noerror, count, 1, 1, 1);
1629 }
1630 \f
1631 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
1632   "Replace text matched by last search with NEWTEXT.\n\
1633 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1634 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1635 based on the replaced text.\n\
1636 If the replaced text has only capital letters\n\
1637 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1638 If the replaced text has at least one word starting with a capital letter,\n\
1639 then capitalize each word in NEWTEXT.\n\n\
1640 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1641 Otherwise treat `\\' as special:\n\
1642   `\\&' in NEWTEXT means substitute original matched text.\n\
1643   `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1644        If Nth parens didn't match, substitute nothing.\n\
1645   `\\\\' means insert one `\\'.\n\
1646 FIXEDCASE and LITERAL are optional arguments.\n\
1647 Leaves point at end of replacement text.\n\
1648 \n\
1649 The optional fourth argument STRING can be a string to modify.\n\
1650 In that case, this function creates and returns a new string\n\
1651 which is made by replacing the part of STRING that was matched.\n\
1652 \n\
1653 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
1654 It says to replace just that subexpression instead of the whole match.\n\
1655 This is useful only after a regular expression search or match\n\
1656 since only regular expressions have distinguished subexpressions.")
1657   (newtext, fixedcase, literal, string, subexp)
1658      Lisp_Object newtext, fixedcase, literal, string, subexp;
1659 {
1660   enum { nochange, all_caps, cap_initial } case_action;
1661   register int pos, last;
1662   int some_multiletter_word;
1663   int some_lowercase;
1664   int some_uppercase;
1665   int some_nonuppercase_initial;
1666   register int c, prevc;
1667   int inslen;
1668   int sub;
1669   int opoint, newpoint;
1670
1671   CHECK_STRING (newtext, 0);
1672
1673   if (! NILP (string))
1674     CHECK_STRING (string, 4);
1675
1676   case_action = nochange;       /* We tried an initialization */
1677                                 /* but some C compilers blew it */
1678
1679   if (search_regs.num_regs <= 0)
1680     error ("replace-match called before any match found");
1681
1682   if (NILP (subexp))
1683     sub = 0;
1684   else
1685     {
1686       CHECK_NUMBER (subexp, 3);
1687       sub = XINT (subexp);
1688       if (sub < 0 || sub >= search_regs.num_regs)
1689         args_out_of_range (subexp, make_number (search_regs.num_regs));
1690     }
1691
1692   if (NILP (string))
1693     {
1694       if (search_regs.start[sub] < BEGV
1695           || search_regs.start[sub] > search_regs.end[sub]
1696           || search_regs.end[sub] > ZV)
1697         args_out_of_range (make_number (search_regs.start[sub]),
1698                            make_number (search_regs.end[sub]));
1699     }
1700   else
1701     {
1702       if (search_regs.start[sub] < 0
1703           || search_regs.start[sub] > search_regs.end[sub]
1704           || search_regs.end[sub] > XSTRING (string)->size)
1705         args_out_of_range (make_number (search_regs.start[sub]),
1706                            make_number (search_regs.end[sub]));
1707     }
1708
1709   if (NILP (fixedcase))
1710     {
1711       int beg;
1712       /* Decide how to casify by examining the matched text. */
1713
1714       if (NILP (string))
1715         last = CHAR_TO_BYTE (search_regs.end[sub]);
1716       else
1717         last = search_regs.end[sub];
1718
1719       if (NILP (string))
1720         beg = CHAR_TO_BYTE (search_regs.start[sub]);
1721       else
1722         beg = search_regs.start[sub];
1723
1724       prevc = '\n';
1725       case_action = all_caps;
1726
1727       /* some_multiletter_word is set nonzero if any original word
1728          is more than one letter long. */
1729       some_multiletter_word = 0;
1730       some_lowercase = 0;
1731       some_nonuppercase_initial = 0;
1732       some_uppercase = 0;
1733
1734       for (pos = beg; pos < last; pos++)
1735         {
1736           if (NILP (string))
1737             c = FETCH_BYTE (pos);
1738           else
1739             c = XSTRING (string)->data[pos];
1740
1741           if (LOWERCASEP (c))
1742             {
1743               /* Cannot be all caps if any original char is lower case */
1744
1745               some_lowercase = 1;
1746               if (SYNTAX (prevc) != Sword)
1747                 some_nonuppercase_initial = 1;
1748               else
1749                 some_multiletter_word = 1;
1750             }
1751           else if (!NOCASEP (c))
1752             {
1753               some_uppercase = 1;
1754               if (SYNTAX (prevc) != Sword)
1755                 ;
1756               else
1757                 some_multiletter_word = 1;
1758             }
1759           else
1760             {
1761               /* If the initial is a caseless word constituent,
1762                  treat that like a lowercase initial.  */
1763               if (SYNTAX (prevc) != Sword)
1764                 some_nonuppercase_initial = 1;
1765             }
1766
1767           prevc = c;
1768         }
1769
1770       /* Convert to all caps if the old text is all caps
1771          and has at least one multiletter word.  */
1772       if (! some_lowercase && some_multiletter_word)
1773         case_action = all_caps;
1774       /* Capitalize each word, if the old text has all capitalized words.  */
1775       else if (!some_nonuppercase_initial && some_multiletter_word)
1776         case_action = cap_initial;
1777       else if (!some_nonuppercase_initial && some_uppercase)
1778         /* Should x -> yz, operating on X, give Yz or YZ?
1779            We'll assume the latter.  */
1780         case_action = all_caps;
1781       else
1782         case_action = nochange;
1783     }
1784
1785   /* Do replacement in a string.  */
1786   if (!NILP (string))
1787     {
1788       Lisp_Object before, after;
1789
1790       before = Fsubstring (string, make_number (0),
1791                            make_number (search_regs.start[sub]));
1792       after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
1793
1794       /* Substitute parts of the match into NEWTEXT
1795          if desired.  */
1796       if (NILP (literal))
1797         {
1798           int lastpos = -1;
1799           int lastpos_byte = -1;
1800           /* We build up the substituted string in ACCUM.  */
1801           Lisp_Object accum;
1802           Lisp_Object middle;
1803           int pos_byte;
1804
1805           accum = Qnil;
1806
1807           for (pos_byte = 0, pos = 0; pos_byte < XSTRING (newtext)->size_byte;)
1808             {
1809               int substart = -1;
1810               int subend;
1811               int delbackslash = 0;
1812
1813               FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
1814
1815               if (c == '\\')
1816                 {
1817                   FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
1818                   if (c == '&')
1819                     {
1820                       substart = search_regs.start[sub];
1821                       subend = search_regs.end[sub];
1822                     }
1823                   else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1824                     {
1825                       if (search_regs.start[c - '0'] >= 0)
1826                         {
1827                           substart = search_regs.start[c - '0'];
1828                           subend = search_regs.end[c - '0'];
1829                         }
1830                     }
1831                   else if (c == '\\')
1832                     delbackslash = 1;
1833                   else
1834                     error ("Invalid use of `\\' in replacement text");
1835                 }
1836               if (substart >= 0)
1837                 {
1838                   if (pos - 1 != lastpos + 1)
1839                     middle = substring_both (newtext, lastpos + 1,
1840                                              lastpos_byte + 1,
1841                                              pos - 1, pos_byte - 1);
1842                   else
1843                     middle = Qnil;
1844                   accum = concat3 (accum, middle,
1845                                    Fsubstring (string,
1846                                                make_number (substart),
1847                                                make_number (subend)));
1848                   lastpos = pos;
1849                   lastpos_byte = pos_byte;
1850                 }
1851               else if (delbackslash)
1852                 {
1853                   middle = substring_both (newtext, lastpos + 1,
1854                                            lastpos_byte + 1,
1855                                            pos, pos_byte);
1856
1857                   accum = concat2 (accum, middle);
1858                   lastpos = pos;
1859                   lastpos_byte = pos_byte;
1860                 }
1861             }
1862
1863           if (pos != lastpos + 1)
1864             middle = substring_both (newtext, lastpos + 1,
1865                                      lastpos_byte + 1,
1866                                      pos, pos_byte);
1867           else
1868             middle = Qnil;
1869
1870           newtext = concat2 (accum, middle);
1871         }
1872
1873       /* Do case substitution in NEWTEXT if desired.  */
1874       if (case_action == all_caps)
1875         newtext = Fupcase (newtext);
1876       else if (case_action == cap_initial)
1877         newtext = Fupcase_initials (newtext);
1878
1879       return concat3 (before, newtext, after);
1880     }
1881
1882   /* Record point, the move (quietly) to the start of the match.  */
1883   if (PT > search_regs.start[sub])
1884     opoint = PT - ZV;
1885   else
1886     opoint = PT;
1887
1888   TEMP_SET_PT (search_regs.start[sub]);
1889
1890   /* We insert the replacement text before the old text, and then
1891      delete the original text.  This means that markers at the
1892      beginning or end of the original will float to the corresponding
1893      position in the replacement.  */
1894   if (!NILP (literal))
1895     Finsert_and_inherit (1, &newtext);
1896   else
1897     {
1898       struct gcpro gcpro1;
1899       GCPRO1 (newtext);
1900
1901       for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1902         {
1903           int offset = PT - search_regs.start[sub];
1904
1905           c = XSTRING (newtext)->data[pos];
1906           if (c == '\\')
1907             {
1908               c = XSTRING (newtext)->data[++pos];
1909               if (c == '&')
1910                 Finsert_buffer_substring
1911                   (Fcurrent_buffer (),
1912                    make_number (search_regs.start[sub] + offset),
1913                    make_number (search_regs.end[sub] + offset));
1914               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1915                 {
1916                   if (search_regs.start[c - '0'] >= 1)
1917                     Finsert_buffer_substring
1918                       (Fcurrent_buffer (),
1919                        make_number (search_regs.start[c - '0'] + offset),
1920                        make_number (search_regs.end[c - '0'] + offset));
1921                 }
1922               else if (c == '\\')
1923                 insert_char (c);
1924               else
1925                 error ("Invalid use of `\\' in replacement text");
1926             }
1927           else
1928             insert_char (c);
1929         }
1930       UNGCPRO;
1931     }
1932
1933   inslen = PT - (search_regs.start[sub]);
1934   del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
1935
1936   if (case_action == all_caps)
1937     Fupcase_region (make_number (PT - inslen), make_number (PT));
1938   else if (case_action == cap_initial)
1939     Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
1940
1941   newpoint = PT;
1942
1943   /* Put point back where it was in the text.  */
1944   if (opoint <= 0)
1945     TEMP_SET_PT (opoint + ZV);
1946   else
1947     TEMP_SET_PT (opoint);
1948
1949   /* Now move point "officially" to the start of the inserted replacement.  */
1950   move_if_not_intangible (newpoint);
1951
1952   return Qnil;
1953 }
1954 \f
1955 static Lisp_Object
1956 match_limit (num, beginningp)
1957      Lisp_Object num;
1958      int beginningp;
1959 {
1960   register int n;
1961
1962   CHECK_NUMBER (num, 0);
1963   n = XINT (num);
1964   if (n < 0 || n >= search_regs.num_regs)
1965     args_out_of_range (num, make_number (search_regs.num_regs));
1966   if (search_regs.num_regs <= 0
1967       || search_regs.start[n] < 0)
1968     return Qnil;
1969   return (make_number ((beginningp) ? search_regs.start[n]
1970                                     : search_regs.end[n]));
1971 }
1972
1973 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1974   "Return position of start of text matched by last search.\n\
1975 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1976   regexp.\n\
1977 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1978   SUBEXP pairs.\n\
1979 Zero means the entire text matched by the whole regexp or whole string.")
1980   (subexp)
1981      Lisp_Object subexp;
1982 {
1983   return match_limit (subexp, 1);
1984 }
1985
1986 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1987   "Return position of end of text matched by last search.\n\
1988 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1989   regexp.\n\
1990 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1991   SUBEXP pairs.\n\
1992 Zero means the entire text matched by the whole regexp or whole string.")
1993   (subexp)
1994      Lisp_Object subexp;
1995 {
1996   return match_limit (subexp, 0);
1997 }
1998
1999 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2000   "Return a list containing all info on what the last search matched.\n\
2001 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2002 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2003 if the last match was on a buffer; integers or nil if a string was matched.\n\
2004 Use `store-match-data' to reinstate the data in this list.\n\
2005 \n\
2006 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2007 \(rather than markers) to represent buffer positions.\n\
2008 If REUSE is a list, reuse it as part of the value.  If REUSE is long enough\n\
2009 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2010   (integers, reuse)
2011      Lisp_Object integers, reuse;
2012 {
2013   Lisp_Object tail, prev;
2014   Lisp_Object *data;
2015   int i, len;
2016
2017   if (NILP (last_thing_searched))
2018     return Qnil;
2019
2020   data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2021                                  * sizeof (Lisp_Object));
2022
2023   len = -1;
2024   for (i = 0; i < search_regs.num_regs; i++)
2025     {
2026       int start = search_regs.start[i];
2027       if (start >= 0)
2028         {
2029           if (EQ (last_thing_searched, Qt)
2030               || ! NILP (integers))
2031             {
2032               XSETFASTINT (data[2 * i], start);
2033               XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2034             }
2035           else if (BUFFERP (last_thing_searched))
2036             {
2037               data[2 * i] = Fmake_marker ();
2038               Fset_marker (data[2 * i],
2039                            make_number (start),
2040                            last_thing_searched);
2041               data[2 * i + 1] = Fmake_marker ();
2042               Fset_marker (data[2 * i + 1],
2043                            make_number (search_regs.end[i]),
2044                            last_thing_searched);
2045             }
2046           else
2047             /* last_thing_searched must always be Qt, a buffer, or Qnil.  */
2048             abort ();
2049
2050           len = i;
2051         }
2052       else
2053         data[2 * i] = data [2 * i + 1] = Qnil;
2054     }
2055
2056   /* If REUSE is not usable, cons up the values and return them.  */
2057   if (! CONSP (reuse))
2058     return Flist (2 * len + 2, data);
2059
2060   /* If REUSE is a list, store as many value elements as will fit
2061      into the elements of REUSE.  */
2062   for (i = 0, tail = reuse; CONSP (tail);
2063        i++, tail = XCONS (tail)->cdr)
2064     {
2065       if (i < 2 * len + 2)
2066         XCONS (tail)->car = data[i];
2067       else
2068         XCONS (tail)->car = Qnil;
2069       prev = tail;
2070     }
2071
2072   /* If we couldn't fit all value elements into REUSE,
2073      cons up the rest of them and add them to the end of REUSE.  */
2074   if (i < 2 * len + 2)
2075     XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2076
2077   return reuse;
2078 }
2079
2080
2081 DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
2082   "Set internal data on last search match from elements of LIST.\n\
2083 LIST should have been created by calling `match-data' previously.")
2084   (list)
2085      register Lisp_Object list;
2086 {
2087   register int i;
2088   register Lisp_Object marker;
2089
2090   if (running_asynch_code)
2091     save_search_regs ();
2092
2093   if (!CONSP (list) && !NILP (list))
2094     list = wrong_type_argument (Qconsp, list);
2095
2096   /* Unless we find a marker with a buffer in LIST, assume that this
2097      match data came from a string.  */
2098   last_thing_searched = Qt;
2099
2100   /* Allocate registers if they don't already exist.  */
2101   {
2102     int length = XFASTINT (Flength (list)) / 2;
2103
2104     if (length > search_regs.num_regs)
2105       {
2106         if (search_regs.num_regs == 0)
2107           {
2108             search_regs.start
2109               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2110             search_regs.end
2111               = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2112           }
2113         else
2114           {
2115             search_regs.start
2116               = (regoff_t *) xrealloc (search_regs.start,
2117                                        length * sizeof (regoff_t));
2118             search_regs.end
2119               = (regoff_t *) xrealloc (search_regs.end,
2120                                        length * sizeof (regoff_t));
2121           }
2122
2123         search_regs.num_regs = length;
2124       }
2125   }
2126
2127   for (i = 0; i < search_regs.num_regs; i++)
2128     {
2129       marker = Fcar (list);
2130       if (NILP (marker))
2131         {
2132           search_regs.start[i] = -1;
2133           list = Fcdr (list);
2134         }
2135       else
2136         {
2137           if (MARKERP (marker))
2138             {
2139               if (XMARKER (marker)->buffer == 0)
2140                 XSETFASTINT (marker, 0);
2141               else
2142                 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2143             }
2144
2145           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2146           search_regs.start[i] = XINT (marker);
2147           list = Fcdr (list);
2148
2149           marker = Fcar (list);
2150           if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2151             XSETFASTINT (marker, 0);
2152
2153           CHECK_NUMBER_COERCE_MARKER (marker, 0);
2154           search_regs.end[i] = XINT (marker);
2155         }
2156       list = Fcdr (list);
2157     }
2158
2159   return Qnil;
2160 }
2161
2162 /* If non-zero the match data have been saved in saved_search_regs
2163    during the execution of a sentinel or filter. */
2164 static int search_regs_saved;
2165 static struct re_registers saved_search_regs;
2166
2167 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2168    if asynchronous code (filter or sentinel) is running. */
2169 static void
2170 save_search_regs ()
2171 {
2172   if (!search_regs_saved)
2173     {
2174       saved_search_regs.num_regs = search_regs.num_regs;
2175       saved_search_regs.start = search_regs.start;
2176       saved_search_regs.end = search_regs.end;
2177       search_regs.num_regs = 0;
2178       search_regs.start = 0;
2179       search_regs.end = 0;
2180
2181       search_regs_saved = 1;
2182     }
2183 }
2184
2185 /* Called upon exit from filters and sentinels. */
2186 void
2187 restore_match_data ()
2188 {
2189   if (search_regs_saved)
2190     {
2191       if (search_regs.num_regs > 0)
2192         {
2193           xfree (search_regs.start);
2194           xfree (search_regs.end);
2195         }
2196       search_regs.num_regs = saved_search_regs.num_regs;
2197       search_regs.start = saved_search_regs.start;
2198       search_regs.end = saved_search_regs.end;
2199
2200       search_regs_saved = 0;
2201     }
2202 }
2203
2204 /* Quote a string to inactivate reg-expr chars */
2205
2206 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2207   "Return a regexp string which matches exactly STRING and nothing else.")
2208   (string)
2209      Lisp_Object string;
2210 {
2211   register unsigned char *in, *out, *end;
2212   register unsigned char *temp;
2213   int backslashes_added = 0;
2214
2215   CHECK_STRING (string, 0);
2216
2217   temp = (unsigned char *) alloca (XSTRING (string)->size_byte * 2);
2218
2219   /* Now copy the data into the new string, inserting escapes. */
2220
2221   in = XSTRING (string)->data;
2222   end = in + XSTRING (string)->size_byte;
2223   out = temp;
2224
2225   for (; in != end; in++)
2226     {
2227       if (*in == '[' || *in == ']'
2228           || *in == '*' || *in == '.' || *in == '\\'
2229           || *in == '?' || *in == '+'
2230           || *in == '^' || *in == '$')
2231         *out++ = '\\', backslashes_added++;
2232       *out++ = *in;
2233     }
2234
2235   return make_multibyte_string (temp,
2236                                 XSTRING (string)->size + backslashes_added,
2237                                 out - temp);
2238 }
2239 \f
2240 syms_of_search ()
2241 {
2242   register int i;
2243
2244   for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2245     {
2246       searchbufs[i].buf.allocated = 100;
2247       searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2248       searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2249       searchbufs[i].regexp = Qnil;
2250       staticpro (&searchbufs[i].regexp);
2251       searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2252     }
2253   searchbuf_head = &searchbufs[0];
2254
2255   Qsearch_failed = intern ("search-failed");
2256   staticpro (&Qsearch_failed);
2257   Qinvalid_regexp = intern ("invalid-regexp");
2258   staticpro (&Qinvalid_regexp);
2259
2260   Fput (Qsearch_failed, Qerror_conditions,
2261         Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2262   Fput (Qsearch_failed, Qerror_message,
2263         build_string ("Search failed"));
2264
2265   Fput (Qinvalid_regexp, Qerror_conditions,
2266         Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2267   Fput (Qinvalid_regexp, Qerror_message,
2268         build_string ("Invalid regexp"));
2269
2270   last_thing_searched = Qnil;
2271   staticpro (&last_thing_searched);
2272
2273   defsubr (&Slooking_at);
2274   defsubr (&Sposix_looking_at);
2275   defsubr (&Sstring_match);
2276   defsubr (&Sposix_string_match);
2277   defsubr (&Ssearch_forward);
2278   defsubr (&Ssearch_backward);
2279   defsubr (&Sword_search_forward);
2280   defsubr (&Sword_search_backward);
2281   defsubr (&Sre_search_forward);
2282   defsubr (&Sre_search_backward);
2283   defsubr (&Sposix_search_forward);
2284   defsubr (&Sposix_search_backward);
2285   defsubr (&Sreplace_match);
2286   defsubr (&Smatch_beginning);
2287   defsubr (&Smatch_end);
2288   defsubr (&Smatch_data);
2289   defsubr (&Sstore_match_data);
2290   defsubr (&Sregexp_quote);
2291 }