]> code.delx.au - gnu-emacs/blob - src/search.c
(search_buffer): Set search regs for all success with an empty string.
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #ifdef STDC_HEADERS
24 #include <stdlib.h>
25 #endif
26 #include "lisp.h"
27 #include "syntax.h"
28 #include "category.h"
29 #include "buffer.h"
30 #include "charset.h"
31 #include "region-cache.h"
32 #include "commands.h"
33 #include "blockinput.h"
34 #include "intervals.h"
35
36 #include <sys/types.h>
37 #include "regex.h"
38
39 #define min(a, b) ((a) < (b) ? (a) : (b))
40 #define max(a, b) ((a) > (b) ? (a) : (b))
41
42 #define REGEXP_CACHE_SIZE 20
43
44 /* If the regexp is non-nil, then the buffer contains the compiled form
45 of that regexp, suitable for searching. */
46 struct regexp_cache
47 {
48 struct regexp_cache *next;
49 Lisp_Object regexp;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
54 };
55
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* error condition signaled when regexp compile_pattern fails */
88
89 Lisp_Object Qinvalid_regexp;
90
91 static void set_search_regs ();
92 static void save_search_regs ();
93 static int simple_search ();
94 static int boyer_moore ();
95 static int search_buffer ();
96
97 static void
98 matcher_overflow ()
99 {
100 error ("Stack overflow in regexp matcher");
101 }
102
103 #ifdef __STDC__
104 #define CONST const
105 #else
106 #define CONST
107 #endif
108
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data. */
122
123 static void
124 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
125 struct regexp_cache *cp;
126 Lisp_Object pattern;
127 Lisp_Object translate;
128 struct re_registers *regp;
129 int posix;
130 int multibyte;
131 {
132 unsigned char *raw_pattern;
133 int raw_pattern_size;
134 char *val;
135 reg_syntax_t old;
136
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
140
141 if (multibyte == STRING_MULTIBYTE (pattern))
142 {
143 raw_pattern = (unsigned char *) XSTRING (pattern)->data;
144 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
145 }
146 else if (multibyte)
147 {
148 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
149 XSTRING (pattern)->size);
150 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
151 copy_text (XSTRING (pattern)->data, raw_pattern,
152 XSTRING (pattern)->size, 0, 1);
153 }
154 else
155 {
156 /* Converting multibyte to single-byte.
157
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
162 raw_pattern_size = XSTRING (pattern)->size;
163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
164 copy_text (XSTRING (pattern)->data, raw_pattern,
165 STRING_BYTES (XSTRING (pattern)), 1, 0);
166 }
167
168 cp->regexp = Qnil;
169 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
170 cp->posix = posix;
171 cp->buf.multibyte = multibyte;
172 BLOCK_INPUT;
173 old = re_set_syntax (RE_SYNTAX_EMACS
174 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
175 val = (char *) re_compile_pattern ((char *)raw_pattern,
176 raw_pattern_size, &cp->buf);
177 re_set_syntax (old);
178 UNBLOCK_INPUT;
179 if (val)
180 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
181
182 cp->regexp = Fcopy_sequence (pattern);
183 }
184
185 /* Compile a regexp if necessary, but first check to see if there's one in
186 the cache.
187 PATTERN is the pattern to compile.
188 TRANSLATE is a translation table for ignoring case, or nil for none.
189 REGP is the structure that says where to store the "register"
190 values that will result from matching this pattern.
191 If it is 0, we should compile the pattern not to record any
192 subexpression bounds.
193 POSIX is nonzero if we want full backtracking (POSIX style)
194 for this pattern. 0 means backtrack only enough to get a valid match. */
195
196 struct re_pattern_buffer *
197 compile_pattern (pattern, regp, translate, posix, multibyte)
198 Lisp_Object pattern;
199 struct re_registers *regp;
200 Lisp_Object translate;
201 int posix, multibyte;
202 {
203 struct regexp_cache *cp, **cpp;
204
205 for (cpp = &searchbuf_head; ; cpp = &cp->next)
206 {
207 cp = *cpp;
208 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
209 && !NILP (Fstring_equal (cp->regexp, pattern))
210 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
211 && cp->posix == posix
212 && cp->buf.multibyte == multibyte)
213 break;
214
215 /* If we're at the end of the cache, compile into the last cell. */
216 if (cp->next == 0)
217 {
218 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
219 break;
220 }
221 }
222
223 /* When we get here, cp (aka *cpp) contains the compiled pattern,
224 either because we found it in the cache or because we just compiled it.
225 Move it to the front of the queue to mark it as most recently used. */
226 *cpp = cp->next;
227 cp->next = searchbuf_head;
228 searchbuf_head = cp;
229
230 /* Advise the searching functions about the space we have allocated
231 for register data. */
232 if (regp)
233 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
234
235 return &cp->buf;
236 }
237
238 /* Error condition used for failing searches */
239 Lisp_Object Qsearch_failed;
240
241 Lisp_Object
242 signal_failure (arg)
243 Lisp_Object arg;
244 {
245 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
246 return Qnil;
247 }
248 \f
249 static Lisp_Object
250 looking_at_1 (string, posix)
251 Lisp_Object string;
252 int posix;
253 {
254 Lisp_Object val;
255 unsigned char *p1, *p2;
256 int s1, s2;
257 register int i;
258 struct re_pattern_buffer *bufp;
259
260 if (running_asynch_code)
261 save_search_regs ();
262
263 CHECK_STRING (string, 0);
264 bufp = compile_pattern (string, &search_regs,
265 (!NILP (current_buffer->case_fold_search)
266 ? DOWNCASE_TABLE : Qnil),
267 posix,
268 !NILP (current_buffer->enable_multibyte_characters));
269
270 immediate_quit = 1;
271 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
272
273 /* Get pointers and sizes of the two strings
274 that make up the visible portion of the buffer. */
275
276 p1 = BEGV_ADDR;
277 s1 = GPT_BYTE - BEGV_BYTE;
278 p2 = GAP_END_ADDR;
279 s2 = ZV_BYTE - GPT_BYTE;
280 if (s1 < 0)
281 {
282 p2 = p1;
283 s2 = ZV_BYTE - BEGV_BYTE;
284 s1 = 0;
285 }
286 if (s2 < 0)
287 {
288 s1 = ZV_BYTE - BEGV_BYTE;
289 s2 = 0;
290 }
291
292 re_match_object = Qnil;
293
294 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
295 PT_BYTE - BEGV_BYTE, &search_regs,
296 ZV_BYTE - BEGV_BYTE);
297 if (i == -2)
298 matcher_overflow ();
299
300 val = (0 <= i ? Qt : Qnil);
301 if (i >= 0)
302 for (i = 0; i < search_regs.num_regs; i++)
303 if (search_regs.start[i] >= 0)
304 {
305 search_regs.start[i]
306 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
307 search_regs.end[i]
308 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
309 }
310 XSETBUFFER (last_thing_searched, current_buffer);
311 immediate_quit = 0;
312 return val;
313 }
314
315 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
316 "Return t if text after point matches regular expression REGEXP.\n\
317 This function modifies the match data that `match-beginning',\n\
318 `match-end' and `match-data' access; save and restore the match\n\
319 data if you want to preserve them.")
320 (regexp)
321 Lisp_Object regexp;
322 {
323 return looking_at_1 (regexp, 0);
324 }
325
326 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
327 "Return t if text after point matches regular expression REGEXP.\n\
328 Find the longest match, in accord with Posix regular expression rules.\n\
329 This function modifies the match data that `match-beginning',\n\
330 `match-end' and `match-data' access; save and restore the match\n\
331 data if you want to preserve them.")
332 (regexp)
333 Lisp_Object regexp;
334 {
335 return looking_at_1 (regexp, 1);
336 }
337 \f
338 static Lisp_Object
339 string_match_1 (regexp, string, start, posix)
340 Lisp_Object regexp, string, start;
341 int posix;
342 {
343 int val;
344 struct re_pattern_buffer *bufp;
345 int pos, pos_byte;
346 int i;
347
348 if (running_asynch_code)
349 save_search_regs ();
350
351 CHECK_STRING (regexp, 0);
352 CHECK_STRING (string, 1);
353
354 if (NILP (start))
355 pos = 0, pos_byte = 0;
356 else
357 {
358 int len = XSTRING (string)->size;
359
360 CHECK_NUMBER (start, 2);
361 pos = XINT (start);
362 if (pos < 0 && -pos <= len)
363 pos = len + pos;
364 else if (0 > pos || pos > len)
365 args_out_of_range (string, start);
366 pos_byte = string_char_to_byte (string, pos);
367 }
368
369 bufp = compile_pattern (regexp, &search_regs,
370 (!NILP (current_buffer->case_fold_search)
371 ? DOWNCASE_TABLE : Qnil),
372 posix,
373 STRING_MULTIBYTE (string));
374 immediate_quit = 1;
375 re_match_object = string;
376
377 val = re_search (bufp, (char *) XSTRING (string)->data,
378 STRING_BYTES (XSTRING (string)), pos_byte,
379 STRING_BYTES (XSTRING (string)) - pos_byte,
380 &search_regs);
381 immediate_quit = 0;
382 last_thing_searched = Qt;
383 if (val == -2)
384 matcher_overflow ();
385 if (val < 0) return Qnil;
386
387 for (i = 0; i < search_regs.num_regs; i++)
388 if (search_regs.start[i] >= 0)
389 {
390 search_regs.start[i]
391 = string_byte_to_char (string, search_regs.start[i]);
392 search_regs.end[i]
393 = string_byte_to_char (string, search_regs.end[i]);
394 }
395
396 return make_number (string_byte_to_char (string, val));
397 }
398
399 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
400 "Return index of start of first match for REGEXP in STRING, or nil.\n\
401 If third arg START is non-nil, start search at that index in STRING.\n\
402 For index of first char beyond the match, do (match-end 0).\n\
403 `match-end' and `match-beginning' also give indices of substrings\n\
404 matched by parenthesis constructs in the pattern.")
405 (regexp, string, start)
406 Lisp_Object regexp, string, start;
407 {
408 return string_match_1 (regexp, string, start, 0);
409 }
410
411 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
412 "Return index of start of first match for REGEXP in STRING, or nil.\n\
413 Find the longest match, in accord with Posix regular expression rules.\n\
414 If third arg START is non-nil, start search at that index in STRING.\n\
415 For index of first char beyond the match, do (match-end 0).\n\
416 `match-end' and `match-beginning' also give indices of substrings\n\
417 matched by parenthesis constructs in the pattern.")
418 (regexp, string, start)
419 Lisp_Object regexp, string, start;
420 {
421 return string_match_1 (regexp, string, start, 1);
422 }
423
424 /* Match REGEXP against STRING, searching all of STRING,
425 and return the index of the match, or negative on failure.
426 This does not clobber the match data. */
427
428 int
429 fast_string_match (regexp, string)
430 Lisp_Object regexp, string;
431 {
432 int val;
433 struct re_pattern_buffer *bufp;
434
435 bufp = compile_pattern (regexp, 0, Qnil,
436 0, STRING_MULTIBYTE (string));
437 immediate_quit = 1;
438 re_match_object = string;
439
440 val = re_search (bufp, (char *) XSTRING (string)->data,
441 STRING_BYTES (XSTRING (string)), 0,
442 STRING_BYTES (XSTRING (string)), 0);
443 immediate_quit = 0;
444 return val;
445 }
446
447 /* Match REGEXP against STRING, searching all of STRING ignoring case,
448 and return the index of the match, or negative on failure.
449 This does not clobber the match data.
450 We assume that STRING contains single-byte characters. */
451
452 extern Lisp_Object Vascii_downcase_table;
453
454 int
455 fast_c_string_match_ignore_case (regexp, string)
456 Lisp_Object regexp;
457 char *string;
458 {
459 int val;
460 struct re_pattern_buffer *bufp;
461 int len = strlen (string);
462
463 regexp = string_make_unibyte (regexp);
464 re_match_object = Qt;
465 bufp = compile_pattern (regexp, 0,
466 Vascii_downcase_table, 0,
467 0);
468 immediate_quit = 1;
469 val = re_search (bufp, string, len, 0, len, 0);
470 immediate_quit = 0;
471 return val;
472 }
473 \f
474 /* The newline cache: remembering which sections of text have no newlines. */
475
476 /* If the user has requested newline caching, make sure it's on.
477 Otherwise, make sure it's off.
478 This is our cheezy way of associating an action with the change of
479 state of a buffer-local variable. */
480 static void
481 newline_cache_on_off (buf)
482 struct buffer *buf;
483 {
484 if (NILP (buf->cache_long_line_scans))
485 {
486 /* It should be off. */
487 if (buf->newline_cache)
488 {
489 free_region_cache (buf->newline_cache);
490 buf->newline_cache = 0;
491 }
492 }
493 else
494 {
495 /* It should be on. */
496 if (buf->newline_cache == 0)
497 buf->newline_cache = new_region_cache ();
498 }
499 }
500
501 \f
502 /* Search for COUNT instances of the character TARGET between START and END.
503
504 If COUNT is positive, search forwards; END must be >= START.
505 If COUNT is negative, search backwards for the -COUNTth instance;
506 END must be <= START.
507 If COUNT is zero, do anything you please; run rogue, for all I care.
508
509 If END is zero, use BEGV or ZV instead, as appropriate for the
510 direction indicated by COUNT.
511
512 If we find COUNT instances, set *SHORTAGE to zero, and return the
513 position after the COUNTth match. Note that for reverse motion
514 this is not the same as the usual convention for Emacs motion commands.
515
516 If we don't find COUNT instances before reaching END, set *SHORTAGE
517 to the number of TARGETs left unfound, and return END.
518
519 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
520 except when inside redisplay. */
521
522 int
523 scan_buffer (target, start, end, count, shortage, allow_quit)
524 register int target;
525 int start, end;
526 int count;
527 int *shortage;
528 int allow_quit;
529 {
530 struct region_cache *newline_cache;
531 int direction;
532
533 if (count > 0)
534 {
535 direction = 1;
536 if (! end) end = ZV;
537 }
538 else
539 {
540 direction = -1;
541 if (! end) end = BEGV;
542 }
543
544 newline_cache_on_off (current_buffer);
545 newline_cache = current_buffer->newline_cache;
546
547 if (shortage != 0)
548 *shortage = 0;
549
550 immediate_quit = allow_quit;
551
552 if (count > 0)
553 while (start != end)
554 {
555 /* Our innermost scanning loop is very simple; it doesn't know
556 about gaps, buffer ends, or the newline cache. ceiling is
557 the position of the last character before the next such
558 obstacle --- the last character the dumb search loop should
559 examine. */
560 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
561 int start_byte = CHAR_TO_BYTE (start);
562 int tem;
563
564 /* If we're looking for a newline, consult the newline cache
565 to see where we can avoid some scanning. */
566 if (target == '\n' && newline_cache)
567 {
568 int next_change;
569 immediate_quit = 0;
570 while (region_cache_forward
571 (current_buffer, newline_cache, start_byte, &next_change))
572 start_byte = next_change;
573 immediate_quit = allow_quit;
574
575 /* START should never be after END. */
576 if (start_byte > ceiling_byte)
577 start_byte = ceiling_byte;
578
579 /* Now the text after start is an unknown region, and
580 next_change is the position of the next known region. */
581 ceiling_byte = min (next_change - 1, ceiling_byte);
582 }
583
584 /* The dumb loop can only scan text stored in contiguous
585 bytes. BUFFER_CEILING_OF returns the last character
586 position that is contiguous, so the ceiling is the
587 position after that. */
588 tem = BUFFER_CEILING_OF (start_byte);
589 ceiling_byte = min (tem, ceiling_byte);
590
591 {
592 /* The termination address of the dumb loop. */
593 register unsigned char *ceiling_addr
594 = BYTE_POS_ADDR (ceiling_byte) + 1;
595 register unsigned char *cursor
596 = BYTE_POS_ADDR (start_byte);
597 unsigned char *base = cursor;
598
599 while (cursor < ceiling_addr)
600 {
601 unsigned char *scan_start = cursor;
602
603 /* The dumb loop. */
604 while (*cursor != target && ++cursor < ceiling_addr)
605 ;
606
607 /* If we're looking for newlines, cache the fact that
608 the region from start to cursor is free of them. */
609 if (target == '\n' && newline_cache)
610 know_region_cache (current_buffer, newline_cache,
611 start_byte + scan_start - base,
612 start_byte + cursor - base);
613
614 /* Did we find the target character? */
615 if (cursor < ceiling_addr)
616 {
617 if (--count == 0)
618 {
619 immediate_quit = 0;
620 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
621 }
622 cursor++;
623 }
624 }
625
626 start = BYTE_TO_CHAR (start_byte + cursor - base);
627 }
628 }
629 else
630 while (start > end)
631 {
632 /* The last character to check before the next obstacle. */
633 int ceiling_byte = CHAR_TO_BYTE (end);
634 int start_byte = CHAR_TO_BYTE (start);
635 int tem;
636
637 /* Consult the newline cache, if appropriate. */
638 if (target == '\n' && newline_cache)
639 {
640 int next_change;
641 immediate_quit = 0;
642 while (region_cache_backward
643 (current_buffer, newline_cache, start_byte, &next_change))
644 start_byte = next_change;
645 immediate_quit = allow_quit;
646
647 /* Start should never be at or before end. */
648 if (start_byte <= ceiling_byte)
649 start_byte = ceiling_byte + 1;
650
651 /* Now the text before start is an unknown region, and
652 next_change is the position of the next known region. */
653 ceiling_byte = max (next_change, ceiling_byte);
654 }
655
656 /* Stop scanning before the gap. */
657 tem = BUFFER_FLOOR_OF (start_byte - 1);
658 ceiling_byte = max (tem, ceiling_byte);
659
660 {
661 /* The termination address of the dumb loop. */
662 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
663 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
664 unsigned char *base = cursor;
665
666 while (cursor >= ceiling_addr)
667 {
668 unsigned char *scan_start = cursor;
669
670 while (*cursor != target && --cursor >= ceiling_addr)
671 ;
672
673 /* If we're looking for newlines, cache the fact that
674 the region from after the cursor to start is free of them. */
675 if (target == '\n' && newline_cache)
676 know_region_cache (current_buffer, newline_cache,
677 start_byte + cursor - base,
678 start_byte + scan_start - base);
679
680 /* Did we find the target character? */
681 if (cursor >= ceiling_addr)
682 {
683 if (++count >= 0)
684 {
685 immediate_quit = 0;
686 return BYTE_TO_CHAR (start_byte + cursor - base);
687 }
688 cursor--;
689 }
690 }
691
692 start = BYTE_TO_CHAR (start_byte + cursor - base);
693 }
694 }
695
696 immediate_quit = 0;
697 if (shortage != 0)
698 *shortage = count * direction;
699 return start;
700 }
701 \f
702 /* Search for COUNT instances of a line boundary, which means either a
703 newline or (if selective display enabled) a carriage return.
704 Start at START. If COUNT is negative, search backwards.
705
706 We report the resulting position by calling TEMP_SET_PT_BOTH.
707
708 If we find COUNT instances. we position after (always after,
709 even if scanning backwards) the COUNTth match, and return 0.
710
711 If we don't find COUNT instances before reaching the end of the
712 buffer (or the beginning, if scanning backwards), we return
713 the number of line boundaries left unfound, and position at
714 the limit we bumped up against.
715
716 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
717 except in special cases. */
718
719 int
720 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
721 int start, start_byte;
722 int limit, limit_byte;
723 register int count;
724 int allow_quit;
725 {
726 int direction = ((count > 0) ? 1 : -1);
727
728 register unsigned char *cursor;
729 unsigned char *base;
730
731 register int ceiling;
732 register unsigned char *ceiling_addr;
733
734 int old_immediate_quit = immediate_quit;
735
736 /* If we are not in selective display mode,
737 check only for newlines. */
738 int selective_display = (!NILP (current_buffer->selective_display)
739 && !INTEGERP (current_buffer->selective_display));
740
741 /* The code that follows is like scan_buffer
742 but checks for either newline or carriage return. */
743
744 if (allow_quit)
745 immediate_quit++;
746
747 start_byte = CHAR_TO_BYTE (start);
748
749 if (count > 0)
750 {
751 while (start_byte < limit_byte)
752 {
753 ceiling = BUFFER_CEILING_OF (start_byte);
754 ceiling = min (limit_byte - 1, ceiling);
755 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
756 base = (cursor = BYTE_POS_ADDR (start_byte));
757 while (1)
758 {
759 while (*cursor != '\n' && ++cursor != ceiling_addr)
760 ;
761
762 if (cursor != ceiling_addr)
763 {
764 if (--count == 0)
765 {
766 immediate_quit = old_immediate_quit;
767 start_byte = start_byte + cursor - base + 1;
768 start = BYTE_TO_CHAR (start_byte);
769 TEMP_SET_PT_BOTH (start, start_byte);
770 return 0;
771 }
772 else
773 if (++cursor == ceiling_addr)
774 break;
775 }
776 else
777 break;
778 }
779 start_byte += cursor - base;
780 }
781 }
782 else
783 {
784 while (start_byte > limit_byte)
785 {
786 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
787 ceiling = max (limit_byte, ceiling);
788 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
789 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
790 while (1)
791 {
792 while (--cursor != ceiling_addr && *cursor != '\n')
793 ;
794
795 if (cursor != ceiling_addr)
796 {
797 if (++count == 0)
798 {
799 immediate_quit = old_immediate_quit;
800 /* Return the position AFTER the match we found. */
801 start_byte = start_byte + cursor - base + 1;
802 start = BYTE_TO_CHAR (start_byte);
803 TEMP_SET_PT_BOTH (start, start_byte);
804 return 0;
805 }
806 }
807 else
808 break;
809 }
810 /* Here we add 1 to compensate for the last decrement
811 of CURSOR, which took it past the valid range. */
812 start_byte += cursor - base + 1;
813 }
814 }
815
816 TEMP_SET_PT_BOTH (limit, limit_byte);
817 immediate_quit = old_immediate_quit;
818
819 return count * direction;
820 }
821
822 int
823 find_next_newline_no_quit (from, cnt)
824 register int from, cnt;
825 {
826 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
827 }
828
829 /* Like find_next_newline, but returns position before the newline,
830 not after, and only search up to TO. This isn't just
831 find_next_newline (...)-1, because you might hit TO. */
832
833 int
834 find_before_next_newline (from, to, cnt)
835 int from, to, cnt;
836 {
837 int shortage;
838 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
839
840 if (shortage == 0)
841 pos--;
842
843 return pos;
844 }
845 \f
846 /* Subroutines of Lisp buffer search functions. */
847
848 static Lisp_Object
849 search_command (string, bound, noerror, count, direction, RE, posix)
850 Lisp_Object string, bound, noerror, count;
851 int direction;
852 int RE;
853 int posix;
854 {
855 register int np;
856 int lim, lim_byte;
857 int n = direction;
858
859 if (!NILP (count))
860 {
861 CHECK_NUMBER (count, 3);
862 n *= XINT (count);
863 }
864
865 CHECK_STRING (string, 0);
866 if (NILP (bound))
867 {
868 if (n > 0)
869 lim = ZV, lim_byte = ZV_BYTE;
870 else
871 lim = BEGV, lim_byte = BEGV_BYTE;
872 }
873 else
874 {
875 CHECK_NUMBER_COERCE_MARKER (bound, 1);
876 lim = XINT (bound);
877 if (n > 0 ? lim < PT : lim > PT)
878 error ("Invalid search bound (wrong side of point)");
879 if (lim > ZV)
880 lim = ZV, lim_byte = ZV_BYTE;
881 else if (lim < BEGV)
882 lim = BEGV, lim_byte = BEGV_BYTE;
883 else
884 lim_byte = CHAR_TO_BYTE (lim);
885 }
886
887 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
888 (!NILP (current_buffer->case_fold_search)
889 ? current_buffer->case_canon_table
890 : Qnil),
891 (!NILP (current_buffer->case_fold_search)
892 ? current_buffer->case_eqv_table
893 : Qnil),
894 posix);
895 if (np <= 0)
896 {
897 if (NILP (noerror))
898 return signal_failure (string);
899 if (!EQ (noerror, Qt))
900 {
901 if (lim < BEGV || lim > ZV)
902 abort ();
903 SET_PT_BOTH (lim, lim_byte);
904 return Qnil;
905 #if 0 /* This would be clean, but maybe programs depend on
906 a value of nil here. */
907 np = lim;
908 #endif
909 }
910 else
911 return Qnil;
912 }
913
914 if (np < BEGV || np > ZV)
915 abort ();
916
917 SET_PT (np);
918
919 return make_number (np);
920 }
921 \f
922 /* Return 1 if REGEXP it matches just one constant string. */
923
924 static int
925 trivial_regexp_p (regexp)
926 Lisp_Object regexp;
927 {
928 int len = STRING_BYTES (XSTRING (regexp));
929 unsigned char *s = XSTRING (regexp)->data;
930 unsigned char c;
931 while (--len >= 0)
932 {
933 switch (*s++)
934 {
935 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
936 return 0;
937 case '\\':
938 if (--len < 0)
939 return 0;
940 switch (*s++)
941 {
942 case '|': case '(': case ')': case '`': case '\'': case 'b':
943 case 'B': case '<': case '>': case 'w': case 'W': case 's':
944 case 'S': case '=':
945 case 'c': case 'C': /* for categoryspec and notcategoryspec */
946 case '1': case '2': case '3': case '4': case '5':
947 case '6': case '7': case '8': case '9':
948 return 0;
949 }
950 }
951 }
952 return 1;
953 }
954
955 /* Search for the n'th occurrence of STRING in the current buffer,
956 starting at position POS and stopping at position LIM,
957 treating STRING as a literal string if RE is false or as
958 a regular expression if RE is true.
959
960 If N is positive, searching is forward and LIM must be greater than POS.
961 If N is negative, searching is backward and LIM must be less than POS.
962
963 Returns -x if x occurrences remain to be found (x > 0),
964 or else the position at the beginning of the Nth occurrence
965 (if searching backward) or the end (if searching forward).
966
967 POSIX is nonzero if we want full backtracking (POSIX style)
968 for this pattern. 0 means backtrack only enough to get a valid match. */
969
970 #define TRANSLATE(out, trt, d) \
971 do \
972 { \
973 if (! NILP (trt)) \
974 { \
975 Lisp_Object temp; \
976 temp = Faref (trt, make_number (d)); \
977 if (INTEGERP (temp)) \
978 out = XINT (temp); \
979 else \
980 out = d; \
981 } \
982 else \
983 out = d; \
984 } \
985 while (0)
986
987 static int
988 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
989 RE, trt, inverse_trt, posix)
990 Lisp_Object string;
991 int pos;
992 int pos_byte;
993 int lim;
994 int lim_byte;
995 int n;
996 int RE;
997 Lisp_Object trt;
998 Lisp_Object inverse_trt;
999 int posix;
1000 {
1001 int len = XSTRING (string)->size;
1002 int len_byte = STRING_BYTES (XSTRING (string));
1003 register int i;
1004
1005 if (running_asynch_code)
1006 save_search_regs ();
1007
1008 /* Searching 0 times means don't move. */
1009 /* Null string is found at starting position. */
1010 if (len == 0 || n == 0)
1011 {
1012 set_search_regs (pos, 0);
1013 return pos;
1014 }
1015
1016 if (RE && !trivial_regexp_p (string))
1017 {
1018 unsigned char *p1, *p2;
1019 int s1, s2;
1020 struct re_pattern_buffer *bufp;
1021
1022 bufp = compile_pattern (string, &search_regs, trt, posix,
1023 !NILP (current_buffer->enable_multibyte_characters));
1024
1025 immediate_quit = 1; /* Quit immediately if user types ^G,
1026 because letting this function finish
1027 can take too long. */
1028 QUIT; /* Do a pending quit right away,
1029 to avoid paradoxical behavior */
1030 /* Get pointers and sizes of the two strings
1031 that make up the visible portion of the buffer. */
1032
1033 p1 = BEGV_ADDR;
1034 s1 = GPT_BYTE - BEGV_BYTE;
1035 p2 = GAP_END_ADDR;
1036 s2 = ZV_BYTE - GPT_BYTE;
1037 if (s1 < 0)
1038 {
1039 p2 = p1;
1040 s2 = ZV_BYTE - BEGV_BYTE;
1041 s1 = 0;
1042 }
1043 if (s2 < 0)
1044 {
1045 s1 = ZV_BYTE - BEGV_BYTE;
1046 s2 = 0;
1047 }
1048 re_match_object = Qnil;
1049
1050 while (n < 0)
1051 {
1052 int val;
1053 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1054 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1055 &search_regs,
1056 /* Don't allow match past current point */
1057 pos_byte - BEGV_BYTE);
1058 if (val == -2)
1059 {
1060 matcher_overflow ();
1061 }
1062 if (val >= 0)
1063 {
1064 pos_byte = search_regs.start[0] + BEGV_BYTE;
1065 for (i = 0; i < search_regs.num_regs; i++)
1066 if (search_regs.start[i] >= 0)
1067 {
1068 search_regs.start[i]
1069 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1070 search_regs.end[i]
1071 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1072 }
1073 XSETBUFFER (last_thing_searched, current_buffer);
1074 /* Set pos to the new position. */
1075 pos = search_regs.start[0];
1076 }
1077 else
1078 {
1079 immediate_quit = 0;
1080 return (n);
1081 }
1082 n++;
1083 }
1084 while (n > 0)
1085 {
1086 int val;
1087 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1088 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1089 &search_regs,
1090 lim_byte - BEGV_BYTE);
1091 if (val == -2)
1092 {
1093 matcher_overflow ();
1094 }
1095 if (val >= 0)
1096 {
1097 pos_byte = search_regs.end[0] + BEGV_BYTE;
1098 for (i = 0; i < search_regs.num_regs; i++)
1099 if (search_regs.start[i] >= 0)
1100 {
1101 search_regs.start[i]
1102 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1103 search_regs.end[i]
1104 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1105 }
1106 XSETBUFFER (last_thing_searched, current_buffer);
1107 pos = search_regs.end[0];
1108 }
1109 else
1110 {
1111 immediate_quit = 0;
1112 return (0 - n);
1113 }
1114 n--;
1115 }
1116 immediate_quit = 0;
1117 return (pos);
1118 }
1119 else /* non-RE case */
1120 {
1121 unsigned char *raw_pattern, *pat;
1122 int raw_pattern_size;
1123 int raw_pattern_size_byte;
1124 unsigned char *patbuf;
1125 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1126 unsigned char *base_pat = XSTRING (string)->data;
1127 int charset_base = -1;
1128 int simple = 1;
1129
1130 /* MULTIBYTE says whether the text to be searched is multibyte.
1131 We must convert PATTERN to match that, or we will not really
1132 find things right. */
1133
1134 if (multibyte == STRING_MULTIBYTE (string))
1135 {
1136 raw_pattern = (unsigned char *) XSTRING (string)->data;
1137 raw_pattern_size = XSTRING (string)->size;
1138 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1139 }
1140 else if (multibyte)
1141 {
1142 raw_pattern_size = XSTRING (string)->size;
1143 raw_pattern_size_byte
1144 = count_size_as_multibyte (XSTRING (string)->data,
1145 raw_pattern_size);
1146 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1147 copy_text (XSTRING (string)->data, raw_pattern,
1148 XSTRING (string)->size, 0, 1);
1149 }
1150 else
1151 {
1152 /* Converting multibyte to single-byte.
1153
1154 ??? Perhaps this conversion should be done in a special way
1155 by subtracting nonascii-insert-offset from each non-ASCII char,
1156 so that only the multibyte chars which really correspond to
1157 the chosen single-byte character set can possibly match. */
1158 raw_pattern_size = XSTRING (string)->size;
1159 raw_pattern_size_byte = XSTRING (string)->size;
1160 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1161 copy_text (XSTRING (string)->data, raw_pattern,
1162 STRING_BYTES (XSTRING (string)), 1, 0);
1163 }
1164
1165 /* Copy and optionally translate the pattern. */
1166 len = raw_pattern_size;
1167 len_byte = raw_pattern_size_byte;
1168 patbuf = (unsigned char *) alloca (len_byte);
1169 pat = patbuf;
1170 base_pat = raw_pattern;
1171 if (multibyte)
1172 {
1173 while (--len >= 0)
1174 {
1175 unsigned char workbuf[4], *str;
1176 int c, translated, inverse;
1177 int in_charlen, charlen;
1178
1179 /* If we got here and the RE flag is set, it's because we're
1180 dealing with a regexp known to be trivial, so the backslash
1181 just quotes the next character. */
1182 if (RE && *base_pat == '\\')
1183 {
1184 len--;
1185 len_byte--;
1186 base_pat++;
1187 }
1188
1189 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1190 /* Translate the character, if requested. */
1191 TRANSLATE (translated, trt, c);
1192 /* If translation changed the byte-length, go back
1193 to the original character. */
1194 charlen = CHAR_STRING (translated, workbuf, str);
1195 if (in_charlen != charlen)
1196 {
1197 translated = c;
1198 charlen = CHAR_STRING (c, workbuf, str);
1199 }
1200
1201 TRANSLATE (inverse, inverse_trt, c);
1202
1203 /* Did this char actually get translated?
1204 Would any other char get translated into it? */
1205 if (translated != c || inverse != c)
1206 {
1207 /* Keep track of which character set row
1208 contains the characters that need translation. */
1209 int charset_base_code = c & ~0xff;
1210 if (charset_base == -1)
1211 charset_base = charset_base_code;
1212 else if (charset_base != charset_base_code)
1213 /* If two different rows appear, needing translation,
1214 then we cannot use boyer_moore search. */
1215 simple = 0;
1216 /* ??? Handa: this must do simple = 0
1217 if c is a composite character. */
1218 }
1219
1220 /* Store this character into the translated pattern. */
1221 bcopy (str, pat, charlen);
1222 pat += charlen;
1223 base_pat += in_charlen;
1224 len_byte -= in_charlen;
1225 }
1226 }
1227 else
1228 {
1229 while (--len >= 0)
1230 {
1231 int c, translated, inverse;
1232
1233 /* If we got here and the RE flag is set, it's because we're
1234 dealing with a regexp known to be trivial, so the backslash
1235 just quotes the next character. */
1236 if (RE && *base_pat == '\\')
1237 {
1238 len--;
1239 base_pat++;
1240 }
1241 c = *base_pat++;
1242 TRANSLATE (translated, trt, c);
1243 TRANSLATE (inverse, inverse_trt, c);
1244
1245 /* Did this char actually get translated?
1246 Would any other char get translated into it? */
1247 if (translated != c || inverse != c)
1248 {
1249 /* Keep track of which character set row
1250 contains the characters that need translation. */
1251 int charset_base_code = c & ~0xff;
1252 if (charset_base == -1)
1253 charset_base = charset_base_code;
1254 else if (charset_base != charset_base_code)
1255 /* If two different rows appear, needing translation,
1256 then we cannot use boyer_moore search. */
1257 simple = 0;
1258 }
1259 *pat++ = translated;
1260 }
1261 }
1262
1263 len_byte = pat - patbuf;
1264 len = raw_pattern_size;
1265 pat = base_pat = patbuf;
1266
1267 if (simple)
1268 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1269 pos, pos_byte, lim, lim_byte,
1270 charset_base);
1271 else
1272 return simple_search (n, pat, len, len_byte, trt,
1273 pos, pos_byte, lim, lim_byte);
1274 }
1275 }
1276 \f
1277 /* Do a simple string search N times for the string PAT,
1278 whose length is LEN/LEN_BYTE,
1279 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1280 TRT is the translation table.
1281
1282 Return the character position where the match is found.
1283 Otherwise, if M matches remained to be found, return -M.
1284
1285 This kind of search works regardless of what is in PAT and
1286 regardless of what is in TRT. It is used in cases where
1287 boyer_moore cannot work. */
1288
1289 static int
1290 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1291 int n;
1292 unsigned char *pat;
1293 int len, len_byte;
1294 Lisp_Object trt;
1295 int pos, pos_byte;
1296 int lim, lim_byte;
1297 {
1298 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1299 int forward = n > 0;
1300
1301 if (lim > pos && multibyte)
1302 while (n > 0)
1303 {
1304 while (1)
1305 {
1306 /* Try matching at position POS. */
1307 int this_pos = pos;
1308 int this_pos_byte = pos_byte;
1309 int this_len = len;
1310 int this_len_byte = len_byte;
1311 unsigned char *p = pat;
1312 if (pos + len > lim)
1313 goto stop;
1314
1315 while (this_len > 0)
1316 {
1317 int charlen, buf_charlen;
1318 int pat_ch, buf_ch;
1319
1320 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1321 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1322 ZV_BYTE - this_pos_byte,
1323 buf_charlen);
1324 TRANSLATE (buf_ch, trt, buf_ch);
1325
1326 if (buf_ch != pat_ch)
1327 break;
1328
1329 this_len_byte -= charlen;
1330 this_len--;
1331 p += charlen;
1332
1333 this_pos_byte += buf_charlen;
1334 this_pos++;
1335 }
1336
1337 if (this_len == 0)
1338 {
1339 pos += len;
1340 pos_byte += len_byte;
1341 break;
1342 }
1343
1344 INC_BOTH (pos, pos_byte);
1345 }
1346
1347 n--;
1348 }
1349 else if (lim > pos)
1350 while (n > 0)
1351 {
1352 while (1)
1353 {
1354 /* Try matching at position POS. */
1355 int this_pos = pos;
1356 int this_len = len;
1357 unsigned char *p = pat;
1358
1359 if (pos + len > lim)
1360 goto stop;
1361
1362 while (this_len > 0)
1363 {
1364 int pat_ch = *p++;
1365 int buf_ch = FETCH_BYTE (this_pos);
1366 TRANSLATE (buf_ch, trt, buf_ch);
1367
1368 if (buf_ch != pat_ch)
1369 break;
1370
1371 this_len--;
1372 this_pos++;
1373 }
1374
1375 if (this_len == 0)
1376 {
1377 pos += len;
1378 break;
1379 }
1380
1381 pos++;
1382 }
1383
1384 n--;
1385 }
1386 /* Backwards search. */
1387 else if (lim < pos && multibyte)
1388 while (n < 0)
1389 {
1390 while (1)
1391 {
1392 /* Try matching at position POS. */
1393 int this_pos = pos - len;
1394 int this_pos_byte = pos_byte - len_byte;
1395 int this_len = len;
1396 int this_len_byte = len_byte;
1397 unsigned char *p = pat;
1398
1399 if (pos - len < lim)
1400 goto stop;
1401
1402 while (this_len > 0)
1403 {
1404 int charlen, buf_charlen;
1405 int pat_ch, buf_ch;
1406
1407 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1408 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1409 ZV_BYTE - this_pos_byte,
1410 buf_charlen);
1411 TRANSLATE (buf_ch, trt, buf_ch);
1412
1413 if (buf_ch != pat_ch)
1414 break;
1415
1416 this_len_byte -= charlen;
1417 this_len--;
1418 p += charlen;
1419 this_pos_byte += buf_charlen;
1420 this_pos++;
1421 }
1422
1423 if (this_len == 0)
1424 {
1425 pos -= len;
1426 pos_byte -= len_byte;
1427 break;
1428 }
1429
1430 DEC_BOTH (pos, pos_byte);
1431 }
1432
1433 n++;
1434 }
1435 else if (lim < pos)
1436 while (n < 0)
1437 {
1438 while (1)
1439 {
1440 /* Try matching at position POS. */
1441 int this_pos = pos - len;
1442 int this_len = len;
1443 unsigned char *p = pat;
1444
1445 if (pos - len < lim)
1446 goto stop;
1447
1448 while (this_len > 0)
1449 {
1450 int pat_ch = *p++;
1451 int buf_ch = FETCH_BYTE (this_pos);
1452 TRANSLATE (buf_ch, trt, buf_ch);
1453
1454 if (buf_ch != pat_ch)
1455 break;
1456 this_len--;
1457 this_pos++;
1458 }
1459
1460 if (this_len == 0)
1461 {
1462 pos -= len;
1463 break;
1464 }
1465
1466 pos--;
1467 }
1468
1469 n++;
1470 }
1471
1472 stop:
1473 if (n == 0)
1474 {
1475 if (forward)
1476 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1477 else
1478 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1479
1480 return pos;
1481 }
1482 else if (n > 0)
1483 return -n;
1484 else
1485 return n;
1486 }
1487 \f
1488 /* Do Boyer-Moore search N times for the string PAT,
1489 whose length is LEN/LEN_BYTE,
1490 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1491 DIRECTION says which direction we search in.
1492 TRT and INVERSE_TRT are translation tables.
1493
1494 This kind of search works if all the characters in PAT that have
1495 nontrivial translation are the same aside from the last byte. This
1496 makes it possible to translate just the last byte of a character,
1497 and do so after just a simple test of the context.
1498
1499 If that criterion is not satisfied, do not call this function. */
1500
1501 static int
1502 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1503 pos, pos_byte, lim, lim_byte, charset_base)
1504 int n;
1505 unsigned char *base_pat;
1506 int len, len_byte;
1507 Lisp_Object trt;
1508 Lisp_Object inverse_trt;
1509 int pos, pos_byte;
1510 int lim, lim_byte;
1511 int charset_base;
1512 {
1513 int direction = ((n > 0) ? 1 : -1);
1514 register int dirlen;
1515 int infinity, limit, k, stride_for_teases;
1516 register int *BM_tab;
1517 int *BM_tab_base;
1518 register unsigned char *cursor, *p_limit;
1519 register int i, j;
1520 unsigned char *pat, *pat_end;
1521 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1522
1523 unsigned char simple_translate[0400];
1524 int translate_prev_byte;
1525 int translate_anteprev_byte;
1526
1527 #ifdef C_ALLOCA
1528 int BM_tab_space[0400];
1529 BM_tab = &BM_tab_space[0];
1530 #else
1531 BM_tab = (int *) alloca (0400 * sizeof (int));
1532 #endif
1533 /* The general approach is that we are going to maintain that we know */
1534 /* the first (closest to the present position, in whatever direction */
1535 /* we're searching) character that could possibly be the last */
1536 /* (furthest from present position) character of a valid match. We */
1537 /* advance the state of our knowledge by looking at that character */
1538 /* and seeing whether it indeed matches the last character of the */
1539 /* pattern. If it does, we take a closer look. If it does not, we */
1540 /* move our pointer (to putative last characters) as far as is */
1541 /* logically possible. This amount of movement, which I call a */
1542 /* stride, will be the length of the pattern if the actual character */
1543 /* appears nowhere in the pattern, otherwise it will be the distance */
1544 /* from the last occurrence of that character to the end of the */
1545 /* pattern. */
1546 /* As a coding trick, an enormous stride is coded into the table for */
1547 /* characters that match the last character. This allows use of only */
1548 /* a single test, a test for having gone past the end of the */
1549 /* permissible match region, to test for both possible matches (when */
1550 /* the stride goes past the end immediately) and failure to */
1551 /* match (where you get nudged past the end one stride at a time). */
1552
1553 /* Here we make a "mickey mouse" BM table. The stride of the search */
1554 /* is determined only by the last character of the putative match. */
1555 /* If that character does not match, we will stride the proper */
1556 /* distance to propose a match that superimposes it on the last */
1557 /* instance of a character that matches it (per trt), or misses */
1558 /* it entirely if there is none. */
1559
1560 dirlen = len_byte * direction;
1561 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1562
1563 /* Record position after the end of the pattern. */
1564 pat_end = base_pat + len_byte;
1565 /* BASE_PAT points to a character that we start scanning from.
1566 It is the first character in a forward search,
1567 the last character in a backward search. */
1568 if (direction < 0)
1569 base_pat = pat_end - 1;
1570
1571 BM_tab_base = BM_tab;
1572 BM_tab += 0400;
1573 j = dirlen; /* to get it in a register */
1574 /* A character that does not appear in the pattern induces a */
1575 /* stride equal to the pattern length. */
1576 while (BM_tab_base != BM_tab)
1577 {
1578 *--BM_tab = j;
1579 *--BM_tab = j;
1580 *--BM_tab = j;
1581 *--BM_tab = j;
1582 }
1583
1584 /* We use this for translation, instead of TRT itself.
1585 We fill this in to handle the characters that actually
1586 occur in the pattern. Others don't matter anyway! */
1587 bzero (simple_translate, sizeof simple_translate);
1588 for (i = 0; i < 0400; i++)
1589 simple_translate[i] = i;
1590
1591 i = 0;
1592 while (i != infinity)
1593 {
1594 unsigned char *ptr = base_pat + i;
1595 i += direction;
1596 if (i == dirlen)
1597 i = infinity;
1598 if (! NILP (trt))
1599 {
1600 int ch;
1601 int untranslated;
1602 int this_translated = 1;
1603
1604 if (multibyte
1605 /* Is *PTR the last byte of a character? */
1606 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1607 {
1608 unsigned char *charstart = ptr;
1609 while (! CHAR_HEAD_P (*charstart))
1610 charstart--;
1611 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1612 if (charset_base == (untranslated & ~0xff))
1613 {
1614 TRANSLATE (ch, trt, untranslated);
1615 if (! CHAR_HEAD_P (*ptr))
1616 {
1617 translate_prev_byte = ptr[-1];
1618 if (! CHAR_HEAD_P (translate_prev_byte))
1619 translate_anteprev_byte = ptr[-2];
1620 }
1621 }
1622 else
1623 {
1624 this_translated = 0;
1625 ch = *ptr;
1626 }
1627 }
1628 else if (!multibyte)
1629 TRANSLATE (ch, trt, *ptr);
1630 else
1631 {
1632 ch = *ptr;
1633 this_translated = 0;
1634 }
1635
1636 if (ch > 0400)
1637 j = ((unsigned char) ch) | 0200;
1638 else
1639 j = (unsigned char) ch;
1640
1641 if (i == infinity)
1642 stride_for_teases = BM_tab[j];
1643
1644 BM_tab[j] = dirlen - i;
1645 /* A translation table is accompanied by its inverse -- see */
1646 /* comment following downcase_table for details */
1647 if (this_translated)
1648 {
1649 int starting_ch = ch;
1650 int starting_j = j;
1651 while (1)
1652 {
1653 TRANSLATE (ch, inverse_trt, ch);
1654 if (ch > 0400)
1655 j = ((unsigned char) ch) | 0200;
1656 else
1657 j = (unsigned char) ch;
1658
1659 /* For all the characters that map into CH,
1660 set up simple_translate to map the last byte
1661 into STARTING_J. */
1662 simple_translate[j] = starting_j;
1663 if (ch == starting_ch)
1664 break;
1665 BM_tab[j] = dirlen - i;
1666 }
1667 }
1668 }
1669 else
1670 {
1671 j = *ptr;
1672
1673 if (i == infinity)
1674 stride_for_teases = BM_tab[j];
1675 BM_tab[j] = dirlen - i;
1676 }
1677 /* stride_for_teases tells how much to stride if we get a */
1678 /* match on the far character but are subsequently */
1679 /* disappointed, by recording what the stride would have been */
1680 /* for that character if the last character had been */
1681 /* different. */
1682 }
1683 infinity = dirlen - infinity;
1684 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1685 /* loop invariant - POS_BYTE points at where last char (first
1686 char if reverse) of pattern would align in a possible match. */
1687 while (n != 0)
1688 {
1689 int tail_end;
1690 unsigned char *tail_end_ptr;
1691
1692 /* It's been reported that some (broken) compiler thinks that
1693 Boolean expressions in an arithmetic context are unsigned.
1694 Using an explicit ?1:0 prevents this. */
1695 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1696 < 0)
1697 return (n * (0 - direction));
1698 /* First we do the part we can by pointers (maybe nothing) */
1699 QUIT;
1700 pat = base_pat;
1701 limit = pos_byte - dirlen + direction;
1702 if (direction > 0)
1703 {
1704 limit = BUFFER_CEILING_OF (limit);
1705 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1706 can take on without hitting edge of buffer or the gap. */
1707 limit = min (limit, pos_byte + 20000);
1708 limit = min (limit, lim_byte - 1);
1709 }
1710 else
1711 {
1712 limit = BUFFER_FLOOR_OF (limit);
1713 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1714 can take on without hitting edge of buffer or the gap. */
1715 limit = max (limit, pos_byte - 20000);
1716 limit = max (limit, lim_byte);
1717 }
1718 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1719 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1720
1721 if ((limit - pos_byte) * direction > 20)
1722 {
1723 unsigned char *p2;
1724
1725 p_limit = BYTE_POS_ADDR (limit);
1726 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1727 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1728 while (1) /* use one cursor setting as long as i can */
1729 {
1730 if (direction > 0) /* worth duplicating */
1731 {
1732 /* Use signed comparison if appropriate
1733 to make cursor+infinity sure to be > p_limit.
1734 Assuming that the buffer lies in a range of addresses
1735 that are all "positive" (as ints) or all "negative",
1736 either kind of comparison will work as long
1737 as we don't step by infinity. So pick the kind
1738 that works when we do step by infinity. */
1739 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1740 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1741 cursor += BM_tab[*cursor];
1742 else
1743 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1744 cursor += BM_tab[*cursor];
1745 }
1746 else
1747 {
1748 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1749 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1750 cursor += BM_tab[*cursor];
1751 else
1752 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1753 cursor += BM_tab[*cursor];
1754 }
1755 /* If you are here, cursor is beyond the end of the searched region. */
1756 /* This can happen if you match on the far character of the pattern, */
1757 /* because the "stride" of that character is infinity, a number able */
1758 /* to throw you well beyond the end of the search. It can also */
1759 /* happen if you fail to match within the permitted region and would */
1760 /* otherwise try a character beyond that region */
1761 if ((cursor - p_limit) * direction <= len_byte)
1762 break; /* a small overrun is genuine */
1763 cursor -= infinity; /* large overrun = hit */
1764 i = dirlen - direction;
1765 if (! NILP (trt))
1766 {
1767 while ((i -= direction) + direction != 0)
1768 {
1769 int ch;
1770 cursor -= direction;
1771 /* Translate only the last byte of a character. */
1772 if (! multibyte
1773 || ((cursor == tail_end_ptr
1774 || CHAR_HEAD_P (cursor[1]))
1775 && (CHAR_HEAD_P (cursor[0])
1776 || (translate_prev_byte == cursor[-1]
1777 && (CHAR_HEAD_P (translate_prev_byte)
1778 || translate_anteprev_byte == cursor[-2])))))
1779 ch = simple_translate[*cursor];
1780 else
1781 ch = *cursor;
1782 if (pat[i] != ch)
1783 break;
1784 }
1785 }
1786 else
1787 {
1788 while ((i -= direction) + direction != 0)
1789 {
1790 cursor -= direction;
1791 if (pat[i] != *cursor)
1792 break;
1793 }
1794 }
1795 cursor += dirlen - i - direction; /* fix cursor */
1796 if (i + direction == 0)
1797 {
1798 int position;
1799
1800 cursor -= direction;
1801
1802 position = pos_byte + cursor - p2 + ((direction > 0)
1803 ? 1 - len_byte : 0);
1804 set_search_regs (position, len_byte);
1805
1806 if ((n -= direction) != 0)
1807 cursor += dirlen; /* to resume search */
1808 else
1809 return ((direction > 0)
1810 ? search_regs.end[0] : search_regs.start[0]);
1811 }
1812 else
1813 cursor += stride_for_teases; /* <sigh> we lose - */
1814 }
1815 pos_byte += cursor - p2;
1816 }
1817 else
1818 /* Now we'll pick up a clump that has to be done the hard */
1819 /* way because it covers a discontinuity */
1820 {
1821 limit = ((direction > 0)
1822 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1823 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1824 limit = ((direction > 0)
1825 ? min (limit + len_byte, lim_byte - 1)
1826 : max (limit - len_byte, lim_byte));
1827 /* LIMIT is now the last value POS_BYTE can have
1828 and still be valid for a possible match. */
1829 while (1)
1830 {
1831 /* This loop can be coded for space rather than */
1832 /* speed because it will usually run only once. */
1833 /* (the reach is at most len + 21, and typically */
1834 /* does not exceed len) */
1835 while ((limit - pos_byte) * direction >= 0)
1836 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1837 /* now run the same tests to distinguish going off the */
1838 /* end, a match or a phony match. */
1839 if ((pos_byte - limit) * direction <= len_byte)
1840 break; /* ran off the end */
1841 /* Found what might be a match.
1842 Set POS_BYTE back to last (first if reverse) pos. */
1843 pos_byte -= infinity;
1844 i = dirlen - direction;
1845 while ((i -= direction) + direction != 0)
1846 {
1847 int ch;
1848 unsigned char *ptr;
1849 pos_byte -= direction;
1850 ptr = BYTE_POS_ADDR (pos_byte);
1851 /* Translate only the last byte of a character. */
1852 if (! multibyte
1853 || ((ptr == tail_end_ptr
1854 || CHAR_HEAD_P (ptr[1]))
1855 && (CHAR_HEAD_P (ptr[0])
1856 || (translate_prev_byte == ptr[-1]
1857 && (CHAR_HEAD_P (translate_prev_byte)
1858 || translate_anteprev_byte == ptr[-2])))))
1859 ch = simple_translate[*ptr];
1860 else
1861 ch = *ptr;
1862 if (pat[i] != ch)
1863 break;
1864 }
1865 /* Above loop has moved POS_BYTE part or all the way
1866 back to the first pos (last pos if reverse).
1867 Set it once again at the last (first if reverse) char. */
1868 pos_byte += dirlen - i- direction;
1869 if (i + direction == 0)
1870 {
1871 int position;
1872 pos_byte -= direction;
1873
1874 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1875
1876 set_search_regs (position, len_byte);
1877
1878 if ((n -= direction) != 0)
1879 pos_byte += dirlen; /* to resume search */
1880 else
1881 return ((direction > 0)
1882 ? search_regs.end[0] : search_regs.start[0]);
1883 }
1884 else
1885 pos_byte += stride_for_teases;
1886 }
1887 }
1888 /* We have done one clump. Can we continue? */
1889 if ((lim_byte - pos_byte) * direction < 0)
1890 return ((0 - n) * direction);
1891 }
1892 return BYTE_TO_CHAR (pos_byte);
1893 }
1894
1895 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1896 for the overall match just found in the current buffer.
1897 Also clear out the match data for registers 1 and up. */
1898
1899 static void
1900 set_search_regs (beg_byte, nbytes)
1901 int beg_byte, nbytes;
1902 {
1903 int i;
1904
1905 /* Make sure we have registers in which to store
1906 the match position. */
1907 if (search_regs.num_regs == 0)
1908 {
1909 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1910 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1911 search_regs.num_regs = 2;
1912 }
1913
1914 /* Clear out the other registers. */
1915 for (i = 1; i < search_regs.num_regs; i++)
1916 {
1917 search_regs.start[i] = -1;
1918 search_regs.end[i] = -1;
1919 }
1920
1921 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1922 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1923 XSETBUFFER (last_thing_searched, current_buffer);
1924 }
1925 \f
1926 /* Given a string of words separated by word delimiters,
1927 compute a regexp that matches those exact words
1928 separated by arbitrary punctuation. */
1929
1930 static Lisp_Object
1931 wordify (string)
1932 Lisp_Object string;
1933 {
1934 register unsigned char *p, *o;
1935 register int i, i_byte, len, punct_count = 0, word_count = 0;
1936 Lisp_Object val;
1937 int prev_c = 0;
1938 int adjust;
1939
1940 CHECK_STRING (string, 0);
1941 p = XSTRING (string)->data;
1942 len = XSTRING (string)->size;
1943
1944 for (i = 0, i_byte = 0; i < len; )
1945 {
1946 int c;
1947
1948 if (STRING_MULTIBYTE (string))
1949 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1950 else
1951 c = XSTRING (string)->data[i++];
1952
1953 if (SYNTAX (c) != Sword)
1954 {
1955 punct_count++;
1956 if (i > 0 && SYNTAX (prev_c) == Sword)
1957 word_count++;
1958 }
1959
1960 prev_c = c;
1961 }
1962
1963 if (SYNTAX (prev_c) == Sword)
1964 word_count++;
1965 if (!word_count)
1966 return build_string ("");
1967
1968 adjust = - punct_count + 5 * (word_count - 1) + 4;
1969 val = make_uninit_multibyte_string (len + adjust,
1970 STRING_BYTES (XSTRING (string)) + adjust);
1971
1972 o = XSTRING (val)->data;
1973 *o++ = '\\';
1974 *o++ = 'b';
1975 prev_c = 0;
1976
1977 for (i = 0, i_byte = 0; i < len; )
1978 {
1979 int c;
1980 int i_byte_orig = i_byte;
1981
1982 if (STRING_MULTIBYTE (string))
1983 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1984 else
1985 c = XSTRING (string)->data[i++];
1986
1987 if (SYNTAX (c) == Sword)
1988 {
1989 bcopy (&XSTRING (string)->data[i_byte_orig], o,
1990 i_byte - i_byte_orig);
1991 o += i_byte - i_byte_orig;
1992 }
1993 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
1994 {
1995 *o++ = '\\';
1996 *o++ = 'W';
1997 *o++ = '\\';
1998 *o++ = 'W';
1999 *o++ = '*';
2000 }
2001
2002 prev_c = c;
2003 }
2004
2005 *o++ = '\\';
2006 *o++ = 'b';
2007
2008 return val;
2009 }
2010 \f
2011 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2012 "MSearch backward: ",
2013 "Search backward from point for STRING.\n\
2014 Set point to the beginning of the occurrence found, and return point.\n\
2015 An optional second argument bounds the search; it is a buffer position.\n\
2016 The match found must not extend before that position.\n\
2017 Optional third argument, if t, means if fail just return nil (no error).\n\
2018 If not nil and not t, position at limit of search and return nil.\n\
2019 Optional fourth argument is repeat count--search for successive occurrences.\n\
2020 See also the functions `match-beginning', `match-end' and `replace-match'.")
2021 (string, bound, noerror, count)
2022 Lisp_Object string, bound, noerror, count;
2023 {
2024 return search_command (string, bound, noerror, count, -1, 0, 0);
2025 }
2026
2027 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2028 "Search forward from point for STRING.\n\
2029 Set point to the end of the occurrence found, and return point.\n\
2030 An optional second argument bounds the search; it is a buffer position.\n\
2031 The match found must not extend after that position. nil is equivalent\n\
2032 to (point-max).\n\
2033 Optional third argument, if t, means if fail just return nil (no error).\n\
2034 If not nil and not t, move to limit of search and return nil.\n\
2035 Optional fourth argument is repeat count--search for successive occurrences.\n\
2036 See also the functions `match-beginning', `match-end' and `replace-match'.")
2037 (string, bound, noerror, count)
2038 Lisp_Object string, bound, noerror, count;
2039 {
2040 return search_command (string, bound, noerror, count, 1, 0, 0);
2041 }
2042
2043 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2044 "sWord search backward: ",
2045 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2046 Set point to the beginning of the occurrence found, and return point.\n\
2047 An optional second argument bounds the search; it is a buffer position.\n\
2048 The match found must not extend before that position.\n\
2049 Optional third argument, if t, means if fail just return nil (no error).\n\
2050 If not nil and not t, move to limit of search and return nil.\n\
2051 Optional fourth argument is repeat count--search for successive occurrences.")
2052 (string, bound, noerror, count)
2053 Lisp_Object string, bound, noerror, count;
2054 {
2055 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2056 }
2057
2058 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2059 "sWord search: ",
2060 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2061 Set point to the end of the occurrence found, and return point.\n\
2062 An optional second argument bounds the search; it is a buffer position.\n\
2063 The match found must not extend after that position.\n\
2064 Optional third argument, if t, means if fail just return nil (no error).\n\
2065 If not nil and not t, move to limit of search and return nil.\n\
2066 Optional fourth argument is repeat count--search for successive occurrences.")
2067 (string, bound, noerror, count)
2068 Lisp_Object string, bound, noerror, count;
2069 {
2070 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2071 }
2072
2073 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2074 "sRE search backward: ",
2075 "Search backward from point for match for regular expression REGEXP.\n\
2076 Set point to the beginning of the match, and return point.\n\
2077 The match found is the one starting last in the buffer\n\
2078 and yet ending before the origin of the search.\n\
2079 An optional second argument bounds the search; it is a buffer position.\n\
2080 The match found must start at or after that position.\n\
2081 Optional third argument, if t, means if fail just return nil (no error).\n\
2082 If not nil and not t, move to limit of search and return nil.\n\
2083 Optional fourth argument is repeat count--search for successive occurrences.\n\
2084 See also the functions `match-beginning', `match-end' and `replace-match'.")
2085 (regexp, bound, noerror, count)
2086 Lisp_Object regexp, bound, noerror, count;
2087 {
2088 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2089 }
2090
2091 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2092 "sRE search: ",
2093 "Search forward from point for regular expression REGEXP.\n\
2094 Set point to the end of the occurrence found, and return point.\n\
2095 An optional second argument bounds the search; it is a buffer position.\n\
2096 The match found must not extend after that position.\n\
2097 Optional third argument, if t, means if fail just return nil (no error).\n\
2098 If not nil and not t, move to limit of search and return nil.\n\
2099 Optional fourth argument is repeat count--search for successive occurrences.\n\
2100 See also the functions `match-beginning', `match-end' and `replace-match'.")
2101 (regexp, bound, noerror, count)
2102 Lisp_Object regexp, bound, noerror, count;
2103 {
2104 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2105 }
2106
2107 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2108 "sPosix search backward: ",
2109 "Search backward from point for match for regular expression REGEXP.\n\
2110 Find the longest match in accord with Posix regular expression rules.\n\
2111 Set point to the beginning of the match, and return point.\n\
2112 The match found is the one starting last in the buffer\n\
2113 and yet ending before the origin of the search.\n\
2114 An optional second argument bounds the search; it is a buffer position.\n\
2115 The match found must start at or after that position.\n\
2116 Optional third argument, if t, means if fail just return nil (no error).\n\
2117 If not nil and not t, move to limit of search and return nil.\n\
2118 Optional fourth argument is repeat count--search for successive occurrences.\n\
2119 See also the functions `match-beginning', `match-end' and `replace-match'.")
2120 (regexp, bound, noerror, count)
2121 Lisp_Object regexp, bound, noerror, count;
2122 {
2123 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2124 }
2125
2126 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2127 "sPosix search: ",
2128 "Search forward from point for regular expression REGEXP.\n\
2129 Find the longest match in accord with Posix regular expression rules.\n\
2130 Set point to the end of the occurrence found, and return point.\n\
2131 An optional second argument bounds the search; it is a buffer position.\n\
2132 The match found must not extend after that position.\n\
2133 Optional third argument, if t, means if fail just return nil (no error).\n\
2134 If not nil and not t, move to limit of search and return nil.\n\
2135 Optional fourth argument is repeat count--search for successive occurrences.\n\
2136 See also the functions `match-beginning', `match-end' and `replace-match'.")
2137 (regexp, bound, noerror, count)
2138 Lisp_Object regexp, bound, noerror, count;
2139 {
2140 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2141 }
2142 \f
2143 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2144 "Replace text matched by last search with NEWTEXT.\n\
2145 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2146 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2147 based on the replaced text.\n\
2148 If the replaced text has only capital letters\n\
2149 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2150 If the replaced text has at least one word starting with a capital letter,\n\
2151 then capitalize each word in NEWTEXT.\n\n\
2152 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2153 Otherwise treat `\\' as special:\n\
2154 `\\&' in NEWTEXT means substitute original matched text.\n\
2155 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2156 If Nth parens didn't match, substitute nothing.\n\
2157 `\\\\' means insert one `\\'.\n\
2158 FIXEDCASE and LITERAL are optional arguments.\n\
2159 Leaves point at end of replacement text.\n\
2160 \n\
2161 The optional fourth argument STRING can be a string to modify.\n\
2162 In that case, this function creates and returns a new string\n\
2163 which is made by replacing the part of STRING that was matched.\n\
2164 \n\
2165 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2166 It says to replace just that subexpression instead of the whole match.\n\
2167 This is useful only after a regular expression search or match\n\
2168 since only regular expressions have distinguished subexpressions.")
2169 (newtext, fixedcase, literal, string, subexp)
2170 Lisp_Object newtext, fixedcase, literal, string, subexp;
2171 {
2172 enum { nochange, all_caps, cap_initial } case_action;
2173 register int pos, last;
2174 int some_multiletter_word;
2175 int some_lowercase;
2176 int some_uppercase;
2177 int some_nonuppercase_initial;
2178 register int c, prevc;
2179 int inslen;
2180 int sub;
2181 int opoint, newpoint;
2182
2183 CHECK_STRING (newtext, 0);
2184
2185 if (! NILP (string))
2186 CHECK_STRING (string, 4);
2187
2188 case_action = nochange; /* We tried an initialization */
2189 /* but some C compilers blew it */
2190
2191 if (search_regs.num_regs <= 0)
2192 error ("replace-match called before any match found");
2193
2194 if (NILP (subexp))
2195 sub = 0;
2196 else
2197 {
2198 CHECK_NUMBER (subexp, 3);
2199 sub = XINT (subexp);
2200 if (sub < 0 || sub >= search_regs.num_regs)
2201 args_out_of_range (subexp, make_number (search_regs.num_regs));
2202 }
2203
2204 if (NILP (string))
2205 {
2206 if (search_regs.start[sub] < BEGV
2207 || search_regs.start[sub] > search_regs.end[sub]
2208 || search_regs.end[sub] > ZV)
2209 args_out_of_range (make_number (search_regs.start[sub]),
2210 make_number (search_regs.end[sub]));
2211 }
2212 else
2213 {
2214 if (search_regs.start[sub] < 0
2215 || search_regs.start[sub] > search_regs.end[sub]
2216 || search_regs.end[sub] > XSTRING (string)->size)
2217 args_out_of_range (make_number (search_regs.start[sub]),
2218 make_number (search_regs.end[sub]));
2219 }
2220
2221 if (NILP (fixedcase))
2222 {
2223 int beg;
2224 /* Decide how to casify by examining the matched text. */
2225
2226 if (NILP (string))
2227 last = CHAR_TO_BYTE (search_regs.end[sub]);
2228 else
2229 last = search_regs.end[sub];
2230
2231 if (NILP (string))
2232 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2233 else
2234 beg = search_regs.start[sub];
2235
2236 prevc = '\n';
2237 case_action = all_caps;
2238
2239 /* some_multiletter_word is set nonzero if any original word
2240 is more than one letter long. */
2241 some_multiletter_word = 0;
2242 some_lowercase = 0;
2243 some_nonuppercase_initial = 0;
2244 some_uppercase = 0;
2245
2246 for (pos = beg; pos < last; pos++)
2247 {
2248 if (NILP (string))
2249 c = FETCH_BYTE (pos);
2250 else
2251 c = XSTRING (string)->data[pos];
2252
2253 if (LOWERCASEP (c))
2254 {
2255 /* Cannot be all caps if any original char is lower case */
2256
2257 some_lowercase = 1;
2258 if (SYNTAX (prevc) != Sword)
2259 some_nonuppercase_initial = 1;
2260 else
2261 some_multiletter_word = 1;
2262 }
2263 else if (!NOCASEP (c))
2264 {
2265 some_uppercase = 1;
2266 if (SYNTAX (prevc) != Sword)
2267 ;
2268 else
2269 some_multiletter_word = 1;
2270 }
2271 else
2272 {
2273 /* If the initial is a caseless word constituent,
2274 treat that like a lowercase initial. */
2275 if (SYNTAX (prevc) != Sword)
2276 some_nonuppercase_initial = 1;
2277 }
2278
2279 prevc = c;
2280 }
2281
2282 /* Convert to all caps if the old text is all caps
2283 and has at least one multiletter word. */
2284 if (! some_lowercase && some_multiletter_word)
2285 case_action = all_caps;
2286 /* Capitalize each word, if the old text has all capitalized words. */
2287 else if (!some_nonuppercase_initial && some_multiletter_word)
2288 case_action = cap_initial;
2289 else if (!some_nonuppercase_initial && some_uppercase)
2290 /* Should x -> yz, operating on X, give Yz or YZ?
2291 We'll assume the latter. */
2292 case_action = all_caps;
2293 else
2294 case_action = nochange;
2295 }
2296
2297 /* Do replacement in a string. */
2298 if (!NILP (string))
2299 {
2300 Lisp_Object before, after;
2301
2302 before = Fsubstring (string, make_number (0),
2303 make_number (search_regs.start[sub]));
2304 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2305
2306 /* Substitute parts of the match into NEWTEXT
2307 if desired. */
2308 if (NILP (literal))
2309 {
2310 int lastpos = 0;
2311 int lastpos_byte = 0;
2312 /* We build up the substituted string in ACCUM. */
2313 Lisp_Object accum;
2314 Lisp_Object middle;
2315 int pos_byte;
2316
2317 accum = Qnil;
2318
2319 for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
2320 {
2321 int substart = -1;
2322 int subend;
2323 int delbackslash = 0;
2324
2325 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2326
2327 if (c == '\\')
2328 {
2329 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2330 if (c == '&')
2331 {
2332 substart = search_regs.start[sub];
2333 subend = search_regs.end[sub];
2334 }
2335 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2336 {
2337 if (search_regs.start[c - '0'] >= 0)
2338 {
2339 substart = search_regs.start[c - '0'];
2340 subend = search_regs.end[c - '0'];
2341 }
2342 }
2343 else if (c == '\\')
2344 delbackslash = 1;
2345 else
2346 error ("Invalid use of `\\' in replacement text");
2347 }
2348 if (substart >= 0)
2349 {
2350 if (pos - 2 != lastpos)
2351 middle = substring_both (newtext, lastpos,
2352 lastpos_byte,
2353 pos - 2, pos_byte - 2);
2354 else
2355 middle = Qnil;
2356 accum = concat3 (accum, middle,
2357 Fsubstring (string,
2358 make_number (substart),
2359 make_number (subend)));
2360 lastpos = pos;
2361 lastpos_byte = pos_byte;
2362 }
2363 else if (delbackslash)
2364 {
2365 middle = substring_both (newtext, lastpos,
2366 lastpos_byte,
2367 pos - 1, pos_byte - 1);
2368
2369 accum = concat2 (accum, middle);
2370 lastpos = pos;
2371 lastpos_byte = pos_byte;
2372 }
2373 }
2374
2375 if (pos != lastpos)
2376 middle = substring_both (newtext, lastpos,
2377 lastpos_byte,
2378 pos, pos_byte);
2379 else
2380 middle = Qnil;
2381
2382 newtext = concat2 (accum, middle);
2383 }
2384
2385 /* Do case substitution in NEWTEXT if desired. */
2386 if (case_action == all_caps)
2387 newtext = Fupcase (newtext);
2388 else if (case_action == cap_initial)
2389 newtext = Fupcase_initials (newtext);
2390
2391 return concat3 (before, newtext, after);
2392 }
2393
2394 /* Record point, the move (quietly) to the start of the match. */
2395 if (PT > search_regs.start[sub])
2396 opoint = PT - ZV;
2397 else
2398 opoint = PT;
2399
2400 TEMP_SET_PT (search_regs.start[sub]);
2401
2402 /* We insert the replacement text before the old text, and then
2403 delete the original text. This means that markers at the
2404 beginning or end of the original will float to the corresponding
2405 position in the replacement. */
2406 if (!NILP (literal))
2407 Finsert_and_inherit (1, &newtext);
2408 else
2409 {
2410 struct gcpro gcpro1;
2411 GCPRO1 (newtext);
2412
2413 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
2414 {
2415 int offset = PT - search_regs.start[sub];
2416
2417 c = XSTRING (newtext)->data[pos];
2418 if (c == '\\')
2419 {
2420 c = XSTRING (newtext)->data[++pos];
2421 if (c == '&')
2422 Finsert_buffer_substring
2423 (Fcurrent_buffer (),
2424 make_number (search_regs.start[sub] + offset),
2425 make_number (search_regs.end[sub] + offset));
2426 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2427 {
2428 if (search_regs.start[c - '0'] >= 1)
2429 Finsert_buffer_substring
2430 (Fcurrent_buffer (),
2431 make_number (search_regs.start[c - '0'] + offset),
2432 make_number (search_regs.end[c - '0'] + offset));
2433 }
2434 else if (c == '\\')
2435 insert_char (c);
2436 else
2437 error ("Invalid use of `\\' in replacement text");
2438 }
2439 else
2440 insert_char (c);
2441 }
2442 UNGCPRO;
2443 }
2444
2445 inslen = PT - (search_regs.start[sub]);
2446 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2447
2448 if (case_action == all_caps)
2449 Fupcase_region (make_number (PT - inslen), make_number (PT));
2450 else if (case_action == cap_initial)
2451 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2452
2453 newpoint = PT;
2454
2455 /* Put point back where it was in the text. */
2456 if (opoint <= 0)
2457 TEMP_SET_PT (opoint + ZV);
2458 else
2459 TEMP_SET_PT (opoint);
2460
2461 /* Now move point "officially" to the start of the inserted replacement. */
2462 move_if_not_intangible (newpoint);
2463
2464 return Qnil;
2465 }
2466 \f
2467 static Lisp_Object
2468 match_limit (num, beginningp)
2469 Lisp_Object num;
2470 int beginningp;
2471 {
2472 register int n;
2473
2474 CHECK_NUMBER (num, 0);
2475 n = XINT (num);
2476 if (n < 0 || n >= search_regs.num_regs)
2477 args_out_of_range (num, make_number (search_regs.num_regs));
2478 if (search_regs.num_regs <= 0
2479 || search_regs.start[n] < 0)
2480 return Qnil;
2481 return (make_number ((beginningp) ? search_regs.start[n]
2482 : search_regs.end[n]));
2483 }
2484
2485 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2486 "Return position of start of text matched by last search.\n\
2487 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2488 regexp.\n\
2489 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2490 SUBEXP pairs.\n\
2491 Zero means the entire text matched by the whole regexp or whole string.")
2492 (subexp)
2493 Lisp_Object subexp;
2494 {
2495 return match_limit (subexp, 1);
2496 }
2497
2498 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2499 "Return position of end of text matched by last search.\n\
2500 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2501 regexp.\n\
2502 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2503 SUBEXP pairs.\n\
2504 Zero means the entire text matched by the whole regexp or whole string.")
2505 (subexp)
2506 Lisp_Object subexp;
2507 {
2508 return match_limit (subexp, 0);
2509 }
2510
2511 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2512 "Return a list containing all info on what the last search matched.\n\
2513 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2514 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2515 if the last match was on a buffer; integers or nil if a string was matched.\n\
2516 Use `store-match-data' to reinstate the data in this list.\n\
2517 \n\
2518 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2519 \(rather than markers) to represent buffer positions.\n\
2520 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2521 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2522 (integers, reuse)
2523 Lisp_Object integers, reuse;
2524 {
2525 Lisp_Object tail, prev;
2526 Lisp_Object *data;
2527 int i, len;
2528
2529 if (NILP (last_thing_searched))
2530 return Qnil;
2531
2532 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2533 * sizeof (Lisp_Object));
2534
2535 len = -1;
2536 for (i = 0; i < search_regs.num_regs; i++)
2537 {
2538 int start = search_regs.start[i];
2539 if (start >= 0)
2540 {
2541 if (EQ (last_thing_searched, Qt)
2542 || ! NILP (integers))
2543 {
2544 XSETFASTINT (data[2 * i], start);
2545 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2546 }
2547 else if (BUFFERP (last_thing_searched))
2548 {
2549 data[2 * i] = Fmake_marker ();
2550 Fset_marker (data[2 * i],
2551 make_number (start),
2552 last_thing_searched);
2553 data[2 * i + 1] = Fmake_marker ();
2554 Fset_marker (data[2 * i + 1],
2555 make_number (search_regs.end[i]),
2556 last_thing_searched);
2557 }
2558 else
2559 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2560 abort ();
2561
2562 len = i;
2563 }
2564 else
2565 data[2 * i] = data [2 * i + 1] = Qnil;
2566 }
2567
2568 /* If REUSE is not usable, cons up the values and return them. */
2569 if (! CONSP (reuse))
2570 return Flist (2 * len + 2, data);
2571
2572 /* If REUSE is a list, store as many value elements as will fit
2573 into the elements of REUSE. */
2574 for (i = 0, tail = reuse; CONSP (tail);
2575 i++, tail = XCONS (tail)->cdr)
2576 {
2577 if (i < 2 * len + 2)
2578 XCONS (tail)->car = data[i];
2579 else
2580 XCONS (tail)->car = Qnil;
2581 prev = tail;
2582 }
2583
2584 /* If we couldn't fit all value elements into REUSE,
2585 cons up the rest of them and add them to the end of REUSE. */
2586 if (i < 2 * len + 2)
2587 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2588
2589 return reuse;
2590 }
2591
2592
2593 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2594 "Set internal data on last search match from elements of LIST.\n\
2595 LIST should have been created by calling `match-data' previously.")
2596 (list)
2597 register Lisp_Object list;
2598 {
2599 register int i;
2600 register Lisp_Object marker;
2601
2602 if (running_asynch_code)
2603 save_search_regs ();
2604
2605 if (!CONSP (list) && !NILP (list))
2606 list = wrong_type_argument (Qconsp, list);
2607
2608 /* Unless we find a marker with a buffer in LIST, assume that this
2609 match data came from a string. */
2610 last_thing_searched = Qt;
2611
2612 /* Allocate registers if they don't already exist. */
2613 {
2614 int length = XFASTINT (Flength (list)) / 2;
2615
2616 if (length > search_regs.num_regs)
2617 {
2618 if (search_regs.num_regs == 0)
2619 {
2620 search_regs.start
2621 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2622 search_regs.end
2623 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2624 }
2625 else
2626 {
2627 search_regs.start
2628 = (regoff_t *) xrealloc (search_regs.start,
2629 length * sizeof (regoff_t));
2630 search_regs.end
2631 = (regoff_t *) xrealloc (search_regs.end,
2632 length * sizeof (regoff_t));
2633 }
2634
2635 search_regs.num_regs = length;
2636 }
2637 }
2638
2639 for (i = 0; i < search_regs.num_regs; i++)
2640 {
2641 marker = Fcar (list);
2642 if (NILP (marker))
2643 {
2644 search_regs.start[i] = -1;
2645 list = Fcdr (list);
2646 }
2647 else
2648 {
2649 if (MARKERP (marker))
2650 {
2651 if (XMARKER (marker)->buffer == 0)
2652 XSETFASTINT (marker, 0);
2653 else
2654 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2655 }
2656
2657 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2658 search_regs.start[i] = XINT (marker);
2659 list = Fcdr (list);
2660
2661 marker = Fcar (list);
2662 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2663 XSETFASTINT (marker, 0);
2664
2665 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2666 search_regs.end[i] = XINT (marker);
2667 }
2668 list = Fcdr (list);
2669 }
2670
2671 return Qnil;
2672 }
2673
2674 /* If non-zero the match data have been saved in saved_search_regs
2675 during the execution of a sentinel or filter. */
2676 static int search_regs_saved;
2677 static struct re_registers saved_search_regs;
2678
2679 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2680 if asynchronous code (filter or sentinel) is running. */
2681 static void
2682 save_search_regs ()
2683 {
2684 if (!search_regs_saved)
2685 {
2686 saved_search_regs.num_regs = search_regs.num_regs;
2687 saved_search_regs.start = search_regs.start;
2688 saved_search_regs.end = search_regs.end;
2689 search_regs.num_regs = 0;
2690 search_regs.start = 0;
2691 search_regs.end = 0;
2692
2693 search_regs_saved = 1;
2694 }
2695 }
2696
2697 /* Called upon exit from filters and sentinels. */
2698 void
2699 restore_match_data ()
2700 {
2701 if (search_regs_saved)
2702 {
2703 if (search_regs.num_regs > 0)
2704 {
2705 xfree (search_regs.start);
2706 xfree (search_regs.end);
2707 }
2708 search_regs.num_regs = saved_search_regs.num_regs;
2709 search_regs.start = saved_search_regs.start;
2710 search_regs.end = saved_search_regs.end;
2711
2712 search_regs_saved = 0;
2713 }
2714 }
2715
2716 /* Quote a string to inactivate reg-expr chars */
2717
2718 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2719 "Return a regexp string which matches exactly STRING and nothing else.")
2720 (string)
2721 Lisp_Object string;
2722 {
2723 register unsigned char *in, *out, *end;
2724 register unsigned char *temp;
2725 int backslashes_added = 0;
2726
2727 CHECK_STRING (string, 0);
2728
2729 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2730
2731 /* Now copy the data into the new string, inserting escapes. */
2732
2733 in = XSTRING (string)->data;
2734 end = in + STRING_BYTES (XSTRING (string));
2735 out = temp;
2736
2737 for (; in != end; in++)
2738 {
2739 if (*in == '[' || *in == ']'
2740 || *in == '*' || *in == '.' || *in == '\\'
2741 || *in == '?' || *in == '+'
2742 || *in == '^' || *in == '$')
2743 *out++ = '\\', backslashes_added++;
2744 *out++ = *in;
2745 }
2746
2747 return make_specified_string (temp,
2748 XSTRING (string)->size + backslashes_added,
2749 out - temp,
2750 STRING_MULTIBYTE (string));
2751 }
2752 \f
2753 void
2754 syms_of_search ()
2755 {
2756 register int i;
2757
2758 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2759 {
2760 searchbufs[i].buf.allocated = 100;
2761 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2762 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2763 searchbufs[i].regexp = Qnil;
2764 staticpro (&searchbufs[i].regexp);
2765 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2766 }
2767 searchbuf_head = &searchbufs[0];
2768
2769 Qsearch_failed = intern ("search-failed");
2770 staticpro (&Qsearch_failed);
2771 Qinvalid_regexp = intern ("invalid-regexp");
2772 staticpro (&Qinvalid_regexp);
2773
2774 Fput (Qsearch_failed, Qerror_conditions,
2775 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2776 Fput (Qsearch_failed, Qerror_message,
2777 build_string ("Search failed"));
2778
2779 Fput (Qinvalid_regexp, Qerror_conditions,
2780 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2781 Fput (Qinvalid_regexp, Qerror_message,
2782 build_string ("Invalid regexp"));
2783
2784 last_thing_searched = Qnil;
2785 staticpro (&last_thing_searched);
2786
2787 defsubr (&Slooking_at);
2788 defsubr (&Sposix_looking_at);
2789 defsubr (&Sstring_match);
2790 defsubr (&Sposix_string_match);
2791 defsubr (&Ssearch_forward);
2792 defsubr (&Ssearch_backward);
2793 defsubr (&Sword_search_forward);
2794 defsubr (&Sword_search_backward);
2795 defsubr (&Sre_search_forward);
2796 defsubr (&Sre_search_backward);
2797 defsubr (&Sposix_search_forward);
2798 defsubr (&Sposix_search_backward);
2799 defsubr (&Sreplace_match);
2800 defsubr (&Smatch_beginning);
2801 defsubr (&Smatch_end);
2802 defsubr (&Smatch_data);
2803 defsubr (&Sset_match_data);
2804 defsubr (&Sregexp_quote);
2805 }