]> code.delx.au - gnu-emacs/blob - src/search.c
(Freplace_match): Fix the loop for copying text
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #ifdef STDC_HEADERS
24 #include <stdlib.h>
25 #endif
26 #include "lisp.h"
27 #include "syntax.h"
28 #include "category.h"
29 #include "buffer.h"
30 #include "charset.h"
31 #include "region-cache.h"
32 #include "commands.h"
33 #include "blockinput.h"
34 #include "intervals.h"
35
36 #include <sys/types.h>
37 #include "regex.h"
38
39 #define min(a, b) ((a) < (b) ? (a) : (b))
40 #define max(a, b) ((a) > (b) ? (a) : (b))
41
42 #define REGEXP_CACHE_SIZE 20
43
44 /* If the regexp is non-nil, then the buffer contains the compiled form
45 of that regexp, suitable for searching. */
46 struct regexp_cache
47 {
48 struct regexp_cache *next;
49 Lisp_Object regexp;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
54 };
55
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* error condition signaled when regexp compile_pattern fails */
88
89 Lisp_Object Qinvalid_regexp;
90
91 static void set_search_regs ();
92 static void save_search_regs ();
93 static int simple_search ();
94 static int boyer_moore ();
95 static int search_buffer ();
96
97 static void
98 matcher_overflow ()
99 {
100 error ("Stack overflow in regexp matcher");
101 }
102
103 #ifdef __STDC__
104 #define CONST const
105 #else
106 #define CONST
107 #endif
108
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data. */
122
123 static void
124 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
125 struct regexp_cache *cp;
126 Lisp_Object pattern;
127 Lisp_Object translate;
128 struct re_registers *regp;
129 int posix;
130 int multibyte;
131 {
132 unsigned char *raw_pattern;
133 int raw_pattern_size;
134 char *val;
135 reg_syntax_t old;
136
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
140
141 if (multibyte == STRING_MULTIBYTE (pattern))
142 {
143 raw_pattern = (unsigned char *) XSTRING (pattern)->data;
144 raw_pattern_size = STRING_BYTES (XSTRING (pattern));
145 }
146 else if (multibyte)
147 {
148 raw_pattern_size = count_size_as_multibyte (XSTRING (pattern)->data,
149 XSTRING (pattern)->size);
150 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
151 copy_text (XSTRING (pattern)->data, raw_pattern,
152 XSTRING (pattern)->size, 0, 1);
153 }
154 else
155 {
156 /* Converting multibyte to single-byte.
157
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
162 raw_pattern_size = XSTRING (pattern)->size;
163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
164 copy_text (XSTRING (pattern)->data, raw_pattern,
165 STRING_BYTES (XSTRING (pattern)), 1, 0);
166 }
167
168 cp->regexp = Qnil;
169 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
170 cp->posix = posix;
171 cp->buf.multibyte = multibyte;
172 BLOCK_INPUT;
173 old = re_set_syntax (RE_SYNTAX_EMACS
174 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
175 val = (char *) re_compile_pattern ((char *)raw_pattern,
176 raw_pattern_size, &cp->buf);
177 re_set_syntax (old);
178 UNBLOCK_INPUT;
179 if (val)
180 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
181
182 cp->regexp = Fcopy_sequence (pattern);
183 }
184
185 /* Compile a regexp if necessary, but first check to see if there's one in
186 the cache.
187 PATTERN is the pattern to compile.
188 TRANSLATE is a translation table for ignoring case, or nil for none.
189 REGP is the structure that says where to store the "register"
190 values that will result from matching this pattern.
191 If it is 0, we should compile the pattern not to record any
192 subexpression bounds.
193 POSIX is nonzero if we want full backtracking (POSIX style)
194 for this pattern. 0 means backtrack only enough to get a valid match. */
195
196 struct re_pattern_buffer *
197 compile_pattern (pattern, regp, translate, posix, multibyte)
198 Lisp_Object pattern;
199 struct re_registers *regp;
200 Lisp_Object translate;
201 int posix, multibyte;
202 {
203 struct regexp_cache *cp, **cpp;
204
205 for (cpp = &searchbuf_head; ; cpp = &cp->next)
206 {
207 cp = *cpp;
208 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
209 && !NILP (Fstring_equal (cp->regexp, pattern))
210 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
211 && cp->posix == posix
212 && cp->buf.multibyte == multibyte)
213 break;
214
215 /* If we're at the end of the cache, compile into the last cell. */
216 if (cp->next == 0)
217 {
218 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
219 break;
220 }
221 }
222
223 /* When we get here, cp (aka *cpp) contains the compiled pattern,
224 either because we found it in the cache or because we just compiled it.
225 Move it to the front of the queue to mark it as most recently used. */
226 *cpp = cp->next;
227 cp->next = searchbuf_head;
228 searchbuf_head = cp;
229
230 /* Advise the searching functions about the space we have allocated
231 for register data. */
232 if (regp)
233 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
234
235 return &cp->buf;
236 }
237
238 /* Error condition used for failing searches */
239 Lisp_Object Qsearch_failed;
240
241 Lisp_Object
242 signal_failure (arg)
243 Lisp_Object arg;
244 {
245 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
246 return Qnil;
247 }
248 \f
249 static Lisp_Object
250 looking_at_1 (string, posix)
251 Lisp_Object string;
252 int posix;
253 {
254 Lisp_Object val;
255 unsigned char *p1, *p2;
256 int s1, s2;
257 register int i;
258 struct re_pattern_buffer *bufp;
259
260 if (running_asynch_code)
261 save_search_regs ();
262
263 CHECK_STRING (string, 0);
264 bufp = compile_pattern (string, &search_regs,
265 (!NILP (current_buffer->case_fold_search)
266 ? DOWNCASE_TABLE : Qnil),
267 posix,
268 !NILP (current_buffer->enable_multibyte_characters));
269
270 immediate_quit = 1;
271 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
272
273 /* Get pointers and sizes of the two strings
274 that make up the visible portion of the buffer. */
275
276 p1 = BEGV_ADDR;
277 s1 = GPT_BYTE - BEGV_BYTE;
278 p2 = GAP_END_ADDR;
279 s2 = ZV_BYTE - GPT_BYTE;
280 if (s1 < 0)
281 {
282 p2 = p1;
283 s2 = ZV_BYTE - BEGV_BYTE;
284 s1 = 0;
285 }
286 if (s2 < 0)
287 {
288 s1 = ZV_BYTE - BEGV_BYTE;
289 s2 = 0;
290 }
291
292 re_match_object = Qnil;
293
294 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
295 PT_BYTE - BEGV_BYTE, &search_regs,
296 ZV_BYTE - BEGV_BYTE);
297 if (i == -2)
298 matcher_overflow ();
299
300 val = (0 <= i ? Qt : Qnil);
301 if (i >= 0)
302 for (i = 0; i < search_regs.num_regs; i++)
303 if (search_regs.start[i] >= 0)
304 {
305 search_regs.start[i]
306 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
307 search_regs.end[i]
308 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
309 }
310 XSETBUFFER (last_thing_searched, current_buffer);
311 immediate_quit = 0;
312 return val;
313 }
314
315 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
316 "Return t if text after point matches regular expression REGEXP.\n\
317 This function modifies the match data that `match-beginning',\n\
318 `match-end' and `match-data' access; save and restore the match\n\
319 data if you want to preserve them.")
320 (regexp)
321 Lisp_Object regexp;
322 {
323 return looking_at_1 (regexp, 0);
324 }
325
326 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
327 "Return t if text after point matches regular expression REGEXP.\n\
328 Find the longest match, in accord with Posix regular expression rules.\n\
329 This function modifies the match data that `match-beginning',\n\
330 `match-end' and `match-data' access; save and restore the match\n\
331 data if you want to preserve them.")
332 (regexp)
333 Lisp_Object regexp;
334 {
335 return looking_at_1 (regexp, 1);
336 }
337 \f
338 static Lisp_Object
339 string_match_1 (regexp, string, start, posix)
340 Lisp_Object regexp, string, start;
341 int posix;
342 {
343 int val;
344 struct re_pattern_buffer *bufp;
345 int pos, pos_byte;
346 int i;
347
348 if (running_asynch_code)
349 save_search_regs ();
350
351 CHECK_STRING (regexp, 0);
352 CHECK_STRING (string, 1);
353
354 if (NILP (start))
355 pos = 0, pos_byte = 0;
356 else
357 {
358 int len = XSTRING (string)->size;
359
360 CHECK_NUMBER (start, 2);
361 pos = XINT (start);
362 if (pos < 0 && -pos <= len)
363 pos = len + pos;
364 else if (0 > pos || pos > len)
365 args_out_of_range (string, start);
366 pos_byte = string_char_to_byte (string, pos);
367 }
368
369 bufp = compile_pattern (regexp, &search_regs,
370 (!NILP (current_buffer->case_fold_search)
371 ? DOWNCASE_TABLE : Qnil),
372 posix,
373 STRING_MULTIBYTE (string));
374 immediate_quit = 1;
375 re_match_object = string;
376
377 val = re_search (bufp, (char *) XSTRING (string)->data,
378 STRING_BYTES (XSTRING (string)), pos_byte,
379 STRING_BYTES (XSTRING (string)) - pos_byte,
380 &search_regs);
381 immediate_quit = 0;
382 last_thing_searched = Qt;
383 if (val == -2)
384 matcher_overflow ();
385 if (val < 0) return Qnil;
386
387 for (i = 0; i < search_regs.num_regs; i++)
388 if (search_regs.start[i] >= 0)
389 {
390 search_regs.start[i]
391 = string_byte_to_char (string, search_regs.start[i]);
392 search_regs.end[i]
393 = string_byte_to_char (string, search_regs.end[i]);
394 }
395
396 return make_number (string_byte_to_char (string, val));
397 }
398
399 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
400 "Return index of start of first match for REGEXP in STRING, or nil.\n\
401 If third arg START is non-nil, start search at that index in STRING.\n\
402 For index of first char beyond the match, do (match-end 0).\n\
403 `match-end' and `match-beginning' also give indices of substrings\n\
404 matched by parenthesis constructs in the pattern.")
405 (regexp, string, start)
406 Lisp_Object regexp, string, start;
407 {
408 return string_match_1 (regexp, string, start, 0);
409 }
410
411 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
412 "Return index of start of first match for REGEXP in STRING, or nil.\n\
413 Find the longest match, in accord with Posix regular expression rules.\n\
414 If third arg START is non-nil, start search at that index in STRING.\n\
415 For index of first char beyond the match, do (match-end 0).\n\
416 `match-end' and `match-beginning' also give indices of substrings\n\
417 matched by parenthesis constructs in the pattern.")
418 (regexp, string, start)
419 Lisp_Object regexp, string, start;
420 {
421 return string_match_1 (regexp, string, start, 1);
422 }
423
424 /* Match REGEXP against STRING, searching all of STRING,
425 and return the index of the match, or negative on failure.
426 This does not clobber the match data. */
427
428 int
429 fast_string_match (regexp, string)
430 Lisp_Object regexp, string;
431 {
432 int val;
433 struct re_pattern_buffer *bufp;
434
435 bufp = compile_pattern (regexp, 0, Qnil,
436 0, STRING_MULTIBYTE (string));
437 immediate_quit = 1;
438 re_match_object = string;
439
440 val = re_search (bufp, (char *) XSTRING (string)->data,
441 STRING_BYTES (XSTRING (string)), 0,
442 STRING_BYTES (XSTRING (string)), 0);
443 immediate_quit = 0;
444 return val;
445 }
446
447 /* Match REGEXP against STRING, searching all of STRING ignoring case,
448 and return the index of the match, or negative on failure.
449 This does not clobber the match data.
450 We assume that STRING contains single-byte characters. */
451
452 extern Lisp_Object Vascii_downcase_table;
453
454 int
455 fast_c_string_match_ignore_case (regexp, string)
456 Lisp_Object regexp;
457 char *string;
458 {
459 int val;
460 struct re_pattern_buffer *bufp;
461 int len = strlen (string);
462
463 regexp = string_make_unibyte (regexp);
464 re_match_object = Qt;
465 bufp = compile_pattern (regexp, 0,
466 Vascii_downcase_table, 0,
467 0);
468 immediate_quit = 1;
469 val = re_search (bufp, string, len, 0, len, 0);
470 immediate_quit = 0;
471 return val;
472 }
473 \f
474 /* The newline cache: remembering which sections of text have no newlines. */
475
476 /* If the user has requested newline caching, make sure it's on.
477 Otherwise, make sure it's off.
478 This is our cheezy way of associating an action with the change of
479 state of a buffer-local variable. */
480 static void
481 newline_cache_on_off (buf)
482 struct buffer *buf;
483 {
484 if (NILP (buf->cache_long_line_scans))
485 {
486 /* It should be off. */
487 if (buf->newline_cache)
488 {
489 free_region_cache (buf->newline_cache);
490 buf->newline_cache = 0;
491 }
492 }
493 else
494 {
495 /* It should be on. */
496 if (buf->newline_cache == 0)
497 buf->newline_cache = new_region_cache ();
498 }
499 }
500
501 \f
502 /* Search for COUNT instances of the character TARGET between START and END.
503
504 If COUNT is positive, search forwards; END must be >= START.
505 If COUNT is negative, search backwards for the -COUNTth instance;
506 END must be <= START.
507 If COUNT is zero, do anything you please; run rogue, for all I care.
508
509 If END is zero, use BEGV or ZV instead, as appropriate for the
510 direction indicated by COUNT.
511
512 If we find COUNT instances, set *SHORTAGE to zero, and return the
513 position after the COUNTth match. Note that for reverse motion
514 this is not the same as the usual convention for Emacs motion commands.
515
516 If we don't find COUNT instances before reaching END, set *SHORTAGE
517 to the number of TARGETs left unfound, and return END.
518
519 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
520 except when inside redisplay. */
521
522 int
523 scan_buffer (target, start, end, count, shortage, allow_quit)
524 register int target;
525 int start, end;
526 int count;
527 int *shortage;
528 int allow_quit;
529 {
530 struct region_cache *newline_cache;
531 int direction;
532
533 if (count > 0)
534 {
535 direction = 1;
536 if (! end) end = ZV;
537 }
538 else
539 {
540 direction = -1;
541 if (! end) end = BEGV;
542 }
543
544 newline_cache_on_off (current_buffer);
545 newline_cache = current_buffer->newline_cache;
546
547 if (shortage != 0)
548 *shortage = 0;
549
550 immediate_quit = allow_quit;
551
552 if (count > 0)
553 while (start != end)
554 {
555 /* Our innermost scanning loop is very simple; it doesn't know
556 about gaps, buffer ends, or the newline cache. ceiling is
557 the position of the last character before the next such
558 obstacle --- the last character the dumb search loop should
559 examine. */
560 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
561 int start_byte = CHAR_TO_BYTE (start);
562 int tem;
563
564 /* If we're looking for a newline, consult the newline cache
565 to see where we can avoid some scanning. */
566 if (target == '\n' && newline_cache)
567 {
568 int next_change;
569 immediate_quit = 0;
570 while (region_cache_forward
571 (current_buffer, newline_cache, start_byte, &next_change))
572 start_byte = next_change;
573 immediate_quit = allow_quit;
574
575 /* START should never be after END. */
576 if (start_byte > ceiling_byte)
577 start_byte = ceiling_byte;
578
579 /* Now the text after start is an unknown region, and
580 next_change is the position of the next known region. */
581 ceiling_byte = min (next_change - 1, ceiling_byte);
582 }
583
584 /* The dumb loop can only scan text stored in contiguous
585 bytes. BUFFER_CEILING_OF returns the last character
586 position that is contiguous, so the ceiling is the
587 position after that. */
588 tem = BUFFER_CEILING_OF (start_byte);
589 ceiling_byte = min (tem, ceiling_byte);
590
591 {
592 /* The termination address of the dumb loop. */
593 register unsigned char *ceiling_addr
594 = BYTE_POS_ADDR (ceiling_byte) + 1;
595 register unsigned char *cursor
596 = BYTE_POS_ADDR (start_byte);
597 unsigned char *base = cursor;
598
599 while (cursor < ceiling_addr)
600 {
601 unsigned char *scan_start = cursor;
602
603 /* The dumb loop. */
604 while (*cursor != target && ++cursor < ceiling_addr)
605 ;
606
607 /* If we're looking for newlines, cache the fact that
608 the region from start to cursor is free of them. */
609 if (target == '\n' && newline_cache)
610 know_region_cache (current_buffer, newline_cache,
611 start_byte + scan_start - base,
612 start_byte + cursor - base);
613
614 /* Did we find the target character? */
615 if (cursor < ceiling_addr)
616 {
617 if (--count == 0)
618 {
619 immediate_quit = 0;
620 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
621 }
622 cursor++;
623 }
624 }
625
626 start = BYTE_TO_CHAR (start_byte + cursor - base);
627 }
628 }
629 else
630 while (start > end)
631 {
632 /* The last character to check before the next obstacle. */
633 int ceiling_byte = CHAR_TO_BYTE (end);
634 int start_byte = CHAR_TO_BYTE (start);
635 int tem;
636
637 /* Consult the newline cache, if appropriate. */
638 if (target == '\n' && newline_cache)
639 {
640 int next_change;
641 immediate_quit = 0;
642 while (region_cache_backward
643 (current_buffer, newline_cache, start_byte, &next_change))
644 start_byte = next_change;
645 immediate_quit = allow_quit;
646
647 /* Start should never be at or before end. */
648 if (start_byte <= ceiling_byte)
649 start_byte = ceiling_byte + 1;
650
651 /* Now the text before start is an unknown region, and
652 next_change is the position of the next known region. */
653 ceiling_byte = max (next_change, ceiling_byte);
654 }
655
656 /* Stop scanning before the gap. */
657 tem = BUFFER_FLOOR_OF (start_byte - 1);
658 ceiling_byte = max (tem, ceiling_byte);
659
660 {
661 /* The termination address of the dumb loop. */
662 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
663 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
664 unsigned char *base = cursor;
665
666 while (cursor >= ceiling_addr)
667 {
668 unsigned char *scan_start = cursor;
669
670 while (*cursor != target && --cursor >= ceiling_addr)
671 ;
672
673 /* If we're looking for newlines, cache the fact that
674 the region from after the cursor to start is free of them. */
675 if (target == '\n' && newline_cache)
676 know_region_cache (current_buffer, newline_cache,
677 start_byte + cursor - base,
678 start_byte + scan_start - base);
679
680 /* Did we find the target character? */
681 if (cursor >= ceiling_addr)
682 {
683 if (++count >= 0)
684 {
685 immediate_quit = 0;
686 return BYTE_TO_CHAR (start_byte + cursor - base);
687 }
688 cursor--;
689 }
690 }
691
692 start = BYTE_TO_CHAR (start_byte + cursor - base);
693 }
694 }
695
696 immediate_quit = 0;
697 if (shortage != 0)
698 *shortage = count * direction;
699 return start;
700 }
701 \f
702 /* Search for COUNT instances of a line boundary, which means either a
703 newline or (if selective display enabled) a carriage return.
704 Start at START. If COUNT is negative, search backwards.
705
706 We report the resulting position by calling TEMP_SET_PT_BOTH.
707
708 If we find COUNT instances. we position after (always after,
709 even if scanning backwards) the COUNTth match, and return 0.
710
711 If we don't find COUNT instances before reaching the end of the
712 buffer (or the beginning, if scanning backwards), we return
713 the number of line boundaries left unfound, and position at
714 the limit we bumped up against.
715
716 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
717 except in special cases. */
718
719 int
720 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
721 int start, start_byte;
722 int limit, limit_byte;
723 register int count;
724 int allow_quit;
725 {
726 int direction = ((count > 0) ? 1 : -1);
727
728 register unsigned char *cursor;
729 unsigned char *base;
730
731 register int ceiling;
732 register unsigned char *ceiling_addr;
733
734 int old_immediate_quit = immediate_quit;
735
736 /* If we are not in selective display mode,
737 check only for newlines. */
738 int selective_display = (!NILP (current_buffer->selective_display)
739 && !INTEGERP (current_buffer->selective_display));
740
741 /* The code that follows is like scan_buffer
742 but checks for either newline or carriage return. */
743
744 if (allow_quit)
745 immediate_quit++;
746
747 start_byte = CHAR_TO_BYTE (start);
748
749 if (count > 0)
750 {
751 while (start_byte < limit_byte)
752 {
753 ceiling = BUFFER_CEILING_OF (start_byte);
754 ceiling = min (limit_byte - 1, ceiling);
755 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
756 base = (cursor = BYTE_POS_ADDR (start_byte));
757 while (1)
758 {
759 while (*cursor != '\n' && ++cursor != ceiling_addr)
760 ;
761
762 if (cursor != ceiling_addr)
763 {
764 if (--count == 0)
765 {
766 immediate_quit = old_immediate_quit;
767 start_byte = start_byte + cursor - base + 1;
768 start = BYTE_TO_CHAR (start_byte);
769 TEMP_SET_PT_BOTH (start, start_byte);
770 return 0;
771 }
772 else
773 if (++cursor == ceiling_addr)
774 break;
775 }
776 else
777 break;
778 }
779 start_byte += cursor - base;
780 }
781 }
782 else
783 {
784 while (start_byte > limit_byte)
785 {
786 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
787 ceiling = max (limit_byte, ceiling);
788 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
789 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
790 while (1)
791 {
792 while (--cursor != ceiling_addr && *cursor != '\n')
793 ;
794
795 if (cursor != ceiling_addr)
796 {
797 if (++count == 0)
798 {
799 immediate_quit = old_immediate_quit;
800 /* Return the position AFTER the match we found. */
801 start_byte = start_byte + cursor - base + 1;
802 start = BYTE_TO_CHAR (start_byte);
803 TEMP_SET_PT_BOTH (start, start_byte);
804 return 0;
805 }
806 }
807 else
808 break;
809 }
810 /* Here we add 1 to compensate for the last decrement
811 of CURSOR, which took it past the valid range. */
812 start_byte += cursor - base + 1;
813 }
814 }
815
816 TEMP_SET_PT_BOTH (limit, limit_byte);
817 immediate_quit = old_immediate_quit;
818
819 return count * direction;
820 }
821
822 int
823 find_next_newline_no_quit (from, cnt)
824 register int from, cnt;
825 {
826 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
827 }
828
829 /* Like find_next_newline, but returns position before the newline,
830 not after, and only search up to TO. This isn't just
831 find_next_newline (...)-1, because you might hit TO. */
832
833 int
834 find_before_next_newline (from, to, cnt)
835 int from, to, cnt;
836 {
837 int shortage;
838 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
839
840 if (shortage == 0)
841 pos--;
842
843 return pos;
844 }
845 \f
846 /* Subroutines of Lisp buffer search functions. */
847
848 static Lisp_Object
849 search_command (string, bound, noerror, count, direction, RE, posix)
850 Lisp_Object string, bound, noerror, count;
851 int direction;
852 int RE;
853 int posix;
854 {
855 register int np;
856 int lim, lim_byte;
857 int n = direction;
858
859 if (!NILP (count))
860 {
861 CHECK_NUMBER (count, 3);
862 n *= XINT (count);
863 }
864
865 CHECK_STRING (string, 0);
866 if (NILP (bound))
867 {
868 if (n > 0)
869 lim = ZV, lim_byte = ZV_BYTE;
870 else
871 lim = BEGV, lim_byte = BEGV_BYTE;
872 }
873 else
874 {
875 CHECK_NUMBER_COERCE_MARKER (bound, 1);
876 lim = XINT (bound);
877 if (n > 0 ? lim < PT : lim > PT)
878 error ("Invalid search bound (wrong side of point)");
879 if (lim > ZV)
880 lim = ZV, lim_byte = ZV_BYTE;
881 else if (lim < BEGV)
882 lim = BEGV, lim_byte = BEGV_BYTE;
883 else
884 lim_byte = CHAR_TO_BYTE (lim);
885 }
886
887 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
888 (!NILP (current_buffer->case_fold_search)
889 ? current_buffer->case_canon_table
890 : Qnil),
891 (!NILP (current_buffer->case_fold_search)
892 ? current_buffer->case_eqv_table
893 : Qnil),
894 posix);
895 if (np <= 0)
896 {
897 if (NILP (noerror))
898 return signal_failure (string);
899 if (!EQ (noerror, Qt))
900 {
901 if (lim < BEGV || lim > ZV)
902 abort ();
903 SET_PT_BOTH (lim, lim_byte);
904 return Qnil;
905 #if 0 /* This would be clean, but maybe programs depend on
906 a value of nil here. */
907 np = lim;
908 #endif
909 }
910 else
911 return Qnil;
912 }
913
914 if (np < BEGV || np > ZV)
915 abort ();
916
917 SET_PT (np);
918
919 return make_number (np);
920 }
921 \f
922 /* Return 1 if REGEXP it matches just one constant string. */
923
924 static int
925 trivial_regexp_p (regexp)
926 Lisp_Object regexp;
927 {
928 int len = STRING_BYTES (XSTRING (regexp));
929 unsigned char *s = XSTRING (regexp)->data;
930 unsigned char c;
931 while (--len >= 0)
932 {
933 switch (*s++)
934 {
935 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
936 return 0;
937 case '\\':
938 if (--len < 0)
939 return 0;
940 switch (*s++)
941 {
942 case '|': case '(': case ')': case '`': case '\'': case 'b':
943 case 'B': case '<': case '>': case 'w': case 'W': case 's':
944 case 'S': case '=':
945 case 'c': case 'C': /* for categoryspec and notcategoryspec */
946 case '1': case '2': case '3': case '4': case '5':
947 case '6': case '7': case '8': case '9':
948 return 0;
949 }
950 }
951 }
952 return 1;
953 }
954
955 /* Search for the n'th occurrence of STRING in the current buffer,
956 starting at position POS and stopping at position LIM,
957 treating STRING as a literal string if RE is false or as
958 a regular expression if RE is true.
959
960 If N is positive, searching is forward and LIM must be greater than POS.
961 If N is negative, searching is backward and LIM must be less than POS.
962
963 Returns -x if x occurrences remain to be found (x > 0),
964 or else the position at the beginning of the Nth occurrence
965 (if searching backward) or the end (if searching forward).
966
967 POSIX is nonzero if we want full backtracking (POSIX style)
968 for this pattern. 0 means backtrack only enough to get a valid match. */
969
970 #define TRANSLATE(out, trt, d) \
971 do \
972 { \
973 if (! NILP (trt)) \
974 { \
975 Lisp_Object temp; \
976 temp = Faref (trt, make_number (d)); \
977 if (INTEGERP (temp)) \
978 out = XINT (temp); \
979 else \
980 out = d; \
981 } \
982 else \
983 out = d; \
984 } \
985 while (0)
986
987 static int
988 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
989 RE, trt, inverse_trt, posix)
990 Lisp_Object string;
991 int pos;
992 int pos_byte;
993 int lim;
994 int lim_byte;
995 int n;
996 int RE;
997 Lisp_Object trt;
998 Lisp_Object inverse_trt;
999 int posix;
1000 {
1001 int len = XSTRING (string)->size;
1002 int len_byte = STRING_BYTES (XSTRING (string));
1003 register int i;
1004
1005 if (running_asynch_code)
1006 save_search_regs ();
1007
1008 /* Null string is found at starting position. */
1009 if (len == 0)
1010 {
1011 set_search_regs (pos, 0);
1012 return pos;
1013 }
1014
1015 /* Searching 0 times means don't move. */
1016 if (n == 0)
1017 return pos;
1018
1019 if (RE && !trivial_regexp_p (string))
1020 {
1021 unsigned char *p1, *p2;
1022 int s1, s2;
1023 struct re_pattern_buffer *bufp;
1024
1025 bufp = compile_pattern (string, &search_regs, trt, posix,
1026 !NILP (current_buffer->enable_multibyte_characters));
1027
1028 immediate_quit = 1; /* Quit immediately if user types ^G,
1029 because letting this function finish
1030 can take too long. */
1031 QUIT; /* Do a pending quit right away,
1032 to avoid paradoxical behavior */
1033 /* Get pointers and sizes of the two strings
1034 that make up the visible portion of the buffer. */
1035
1036 p1 = BEGV_ADDR;
1037 s1 = GPT_BYTE - BEGV_BYTE;
1038 p2 = GAP_END_ADDR;
1039 s2 = ZV_BYTE - GPT_BYTE;
1040 if (s1 < 0)
1041 {
1042 p2 = p1;
1043 s2 = ZV_BYTE - BEGV_BYTE;
1044 s1 = 0;
1045 }
1046 if (s2 < 0)
1047 {
1048 s1 = ZV_BYTE - BEGV_BYTE;
1049 s2 = 0;
1050 }
1051 re_match_object = Qnil;
1052
1053 while (n < 0)
1054 {
1055 int val;
1056 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1057 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1058 &search_regs,
1059 /* Don't allow match past current point */
1060 pos_byte - BEGV_BYTE);
1061 if (val == -2)
1062 {
1063 matcher_overflow ();
1064 }
1065 if (val >= 0)
1066 {
1067 pos_byte = search_regs.start[0] + BEGV_BYTE;
1068 for (i = 0; i < search_regs.num_regs; i++)
1069 if (search_regs.start[i] >= 0)
1070 {
1071 search_regs.start[i]
1072 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1073 search_regs.end[i]
1074 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1075 }
1076 XSETBUFFER (last_thing_searched, current_buffer);
1077 /* Set pos to the new position. */
1078 pos = search_regs.start[0];
1079 }
1080 else
1081 {
1082 immediate_quit = 0;
1083 return (n);
1084 }
1085 n++;
1086 }
1087 while (n > 0)
1088 {
1089 int val;
1090 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1091 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1092 &search_regs,
1093 lim_byte - BEGV_BYTE);
1094 if (val == -2)
1095 {
1096 matcher_overflow ();
1097 }
1098 if (val >= 0)
1099 {
1100 pos_byte = search_regs.end[0] + BEGV_BYTE;
1101 for (i = 0; i < search_regs.num_regs; i++)
1102 if (search_regs.start[i] >= 0)
1103 {
1104 search_regs.start[i]
1105 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1106 search_regs.end[i]
1107 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1108 }
1109 XSETBUFFER (last_thing_searched, current_buffer);
1110 pos = search_regs.end[0];
1111 }
1112 else
1113 {
1114 immediate_quit = 0;
1115 return (0 - n);
1116 }
1117 n--;
1118 }
1119 immediate_quit = 0;
1120 return (pos);
1121 }
1122 else /* non-RE case */
1123 {
1124 unsigned char *raw_pattern, *pat;
1125 int raw_pattern_size;
1126 int raw_pattern_size_byte;
1127 unsigned char *patbuf;
1128 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1129 unsigned char *base_pat = XSTRING (string)->data;
1130 int charset_base = -1;
1131 int simple = 1;
1132
1133 /* MULTIBYTE says whether the text to be searched is multibyte.
1134 We must convert PATTERN to match that, or we will not really
1135 find things right. */
1136
1137 if (multibyte == STRING_MULTIBYTE (string))
1138 {
1139 raw_pattern = (unsigned char *) XSTRING (string)->data;
1140 raw_pattern_size = XSTRING (string)->size;
1141 raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
1142 }
1143 else if (multibyte)
1144 {
1145 raw_pattern_size = XSTRING (string)->size;
1146 raw_pattern_size_byte
1147 = count_size_as_multibyte (XSTRING (string)->data,
1148 raw_pattern_size);
1149 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1150 copy_text (XSTRING (string)->data, raw_pattern,
1151 XSTRING (string)->size, 0, 1);
1152 }
1153 else
1154 {
1155 /* Converting multibyte to single-byte.
1156
1157 ??? Perhaps this conversion should be done in a special way
1158 by subtracting nonascii-insert-offset from each non-ASCII char,
1159 so that only the multibyte chars which really correspond to
1160 the chosen single-byte character set can possibly match. */
1161 raw_pattern_size = XSTRING (string)->size;
1162 raw_pattern_size_byte = XSTRING (string)->size;
1163 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1164 copy_text (XSTRING (string)->data, raw_pattern,
1165 STRING_BYTES (XSTRING (string)), 1, 0);
1166 }
1167
1168 /* Copy and optionally translate the pattern. */
1169 len = raw_pattern_size;
1170 len_byte = raw_pattern_size_byte;
1171 patbuf = (unsigned char *) alloca (len_byte);
1172 pat = patbuf;
1173 base_pat = raw_pattern;
1174 if (multibyte)
1175 {
1176 while (--len >= 0)
1177 {
1178 unsigned char workbuf[4], *str;
1179 int c, translated, inverse;
1180 int in_charlen, charlen;
1181
1182 /* If we got here and the RE flag is set, it's because we're
1183 dealing with a regexp known to be trivial, so the backslash
1184 just quotes the next character. */
1185 if (RE && *base_pat == '\\')
1186 {
1187 len--;
1188 len_byte--;
1189 base_pat++;
1190 }
1191
1192 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1193 /* Translate the character, if requested. */
1194 TRANSLATE (translated, trt, c);
1195 /* If translation changed the byte-length, go back
1196 to the original character. */
1197 charlen = CHAR_STRING (translated, workbuf, str);
1198 if (in_charlen != charlen)
1199 {
1200 translated = c;
1201 charlen = CHAR_STRING (c, workbuf, str);
1202 }
1203
1204 TRANSLATE (inverse, inverse_trt, c);
1205
1206 /* Did this char actually get translated?
1207 Would any other char get translated into it? */
1208 if (translated != c || inverse != c)
1209 {
1210 /* Keep track of which character set row
1211 contains the characters that need translation. */
1212 int charset_base_code = c & ~0xff;
1213 if (charset_base == -1)
1214 charset_base = charset_base_code;
1215 else if (charset_base != charset_base_code)
1216 /* If two different rows appear, needing translation,
1217 then we cannot use boyer_moore search. */
1218 simple = 0;
1219 /* ??? Handa: this must do simple = 0
1220 if c is a composite character. */
1221 }
1222
1223 /* Store this character into the translated pattern. */
1224 bcopy (str, pat, charlen);
1225 pat += charlen;
1226 base_pat += in_charlen;
1227 len_byte -= in_charlen;
1228 }
1229 }
1230 else
1231 {
1232 while (--len >= 0)
1233 {
1234 int c, translated, inverse;
1235
1236 /* If we got here and the RE flag is set, it's because we're
1237 dealing with a regexp known to be trivial, so the backslash
1238 just quotes the next character. */
1239 if (RE && *base_pat == '\\')
1240 {
1241 len--;
1242 base_pat++;
1243 }
1244 c = *base_pat++;
1245 TRANSLATE (translated, trt, c);
1246 TRANSLATE (inverse, inverse_trt, c);
1247
1248 /* Did this char actually get translated?
1249 Would any other char get translated into it? */
1250 if (translated != c || inverse != c)
1251 {
1252 /* Keep track of which character set row
1253 contains the characters that need translation. */
1254 int charset_base_code = c & ~0xff;
1255 if (charset_base == -1)
1256 charset_base = charset_base_code;
1257 else if (charset_base != charset_base_code)
1258 /* If two different rows appear, needing translation,
1259 then we cannot use boyer_moore search. */
1260 simple = 0;
1261 }
1262 *pat++ = translated;
1263 }
1264 }
1265
1266 len_byte = pat - patbuf;
1267 len = raw_pattern_size;
1268 pat = base_pat = patbuf;
1269
1270 if (simple)
1271 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1272 pos, pos_byte, lim, lim_byte,
1273 charset_base);
1274 else
1275 return simple_search (n, pat, len, len_byte, trt,
1276 pos, pos_byte, lim, lim_byte);
1277 }
1278 }
1279 \f
1280 /* Do a simple string search N times for the string PAT,
1281 whose length is LEN/LEN_BYTE,
1282 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1283 TRT is the translation table.
1284
1285 Return the character position where the match is found.
1286 Otherwise, if M matches remained to be found, return -M.
1287
1288 This kind of search works regardless of what is in PAT and
1289 regardless of what is in TRT. It is used in cases where
1290 boyer_moore cannot work. */
1291
1292 static int
1293 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1294 int n;
1295 unsigned char *pat;
1296 int len, len_byte;
1297 Lisp_Object trt;
1298 int pos, pos_byte;
1299 int lim, lim_byte;
1300 {
1301 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1302 int forward = n > 0;
1303
1304 if (lim > pos && multibyte)
1305 while (n > 0)
1306 {
1307 while (1)
1308 {
1309 /* Try matching at position POS. */
1310 int this_pos = pos;
1311 int this_pos_byte = pos_byte;
1312 int this_len = len;
1313 int this_len_byte = len_byte;
1314 unsigned char *p = pat;
1315 if (pos + len > lim)
1316 goto stop;
1317
1318 while (this_len > 0)
1319 {
1320 int charlen, buf_charlen;
1321 int pat_ch, buf_ch;
1322
1323 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1324 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1325 ZV_BYTE - this_pos_byte,
1326 buf_charlen);
1327 TRANSLATE (buf_ch, trt, buf_ch);
1328
1329 if (buf_ch != pat_ch)
1330 break;
1331
1332 this_len_byte -= charlen;
1333 this_len--;
1334 p += charlen;
1335
1336 this_pos_byte += buf_charlen;
1337 this_pos++;
1338 }
1339
1340 if (this_len == 0)
1341 {
1342 pos += len;
1343 pos_byte += len_byte;
1344 break;
1345 }
1346
1347 INC_BOTH (pos, pos_byte);
1348 }
1349
1350 n--;
1351 }
1352 else if (lim > pos)
1353 while (n > 0)
1354 {
1355 while (1)
1356 {
1357 /* Try matching at position POS. */
1358 int this_pos = pos;
1359 int this_len = len;
1360 unsigned char *p = pat;
1361
1362 if (pos + len > lim)
1363 goto stop;
1364
1365 while (this_len > 0)
1366 {
1367 int pat_ch = *p++;
1368 int buf_ch = FETCH_BYTE (this_pos);
1369 TRANSLATE (buf_ch, trt, buf_ch);
1370
1371 if (buf_ch != pat_ch)
1372 break;
1373
1374 this_len--;
1375 this_pos++;
1376 }
1377
1378 if (this_len == 0)
1379 {
1380 pos += len;
1381 break;
1382 }
1383
1384 pos++;
1385 }
1386
1387 n--;
1388 }
1389 /* Backwards search. */
1390 else if (lim < pos && multibyte)
1391 while (n < 0)
1392 {
1393 while (1)
1394 {
1395 /* Try matching at position POS. */
1396 int this_pos = pos - len;
1397 int this_pos_byte = pos_byte - len_byte;
1398 int this_len = len;
1399 int this_len_byte = len_byte;
1400 unsigned char *p = pat;
1401
1402 if (pos - len < lim)
1403 goto stop;
1404
1405 while (this_len > 0)
1406 {
1407 int charlen, buf_charlen;
1408 int pat_ch, buf_ch;
1409
1410 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1411 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1412 ZV_BYTE - this_pos_byte,
1413 buf_charlen);
1414 TRANSLATE (buf_ch, trt, buf_ch);
1415
1416 if (buf_ch != pat_ch)
1417 break;
1418
1419 this_len_byte -= charlen;
1420 this_len--;
1421 p += charlen;
1422 this_pos_byte += buf_charlen;
1423 this_pos++;
1424 }
1425
1426 if (this_len == 0)
1427 {
1428 pos -= len;
1429 pos_byte -= len_byte;
1430 break;
1431 }
1432
1433 DEC_BOTH (pos, pos_byte);
1434 }
1435
1436 n++;
1437 }
1438 else if (lim < pos)
1439 while (n < 0)
1440 {
1441 while (1)
1442 {
1443 /* Try matching at position POS. */
1444 int this_pos = pos - len;
1445 int this_len = len;
1446 unsigned char *p = pat;
1447
1448 if (pos - len < lim)
1449 goto stop;
1450
1451 while (this_len > 0)
1452 {
1453 int pat_ch = *p++;
1454 int buf_ch = FETCH_BYTE (this_pos);
1455 TRANSLATE (buf_ch, trt, buf_ch);
1456
1457 if (buf_ch != pat_ch)
1458 break;
1459 this_len--;
1460 this_pos++;
1461 }
1462
1463 if (this_len == 0)
1464 {
1465 pos -= len;
1466 break;
1467 }
1468
1469 pos--;
1470 }
1471
1472 n++;
1473 }
1474
1475 stop:
1476 if (n == 0)
1477 {
1478 if (forward)
1479 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1480 else
1481 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1482
1483 return pos;
1484 }
1485 else if (n > 0)
1486 return -n;
1487 else
1488 return n;
1489 }
1490 \f
1491 /* Do Boyer-Moore search N times for the string PAT,
1492 whose length is LEN/LEN_BYTE,
1493 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1494 DIRECTION says which direction we search in.
1495 TRT and INVERSE_TRT are translation tables.
1496
1497 This kind of search works if all the characters in PAT that have
1498 nontrivial translation are the same aside from the last byte. This
1499 makes it possible to translate just the last byte of a character,
1500 and do so after just a simple test of the context.
1501
1502 If that criterion is not satisfied, do not call this function. */
1503
1504 static int
1505 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1506 pos, pos_byte, lim, lim_byte, charset_base)
1507 int n;
1508 unsigned char *base_pat;
1509 int len, len_byte;
1510 Lisp_Object trt;
1511 Lisp_Object inverse_trt;
1512 int pos, pos_byte;
1513 int lim, lim_byte;
1514 int charset_base;
1515 {
1516 int direction = ((n > 0) ? 1 : -1);
1517 register int dirlen;
1518 int infinity, limit, k, stride_for_teases;
1519 register int *BM_tab;
1520 int *BM_tab_base;
1521 register unsigned char *cursor, *p_limit;
1522 register int i, j;
1523 unsigned char *pat, *pat_end;
1524 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1525
1526 unsigned char simple_translate[0400];
1527 int translate_prev_byte;
1528 int translate_anteprev_byte;
1529
1530 #ifdef C_ALLOCA
1531 int BM_tab_space[0400];
1532 BM_tab = &BM_tab_space[0];
1533 #else
1534 BM_tab = (int *) alloca (0400 * sizeof (int));
1535 #endif
1536 /* The general approach is that we are going to maintain that we know */
1537 /* the first (closest to the present position, in whatever direction */
1538 /* we're searching) character that could possibly be the last */
1539 /* (furthest from present position) character of a valid match. We */
1540 /* advance the state of our knowledge by looking at that character */
1541 /* and seeing whether it indeed matches the last character of the */
1542 /* pattern. If it does, we take a closer look. If it does not, we */
1543 /* move our pointer (to putative last characters) as far as is */
1544 /* logically possible. This amount of movement, which I call a */
1545 /* stride, will be the length of the pattern if the actual character */
1546 /* appears nowhere in the pattern, otherwise it will be the distance */
1547 /* from the last occurrence of that character to the end of the */
1548 /* pattern. */
1549 /* As a coding trick, an enormous stride is coded into the table for */
1550 /* characters that match the last character. This allows use of only */
1551 /* a single test, a test for having gone past the end of the */
1552 /* permissible match region, to test for both possible matches (when */
1553 /* the stride goes past the end immediately) and failure to */
1554 /* match (where you get nudged past the end one stride at a time). */
1555
1556 /* Here we make a "mickey mouse" BM table. The stride of the search */
1557 /* is determined only by the last character of the putative match. */
1558 /* If that character does not match, we will stride the proper */
1559 /* distance to propose a match that superimposes it on the last */
1560 /* instance of a character that matches it (per trt), or misses */
1561 /* it entirely if there is none. */
1562
1563 dirlen = len_byte * direction;
1564 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1565
1566 /* Record position after the end of the pattern. */
1567 pat_end = base_pat + len_byte;
1568 /* BASE_PAT points to a character that we start scanning from.
1569 It is the first character in a forward search,
1570 the last character in a backward search. */
1571 if (direction < 0)
1572 base_pat = pat_end - 1;
1573
1574 BM_tab_base = BM_tab;
1575 BM_tab += 0400;
1576 j = dirlen; /* to get it in a register */
1577 /* A character that does not appear in the pattern induces a */
1578 /* stride equal to the pattern length. */
1579 while (BM_tab_base != BM_tab)
1580 {
1581 *--BM_tab = j;
1582 *--BM_tab = j;
1583 *--BM_tab = j;
1584 *--BM_tab = j;
1585 }
1586
1587 /* We use this for translation, instead of TRT itself.
1588 We fill this in to handle the characters that actually
1589 occur in the pattern. Others don't matter anyway! */
1590 bzero (simple_translate, sizeof simple_translate);
1591 for (i = 0; i < 0400; i++)
1592 simple_translate[i] = i;
1593
1594 i = 0;
1595 while (i != infinity)
1596 {
1597 unsigned char *ptr = base_pat + i;
1598 i += direction;
1599 if (i == dirlen)
1600 i = infinity;
1601 if (! NILP (trt))
1602 {
1603 int ch;
1604 int untranslated;
1605 int this_translated = 1;
1606
1607 if (multibyte
1608 /* Is *PTR the last byte of a character? */
1609 && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
1610 {
1611 unsigned char *charstart = ptr;
1612 while (! CHAR_HEAD_P (*charstart))
1613 charstart--;
1614 untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
1615 if (charset_base == (untranslated & ~0xff))
1616 {
1617 TRANSLATE (ch, trt, untranslated);
1618 if (! CHAR_HEAD_P (*ptr))
1619 {
1620 translate_prev_byte = ptr[-1];
1621 if (! CHAR_HEAD_P (translate_prev_byte))
1622 translate_anteprev_byte = ptr[-2];
1623 }
1624 }
1625 else
1626 {
1627 this_translated = 0;
1628 ch = *ptr;
1629 }
1630 }
1631 else if (!multibyte)
1632 TRANSLATE (ch, trt, *ptr);
1633 else
1634 {
1635 ch = *ptr;
1636 this_translated = 0;
1637 }
1638
1639 if (ch > 0400)
1640 j = ((unsigned char) ch) | 0200;
1641 else
1642 j = (unsigned char) ch;
1643
1644 if (i == infinity)
1645 stride_for_teases = BM_tab[j];
1646
1647 BM_tab[j] = dirlen - i;
1648 /* A translation table is accompanied by its inverse -- see */
1649 /* comment following downcase_table for details */
1650 if (this_translated)
1651 {
1652 int starting_ch = ch;
1653 int starting_j = j;
1654 while (1)
1655 {
1656 TRANSLATE (ch, inverse_trt, ch);
1657 if (ch > 0400)
1658 j = ((unsigned char) ch) | 0200;
1659 else
1660 j = (unsigned char) ch;
1661
1662 /* For all the characters that map into CH,
1663 set up simple_translate to map the last byte
1664 into STARTING_J. */
1665 simple_translate[j] = starting_j;
1666 if (ch == starting_ch)
1667 break;
1668 BM_tab[j] = dirlen - i;
1669 }
1670 }
1671 }
1672 else
1673 {
1674 j = *ptr;
1675
1676 if (i == infinity)
1677 stride_for_teases = BM_tab[j];
1678 BM_tab[j] = dirlen - i;
1679 }
1680 /* stride_for_teases tells how much to stride if we get a */
1681 /* match on the far character but are subsequently */
1682 /* disappointed, by recording what the stride would have been */
1683 /* for that character if the last character had been */
1684 /* different. */
1685 }
1686 infinity = dirlen - infinity;
1687 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1688 /* loop invariant - POS_BYTE points at where last char (first
1689 char if reverse) of pattern would align in a possible match. */
1690 while (n != 0)
1691 {
1692 int tail_end;
1693 unsigned char *tail_end_ptr;
1694
1695 /* It's been reported that some (broken) compiler thinks that
1696 Boolean expressions in an arithmetic context are unsigned.
1697 Using an explicit ?1:0 prevents this. */
1698 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1699 < 0)
1700 return (n * (0 - direction));
1701 /* First we do the part we can by pointers (maybe nothing) */
1702 QUIT;
1703 pat = base_pat;
1704 limit = pos_byte - dirlen + direction;
1705 if (direction > 0)
1706 {
1707 limit = BUFFER_CEILING_OF (limit);
1708 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1709 can take on without hitting edge of buffer or the gap. */
1710 limit = min (limit, pos_byte + 20000);
1711 limit = min (limit, lim_byte - 1);
1712 }
1713 else
1714 {
1715 limit = BUFFER_FLOOR_OF (limit);
1716 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1717 can take on without hitting edge of buffer or the gap. */
1718 limit = max (limit, pos_byte - 20000);
1719 limit = max (limit, lim_byte);
1720 }
1721 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1722 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1723
1724 if ((limit - pos_byte) * direction > 20)
1725 {
1726 unsigned char *p2;
1727
1728 p_limit = BYTE_POS_ADDR (limit);
1729 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1730 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1731 while (1) /* use one cursor setting as long as i can */
1732 {
1733 if (direction > 0) /* worth duplicating */
1734 {
1735 /* Use signed comparison if appropriate
1736 to make cursor+infinity sure to be > p_limit.
1737 Assuming that the buffer lies in a range of addresses
1738 that are all "positive" (as ints) or all "negative",
1739 either kind of comparison will work as long
1740 as we don't step by infinity. So pick the kind
1741 that works when we do step by infinity. */
1742 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1743 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1744 cursor += BM_tab[*cursor];
1745 else
1746 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1747 cursor += BM_tab[*cursor];
1748 }
1749 else
1750 {
1751 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1752 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1753 cursor += BM_tab[*cursor];
1754 else
1755 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1756 cursor += BM_tab[*cursor];
1757 }
1758 /* If you are here, cursor is beyond the end of the searched region. */
1759 /* This can happen if you match on the far character of the pattern, */
1760 /* because the "stride" of that character is infinity, a number able */
1761 /* to throw you well beyond the end of the search. It can also */
1762 /* happen if you fail to match within the permitted region and would */
1763 /* otherwise try a character beyond that region */
1764 if ((cursor - p_limit) * direction <= len_byte)
1765 break; /* a small overrun is genuine */
1766 cursor -= infinity; /* large overrun = hit */
1767 i = dirlen - direction;
1768 if (! NILP (trt))
1769 {
1770 while ((i -= direction) + direction != 0)
1771 {
1772 int ch;
1773 cursor -= direction;
1774 /* Translate only the last byte of a character. */
1775 if (! multibyte
1776 || ((cursor == tail_end_ptr
1777 || CHAR_HEAD_P (cursor[1]))
1778 && (CHAR_HEAD_P (cursor[0])
1779 || (translate_prev_byte == cursor[-1]
1780 && (CHAR_HEAD_P (translate_prev_byte)
1781 || translate_anteprev_byte == cursor[-2])))))
1782 ch = simple_translate[*cursor];
1783 else
1784 ch = *cursor;
1785 if (pat[i] != ch)
1786 break;
1787 }
1788 }
1789 else
1790 {
1791 while ((i -= direction) + direction != 0)
1792 {
1793 cursor -= direction;
1794 if (pat[i] != *cursor)
1795 break;
1796 }
1797 }
1798 cursor += dirlen - i - direction; /* fix cursor */
1799 if (i + direction == 0)
1800 {
1801 int position;
1802
1803 cursor -= direction;
1804
1805 position = pos_byte + cursor - p2 + ((direction > 0)
1806 ? 1 - len_byte : 0);
1807 set_search_regs (position, len_byte);
1808
1809 if ((n -= direction) != 0)
1810 cursor += dirlen; /* to resume search */
1811 else
1812 return ((direction > 0)
1813 ? search_regs.end[0] : search_regs.start[0]);
1814 }
1815 else
1816 cursor += stride_for_teases; /* <sigh> we lose - */
1817 }
1818 pos_byte += cursor - p2;
1819 }
1820 else
1821 /* Now we'll pick up a clump that has to be done the hard */
1822 /* way because it covers a discontinuity */
1823 {
1824 limit = ((direction > 0)
1825 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1826 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1827 limit = ((direction > 0)
1828 ? min (limit + len_byte, lim_byte - 1)
1829 : max (limit - len_byte, lim_byte));
1830 /* LIMIT is now the last value POS_BYTE can have
1831 and still be valid for a possible match. */
1832 while (1)
1833 {
1834 /* This loop can be coded for space rather than */
1835 /* speed because it will usually run only once. */
1836 /* (the reach is at most len + 21, and typically */
1837 /* does not exceed len) */
1838 while ((limit - pos_byte) * direction >= 0)
1839 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1840 /* now run the same tests to distinguish going off the */
1841 /* end, a match or a phony match. */
1842 if ((pos_byte - limit) * direction <= len_byte)
1843 break; /* ran off the end */
1844 /* Found what might be a match.
1845 Set POS_BYTE back to last (first if reverse) pos. */
1846 pos_byte -= infinity;
1847 i = dirlen - direction;
1848 while ((i -= direction) + direction != 0)
1849 {
1850 int ch;
1851 unsigned char *ptr;
1852 pos_byte -= direction;
1853 ptr = BYTE_POS_ADDR (pos_byte);
1854 /* Translate only the last byte of a character. */
1855 if (! multibyte
1856 || ((ptr == tail_end_ptr
1857 || CHAR_HEAD_P (ptr[1]))
1858 && (CHAR_HEAD_P (ptr[0])
1859 || (translate_prev_byte == ptr[-1]
1860 && (CHAR_HEAD_P (translate_prev_byte)
1861 || translate_anteprev_byte == ptr[-2])))))
1862 ch = simple_translate[*ptr];
1863 else
1864 ch = *ptr;
1865 if (pat[i] != ch)
1866 break;
1867 }
1868 /* Above loop has moved POS_BYTE part or all the way
1869 back to the first pos (last pos if reverse).
1870 Set it once again at the last (first if reverse) char. */
1871 pos_byte += dirlen - i- direction;
1872 if (i + direction == 0)
1873 {
1874 int position;
1875 pos_byte -= direction;
1876
1877 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1878
1879 set_search_regs (position, len_byte);
1880
1881 if ((n -= direction) != 0)
1882 pos_byte += dirlen; /* to resume search */
1883 else
1884 return ((direction > 0)
1885 ? search_regs.end[0] : search_regs.start[0]);
1886 }
1887 else
1888 pos_byte += stride_for_teases;
1889 }
1890 }
1891 /* We have done one clump. Can we continue? */
1892 if ((lim_byte - pos_byte) * direction < 0)
1893 return ((0 - n) * direction);
1894 }
1895 return BYTE_TO_CHAR (pos_byte);
1896 }
1897
1898 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1899 for a match just found in the current buffer. */
1900
1901 static void
1902 set_search_regs (beg_byte, nbytes)
1903 int beg_byte, nbytes;
1904 {
1905 /* Make sure we have registers in which to store
1906 the match position. */
1907 if (search_regs.num_regs == 0)
1908 {
1909 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1910 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1911 search_regs.num_regs = 2;
1912 }
1913
1914 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1915 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1916 XSETBUFFER (last_thing_searched, current_buffer);
1917 }
1918 \f
1919 /* Given a string of words separated by word delimiters,
1920 compute a regexp that matches those exact words
1921 separated by arbitrary punctuation. */
1922
1923 static Lisp_Object
1924 wordify (string)
1925 Lisp_Object string;
1926 {
1927 register unsigned char *p, *o;
1928 register int i, i_byte, len, punct_count = 0, word_count = 0;
1929 Lisp_Object val;
1930 int prev_c = 0;
1931 int adjust;
1932
1933 CHECK_STRING (string, 0);
1934 p = XSTRING (string)->data;
1935 len = XSTRING (string)->size;
1936
1937 for (i = 0, i_byte = 0; i < len; )
1938 {
1939 int c;
1940
1941 if (STRING_MULTIBYTE (string))
1942 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1943 else
1944 c = XSTRING (string)->data[i++];
1945
1946 if (SYNTAX (c) != Sword)
1947 {
1948 punct_count++;
1949 if (i > 0 && SYNTAX (prev_c) == Sword)
1950 word_count++;
1951 }
1952
1953 prev_c = c;
1954 }
1955
1956 if (SYNTAX (prev_c) == Sword)
1957 word_count++;
1958 if (!word_count)
1959 return build_string ("");
1960
1961 adjust = - punct_count + 5 * (word_count - 1) + 4;
1962 val = make_uninit_multibyte_string (len + adjust,
1963 STRING_BYTES (XSTRING (string)) + adjust);
1964
1965 o = XSTRING (val)->data;
1966 *o++ = '\\';
1967 *o++ = 'b';
1968 prev_c = 0;
1969
1970 for (i = 0, i_byte = 0; i < len; )
1971 {
1972 int c;
1973 int i_byte_orig = i_byte;
1974
1975 if (STRING_MULTIBYTE (string))
1976 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
1977 else
1978 c = XSTRING (string)->data[i++];
1979
1980 if (SYNTAX (c) == Sword)
1981 {
1982 bcopy (&XSTRING (string)->data[i_byte_orig], o,
1983 i_byte - i_byte_orig);
1984 o += i_byte - i_byte_orig;
1985 }
1986 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
1987 {
1988 *o++ = '\\';
1989 *o++ = 'W';
1990 *o++ = '\\';
1991 *o++ = 'W';
1992 *o++ = '*';
1993 }
1994
1995 prev_c = c;
1996 }
1997
1998 *o++ = '\\';
1999 *o++ = 'b';
2000
2001 return val;
2002 }
2003 \f
2004 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2005 "MSearch backward: ",
2006 "Search backward from point for STRING.\n\
2007 Set point to the beginning of the occurrence found, and return point.\n\
2008 An optional second argument bounds the search; it is a buffer position.\n\
2009 The match found must not extend before that position.\n\
2010 Optional third argument, if t, means if fail just return nil (no error).\n\
2011 If not nil and not t, position at limit of search and return nil.\n\
2012 Optional fourth argument is repeat count--search for successive occurrences.\n\
2013 See also the functions `match-beginning', `match-end' and `replace-match'.")
2014 (string, bound, noerror, count)
2015 Lisp_Object string, bound, noerror, count;
2016 {
2017 return search_command (string, bound, noerror, count, -1, 0, 0);
2018 }
2019
2020 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2021 "Search forward from point for STRING.\n\
2022 Set point to the end of the occurrence found, and return point.\n\
2023 An optional second argument bounds the search; it is a buffer position.\n\
2024 The match found must not extend after that position. nil is equivalent\n\
2025 to (point-max).\n\
2026 Optional third argument, if t, means if fail just return nil (no error).\n\
2027 If not nil and not t, move to limit of search and return nil.\n\
2028 Optional fourth argument is repeat count--search for successive occurrences.\n\
2029 See also the functions `match-beginning', `match-end' and `replace-match'.")
2030 (string, bound, noerror, count)
2031 Lisp_Object string, bound, noerror, count;
2032 {
2033 return search_command (string, bound, noerror, count, 1, 0, 0);
2034 }
2035
2036 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2037 "sWord search backward: ",
2038 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2039 Set point to the beginning of the occurrence found, and return point.\n\
2040 An optional second argument bounds the search; it is a buffer position.\n\
2041 The match found must not extend before that position.\n\
2042 Optional third argument, if t, means if fail just return nil (no error).\n\
2043 If not nil and not t, move to limit of search and return nil.\n\
2044 Optional fourth argument is repeat count--search for successive occurrences.")
2045 (string, bound, noerror, count)
2046 Lisp_Object string, bound, noerror, count;
2047 {
2048 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2049 }
2050
2051 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2052 "sWord search: ",
2053 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2054 Set point to the end of the occurrence found, and return point.\n\
2055 An optional second argument bounds the search; it is a buffer position.\n\
2056 The match found must not extend after that position.\n\
2057 Optional third argument, if t, means if fail just return nil (no error).\n\
2058 If not nil and not t, move to limit of search and return nil.\n\
2059 Optional fourth argument is repeat count--search for successive occurrences.")
2060 (string, bound, noerror, count)
2061 Lisp_Object string, bound, noerror, count;
2062 {
2063 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2064 }
2065
2066 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2067 "sRE search backward: ",
2068 "Search backward from point for match for regular expression REGEXP.\n\
2069 Set point to the beginning of the match, and return point.\n\
2070 The match found is the one starting last in the buffer\n\
2071 and yet ending before the origin of the search.\n\
2072 An optional second argument bounds the search; it is a buffer position.\n\
2073 The match found must start at or after that position.\n\
2074 Optional third argument, if t, means if fail just return nil (no error).\n\
2075 If not nil and not t, move to limit of search and return nil.\n\
2076 Optional fourth argument is repeat count--search for successive occurrences.\n\
2077 See also the functions `match-beginning', `match-end' and `replace-match'.")
2078 (regexp, bound, noerror, count)
2079 Lisp_Object regexp, bound, noerror, count;
2080 {
2081 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2082 }
2083
2084 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2085 "sRE search: ",
2086 "Search forward from point for regular expression REGEXP.\n\
2087 Set point to the end of the occurrence found, and return point.\n\
2088 An optional second argument bounds the search; it is a buffer position.\n\
2089 The match found must not extend after that position.\n\
2090 Optional third argument, if t, means if fail just return nil (no error).\n\
2091 If not nil and not t, move to limit of search and return nil.\n\
2092 Optional fourth argument is repeat count--search for successive occurrences.\n\
2093 See also the functions `match-beginning', `match-end' and `replace-match'.")
2094 (regexp, bound, noerror, count)
2095 Lisp_Object regexp, bound, noerror, count;
2096 {
2097 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2098 }
2099
2100 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2101 "sPosix search backward: ",
2102 "Search backward from point for match for regular expression REGEXP.\n\
2103 Find the longest match in accord with Posix regular expression rules.\n\
2104 Set point to the beginning of the match, and return point.\n\
2105 The match found is the one starting last in the buffer\n\
2106 and yet ending before the origin of the search.\n\
2107 An optional second argument bounds the search; it is a buffer position.\n\
2108 The match found must start at or after that position.\n\
2109 Optional third argument, if t, means if fail just return nil (no error).\n\
2110 If not nil and not t, move to limit of search and return nil.\n\
2111 Optional fourth argument is repeat count--search for successive occurrences.\n\
2112 See also the functions `match-beginning', `match-end' and `replace-match'.")
2113 (regexp, bound, noerror, count)
2114 Lisp_Object regexp, bound, noerror, count;
2115 {
2116 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2117 }
2118
2119 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2120 "sPosix search: ",
2121 "Search forward from point for regular expression REGEXP.\n\
2122 Find the longest match in accord with Posix regular expression rules.\n\
2123 Set point to the end of the occurrence found, and return point.\n\
2124 An optional second argument bounds the search; it is a buffer position.\n\
2125 The match found must not extend after that position.\n\
2126 Optional third argument, if t, means if fail just return nil (no error).\n\
2127 If not nil and not t, move to limit of search and return nil.\n\
2128 Optional fourth argument is repeat count--search for successive occurrences.\n\
2129 See also the functions `match-beginning', `match-end' and `replace-match'.")
2130 (regexp, bound, noerror, count)
2131 Lisp_Object regexp, bound, noerror, count;
2132 {
2133 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2134 }
2135 \f
2136 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2137 "Replace text matched by last search with NEWTEXT.\n\
2138 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2139 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2140 based on the replaced text.\n\
2141 If the replaced text has only capital letters\n\
2142 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2143 If the replaced text has at least one word starting with a capital letter,\n\
2144 then capitalize each word in NEWTEXT.\n\n\
2145 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2146 Otherwise treat `\\' as special:\n\
2147 `\\&' in NEWTEXT means substitute original matched text.\n\
2148 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2149 If Nth parens didn't match, substitute nothing.\n\
2150 `\\\\' means insert one `\\'.\n\
2151 FIXEDCASE and LITERAL are optional arguments.\n\
2152 Leaves point at end of replacement text.\n\
2153 \n\
2154 The optional fourth argument STRING can be a string to modify.\n\
2155 In that case, this function creates and returns a new string\n\
2156 which is made by replacing the part of STRING that was matched.\n\
2157 \n\
2158 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2159 It says to replace just that subexpression instead of the whole match.\n\
2160 This is useful only after a regular expression search or match\n\
2161 since only regular expressions have distinguished subexpressions.")
2162 (newtext, fixedcase, literal, string, subexp)
2163 Lisp_Object newtext, fixedcase, literal, string, subexp;
2164 {
2165 enum { nochange, all_caps, cap_initial } case_action;
2166 register int pos, last;
2167 int some_multiletter_word;
2168 int some_lowercase;
2169 int some_uppercase;
2170 int some_nonuppercase_initial;
2171 register int c, prevc;
2172 int inslen;
2173 int sub;
2174 int opoint, newpoint;
2175
2176 CHECK_STRING (newtext, 0);
2177
2178 if (! NILP (string))
2179 CHECK_STRING (string, 4);
2180
2181 case_action = nochange; /* We tried an initialization */
2182 /* but some C compilers blew it */
2183
2184 if (search_regs.num_regs <= 0)
2185 error ("replace-match called before any match found");
2186
2187 if (NILP (subexp))
2188 sub = 0;
2189 else
2190 {
2191 CHECK_NUMBER (subexp, 3);
2192 sub = XINT (subexp);
2193 if (sub < 0 || sub >= search_regs.num_regs)
2194 args_out_of_range (subexp, make_number (search_regs.num_regs));
2195 }
2196
2197 if (NILP (string))
2198 {
2199 if (search_regs.start[sub] < BEGV
2200 || search_regs.start[sub] > search_regs.end[sub]
2201 || search_regs.end[sub] > ZV)
2202 args_out_of_range (make_number (search_regs.start[sub]),
2203 make_number (search_regs.end[sub]));
2204 }
2205 else
2206 {
2207 if (search_regs.start[sub] < 0
2208 || search_regs.start[sub] > search_regs.end[sub]
2209 || search_regs.end[sub] > XSTRING (string)->size)
2210 args_out_of_range (make_number (search_regs.start[sub]),
2211 make_number (search_regs.end[sub]));
2212 }
2213
2214 if (NILP (fixedcase))
2215 {
2216 int beg;
2217 /* Decide how to casify by examining the matched text. */
2218
2219 if (NILP (string))
2220 last = CHAR_TO_BYTE (search_regs.end[sub]);
2221 else
2222 last = search_regs.end[sub];
2223
2224 if (NILP (string))
2225 beg = CHAR_TO_BYTE (search_regs.start[sub]);
2226 else
2227 beg = search_regs.start[sub];
2228
2229 prevc = '\n';
2230 case_action = all_caps;
2231
2232 /* some_multiletter_word is set nonzero if any original word
2233 is more than one letter long. */
2234 some_multiletter_word = 0;
2235 some_lowercase = 0;
2236 some_nonuppercase_initial = 0;
2237 some_uppercase = 0;
2238
2239 for (pos = beg; pos < last; pos++)
2240 {
2241 if (NILP (string))
2242 c = FETCH_BYTE (pos);
2243 else
2244 c = XSTRING (string)->data[pos];
2245
2246 if (LOWERCASEP (c))
2247 {
2248 /* Cannot be all caps if any original char is lower case */
2249
2250 some_lowercase = 1;
2251 if (SYNTAX (prevc) != Sword)
2252 some_nonuppercase_initial = 1;
2253 else
2254 some_multiletter_word = 1;
2255 }
2256 else if (!NOCASEP (c))
2257 {
2258 some_uppercase = 1;
2259 if (SYNTAX (prevc) != Sword)
2260 ;
2261 else
2262 some_multiletter_word = 1;
2263 }
2264 else
2265 {
2266 /* If the initial is a caseless word constituent,
2267 treat that like a lowercase initial. */
2268 if (SYNTAX (prevc) != Sword)
2269 some_nonuppercase_initial = 1;
2270 }
2271
2272 prevc = c;
2273 }
2274
2275 /* Convert to all caps if the old text is all caps
2276 and has at least one multiletter word. */
2277 if (! some_lowercase && some_multiletter_word)
2278 case_action = all_caps;
2279 /* Capitalize each word, if the old text has all capitalized words. */
2280 else if (!some_nonuppercase_initial && some_multiletter_word)
2281 case_action = cap_initial;
2282 else if (!some_nonuppercase_initial && some_uppercase)
2283 /* Should x -> yz, operating on X, give Yz or YZ?
2284 We'll assume the latter. */
2285 case_action = all_caps;
2286 else
2287 case_action = nochange;
2288 }
2289
2290 /* Do replacement in a string. */
2291 if (!NILP (string))
2292 {
2293 Lisp_Object before, after;
2294
2295 before = Fsubstring (string, make_number (0),
2296 make_number (search_regs.start[sub]));
2297 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2298
2299 /* Substitute parts of the match into NEWTEXT
2300 if desired. */
2301 if (NILP (literal))
2302 {
2303 int lastpos = 0;
2304 int lastpos_byte = 0;
2305 /* We build up the substituted string in ACCUM. */
2306 Lisp_Object accum;
2307 Lisp_Object middle;
2308 int pos_byte;
2309
2310 accum = Qnil;
2311
2312 for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
2313 {
2314 int substart = -1;
2315 int subend;
2316 int delbackslash = 0;
2317
2318 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2319
2320 if (c == '\\')
2321 {
2322 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2323 if (c == '&')
2324 {
2325 substart = search_regs.start[sub];
2326 subend = search_regs.end[sub];
2327 }
2328 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2329 {
2330 if (search_regs.start[c - '0'] >= 0)
2331 {
2332 substart = search_regs.start[c - '0'];
2333 subend = search_regs.end[c - '0'];
2334 }
2335 }
2336 else if (c == '\\')
2337 delbackslash = 1;
2338 else
2339 error ("Invalid use of `\\' in replacement text");
2340 }
2341 if (substart >= 0)
2342 {
2343 if (pos - 2 != lastpos)
2344 middle = substring_both (newtext, lastpos,
2345 lastpos_byte,
2346 pos - 2, pos_byte - 2);
2347 else
2348 middle = Qnil;
2349 accum = concat3 (accum, middle,
2350 Fsubstring (string,
2351 make_number (substart),
2352 make_number (subend)));
2353 lastpos = pos;
2354 lastpos_byte = pos_byte;
2355 }
2356 else if (delbackslash)
2357 {
2358 middle = substring_both (newtext, lastpos,
2359 lastpos_byte,
2360 pos - 1, pos_byte - 1);
2361
2362 accum = concat2 (accum, middle);
2363 lastpos = pos;
2364 lastpos_byte = pos_byte;
2365 }
2366 }
2367
2368 if (pos != lastpos)
2369 middle = substring_both (newtext, lastpos,
2370 lastpos_byte,
2371 pos, pos_byte);
2372 else
2373 middle = Qnil;
2374
2375 newtext = concat2 (accum, middle);
2376 }
2377
2378 /* Do case substitution in NEWTEXT if desired. */
2379 if (case_action == all_caps)
2380 newtext = Fupcase (newtext);
2381 else if (case_action == cap_initial)
2382 newtext = Fupcase_initials (newtext);
2383
2384 return concat3 (before, newtext, after);
2385 }
2386
2387 /* Record point, the move (quietly) to the start of the match. */
2388 if (PT > search_regs.start[sub])
2389 opoint = PT - ZV;
2390 else
2391 opoint = PT;
2392
2393 TEMP_SET_PT (search_regs.start[sub]);
2394
2395 /* We insert the replacement text before the old text, and then
2396 delete the original text. This means that markers at the
2397 beginning or end of the original will float to the corresponding
2398 position in the replacement. */
2399 if (!NILP (literal))
2400 Finsert_and_inherit (1, &newtext);
2401 else
2402 {
2403 struct gcpro gcpro1;
2404 GCPRO1 (newtext);
2405
2406 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
2407 {
2408 int offset = PT - search_regs.start[sub];
2409
2410 c = XSTRING (newtext)->data[pos];
2411 if (c == '\\')
2412 {
2413 c = XSTRING (newtext)->data[++pos];
2414 if (c == '&')
2415 Finsert_buffer_substring
2416 (Fcurrent_buffer (),
2417 make_number (search_regs.start[sub] + offset),
2418 make_number (search_regs.end[sub] + offset));
2419 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2420 {
2421 if (search_regs.start[c - '0'] >= 1)
2422 Finsert_buffer_substring
2423 (Fcurrent_buffer (),
2424 make_number (search_regs.start[c - '0'] + offset),
2425 make_number (search_regs.end[c - '0'] + offset));
2426 }
2427 else if (c == '\\')
2428 insert_char (c);
2429 else
2430 error ("Invalid use of `\\' in replacement text");
2431 }
2432 else
2433 insert_char (c);
2434 }
2435 UNGCPRO;
2436 }
2437
2438 inslen = PT - (search_regs.start[sub]);
2439 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
2440
2441 if (case_action == all_caps)
2442 Fupcase_region (make_number (PT - inslen), make_number (PT));
2443 else if (case_action == cap_initial)
2444 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
2445
2446 newpoint = PT;
2447
2448 /* Put point back where it was in the text. */
2449 if (opoint <= 0)
2450 TEMP_SET_PT (opoint + ZV);
2451 else
2452 TEMP_SET_PT (opoint);
2453
2454 /* Now move point "officially" to the start of the inserted replacement. */
2455 move_if_not_intangible (newpoint);
2456
2457 return Qnil;
2458 }
2459 \f
2460 static Lisp_Object
2461 match_limit (num, beginningp)
2462 Lisp_Object num;
2463 int beginningp;
2464 {
2465 register int n;
2466
2467 CHECK_NUMBER (num, 0);
2468 n = XINT (num);
2469 if (n < 0 || n >= search_regs.num_regs)
2470 args_out_of_range (num, make_number (search_regs.num_regs));
2471 if (search_regs.num_regs <= 0
2472 || search_regs.start[n] < 0)
2473 return Qnil;
2474 return (make_number ((beginningp) ? search_regs.start[n]
2475 : search_regs.end[n]));
2476 }
2477
2478 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2479 "Return position of start of text matched by last search.\n\
2480 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2481 regexp.\n\
2482 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2483 SUBEXP pairs.\n\
2484 Zero means the entire text matched by the whole regexp or whole string.")
2485 (subexp)
2486 Lisp_Object subexp;
2487 {
2488 return match_limit (subexp, 1);
2489 }
2490
2491 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2492 "Return position of end of text matched by last search.\n\
2493 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2494 regexp.\n\
2495 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2496 SUBEXP pairs.\n\
2497 Zero means the entire text matched by the whole regexp or whole string.")
2498 (subexp)
2499 Lisp_Object subexp;
2500 {
2501 return match_limit (subexp, 0);
2502 }
2503
2504 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
2505 "Return a list containing all info on what the last search matched.\n\
2506 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2507 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2508 if the last match was on a buffer; integers or nil if a string was matched.\n\
2509 Use `store-match-data' to reinstate the data in this list.\n\
2510 \n\
2511 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2512 \(rather than markers) to represent buffer positions.\n\
2513 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2514 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2515 (integers, reuse)
2516 Lisp_Object integers, reuse;
2517 {
2518 Lisp_Object tail, prev;
2519 Lisp_Object *data;
2520 int i, len;
2521
2522 if (NILP (last_thing_searched))
2523 return Qnil;
2524
2525 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
2526 * sizeof (Lisp_Object));
2527
2528 len = -1;
2529 for (i = 0; i < search_regs.num_regs; i++)
2530 {
2531 int start = search_regs.start[i];
2532 if (start >= 0)
2533 {
2534 if (EQ (last_thing_searched, Qt)
2535 || ! NILP (integers))
2536 {
2537 XSETFASTINT (data[2 * i], start);
2538 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2539 }
2540 else if (BUFFERP (last_thing_searched))
2541 {
2542 data[2 * i] = Fmake_marker ();
2543 Fset_marker (data[2 * i],
2544 make_number (start),
2545 last_thing_searched);
2546 data[2 * i + 1] = Fmake_marker ();
2547 Fset_marker (data[2 * i + 1],
2548 make_number (search_regs.end[i]),
2549 last_thing_searched);
2550 }
2551 else
2552 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2553 abort ();
2554
2555 len = i;
2556 }
2557 else
2558 data[2 * i] = data [2 * i + 1] = Qnil;
2559 }
2560
2561 /* If REUSE is not usable, cons up the values and return them. */
2562 if (! CONSP (reuse))
2563 return Flist (2 * len + 2, data);
2564
2565 /* If REUSE is a list, store as many value elements as will fit
2566 into the elements of REUSE. */
2567 for (i = 0, tail = reuse; CONSP (tail);
2568 i++, tail = XCONS (tail)->cdr)
2569 {
2570 if (i < 2 * len + 2)
2571 XCONS (tail)->car = data[i];
2572 else
2573 XCONS (tail)->car = Qnil;
2574 prev = tail;
2575 }
2576
2577 /* If we couldn't fit all value elements into REUSE,
2578 cons up the rest of them and add them to the end of REUSE. */
2579 if (i < 2 * len + 2)
2580 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
2581
2582 return reuse;
2583 }
2584
2585
2586 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 1, 0,
2587 "Set internal data on last search match from elements of LIST.\n\
2588 LIST should have been created by calling `match-data' previously.")
2589 (list)
2590 register Lisp_Object list;
2591 {
2592 register int i;
2593 register Lisp_Object marker;
2594
2595 if (running_asynch_code)
2596 save_search_regs ();
2597
2598 if (!CONSP (list) && !NILP (list))
2599 list = wrong_type_argument (Qconsp, list);
2600
2601 /* Unless we find a marker with a buffer in LIST, assume that this
2602 match data came from a string. */
2603 last_thing_searched = Qt;
2604
2605 /* Allocate registers if they don't already exist. */
2606 {
2607 int length = XFASTINT (Flength (list)) / 2;
2608
2609 if (length > search_regs.num_regs)
2610 {
2611 if (search_regs.num_regs == 0)
2612 {
2613 search_regs.start
2614 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2615 search_regs.end
2616 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2617 }
2618 else
2619 {
2620 search_regs.start
2621 = (regoff_t *) xrealloc (search_regs.start,
2622 length * sizeof (regoff_t));
2623 search_regs.end
2624 = (regoff_t *) xrealloc (search_regs.end,
2625 length * sizeof (regoff_t));
2626 }
2627
2628 search_regs.num_regs = length;
2629 }
2630 }
2631
2632 for (i = 0; i < search_regs.num_regs; i++)
2633 {
2634 marker = Fcar (list);
2635 if (NILP (marker))
2636 {
2637 search_regs.start[i] = -1;
2638 list = Fcdr (list);
2639 }
2640 else
2641 {
2642 if (MARKERP (marker))
2643 {
2644 if (XMARKER (marker)->buffer == 0)
2645 XSETFASTINT (marker, 0);
2646 else
2647 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2648 }
2649
2650 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2651 search_regs.start[i] = XINT (marker);
2652 list = Fcdr (list);
2653
2654 marker = Fcar (list);
2655 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2656 XSETFASTINT (marker, 0);
2657
2658 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2659 search_regs.end[i] = XINT (marker);
2660 }
2661 list = Fcdr (list);
2662 }
2663
2664 return Qnil;
2665 }
2666
2667 /* If non-zero the match data have been saved in saved_search_regs
2668 during the execution of a sentinel or filter. */
2669 static int search_regs_saved;
2670 static struct re_registers saved_search_regs;
2671
2672 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2673 if asynchronous code (filter or sentinel) is running. */
2674 static void
2675 save_search_regs ()
2676 {
2677 if (!search_regs_saved)
2678 {
2679 saved_search_regs.num_regs = search_regs.num_regs;
2680 saved_search_regs.start = search_regs.start;
2681 saved_search_regs.end = search_regs.end;
2682 search_regs.num_regs = 0;
2683 search_regs.start = 0;
2684 search_regs.end = 0;
2685
2686 search_regs_saved = 1;
2687 }
2688 }
2689
2690 /* Called upon exit from filters and sentinels. */
2691 void
2692 restore_match_data ()
2693 {
2694 if (search_regs_saved)
2695 {
2696 if (search_regs.num_regs > 0)
2697 {
2698 xfree (search_regs.start);
2699 xfree (search_regs.end);
2700 }
2701 search_regs.num_regs = saved_search_regs.num_regs;
2702 search_regs.start = saved_search_regs.start;
2703 search_regs.end = saved_search_regs.end;
2704
2705 search_regs_saved = 0;
2706 }
2707 }
2708
2709 /* Quote a string to inactivate reg-expr chars */
2710
2711 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2712 "Return a regexp string which matches exactly STRING and nothing else.")
2713 (string)
2714 Lisp_Object string;
2715 {
2716 register unsigned char *in, *out, *end;
2717 register unsigned char *temp;
2718 int backslashes_added = 0;
2719
2720 CHECK_STRING (string, 0);
2721
2722 temp = (unsigned char *) alloca (STRING_BYTES (XSTRING (string)) * 2);
2723
2724 /* Now copy the data into the new string, inserting escapes. */
2725
2726 in = XSTRING (string)->data;
2727 end = in + STRING_BYTES (XSTRING (string));
2728 out = temp;
2729
2730 for (; in != end; in++)
2731 {
2732 if (*in == '[' || *in == ']'
2733 || *in == '*' || *in == '.' || *in == '\\'
2734 || *in == '?' || *in == '+'
2735 || *in == '^' || *in == '$')
2736 *out++ = '\\', backslashes_added++;
2737 *out++ = *in;
2738 }
2739
2740 return make_specified_string (temp,
2741 XSTRING (string)->size + backslashes_added,
2742 out - temp,
2743 STRING_MULTIBYTE (string));
2744 }
2745 \f
2746 void
2747 syms_of_search ()
2748 {
2749 register int i;
2750
2751 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2752 {
2753 searchbufs[i].buf.allocated = 100;
2754 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2755 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2756 searchbufs[i].regexp = Qnil;
2757 staticpro (&searchbufs[i].regexp);
2758 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2759 }
2760 searchbuf_head = &searchbufs[0];
2761
2762 Qsearch_failed = intern ("search-failed");
2763 staticpro (&Qsearch_failed);
2764 Qinvalid_regexp = intern ("invalid-regexp");
2765 staticpro (&Qinvalid_regexp);
2766
2767 Fput (Qsearch_failed, Qerror_conditions,
2768 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2769 Fput (Qsearch_failed, Qerror_message,
2770 build_string ("Search failed"));
2771
2772 Fput (Qinvalid_regexp, Qerror_conditions,
2773 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2774 Fput (Qinvalid_regexp, Qerror_message,
2775 build_string ("Invalid regexp"));
2776
2777 last_thing_searched = Qnil;
2778 staticpro (&last_thing_searched);
2779
2780 defsubr (&Slooking_at);
2781 defsubr (&Sposix_looking_at);
2782 defsubr (&Sstring_match);
2783 defsubr (&Sposix_string_match);
2784 defsubr (&Ssearch_forward);
2785 defsubr (&Ssearch_backward);
2786 defsubr (&Sword_search_forward);
2787 defsubr (&Sword_search_backward);
2788 defsubr (&Sre_search_forward);
2789 defsubr (&Sre_search_backward);
2790 defsubr (&Sposix_search_forward);
2791 defsubr (&Sposix_search_backward);
2792 defsubr (&Sreplace_match);
2793 defsubr (&Smatch_beginning);
2794 defsubr (&Smatch_end);
2795 defsubr (&Smatch_data);
2796 defsubr (&Sset_match_data);
2797 defsubr (&Sregexp_quote);
2798 }