]> code.delx.au - gnu-emacs/blob - src/search.c
*** empty log message ***
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1992 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 1, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
19
20
21 #include "config.h"
22 #include "lisp.h"
23 #include "syntax.h"
24 #include "buffer.h"
25 #include "commands.h"
26
27 #include <sys/types.h>
28 #include "regex.h"
29
30 #define max(a, b) ((a) > (b) ? (a) : (b))
31 #define min(a, b) ((a) < (b) ? (a) : (b))
32
33 /* We compile regexps into this buffer and then use it for searching. */
34
35 struct re_pattern_buffer searchbuf;
36
37 char search_fastmap[0400];
38
39 /* Last regexp we compiled */
40
41 Lisp_Object last_regexp;
42
43 /* Every call to re_match, etc., must pass &search_regs as the regs
44 argument unless you can show it is unnecessary (i.e., if re_match
45 is certainly going to be called again before region-around-match
46 can be called).
47
48 Since the registers are now dynamically allocated, we need to make
49 sure not to refer to the Nth register before checking that it has
50 been allocated by checking search_regs.num_regs.
51
52 The regex code keeps track of whether it has allocated the search
53 buffer using bits in searchbuf. This means that whenever you
54 compile a new pattern, it completely forgets whether it has
55 allocated any registers, and will allocate new registers the next
56 time you call a searching or matching function. Therefore, we need
57 to call re_set_registers after compiling a new pattern or after
58 setting the match registers, so that the regex functions will be
59 able to free or re-allocate it properly. */
60 static struct re_registers search_regs;
61
62 /* Nonzero if search_regs are indices in a string; 0 if in a buffer. */
63
64 static int search_regs_from_string;
65
66 /* error condition signalled when regexp compile_pattern fails */
67
68 Lisp_Object Qinvalid_regexp;
69
70 static void
71 matcher_overflow ()
72 {
73 error ("Stack overflow in regexp matcher");
74 }
75
76 #ifdef __STDC__
77 #define CONST const
78 #else
79 #define CONST
80 #endif
81
82 /* Compile a regexp and signal a Lisp error if anything goes wrong. */
83
84 compile_pattern (pattern, bufp, regp, translate)
85 Lisp_Object pattern;
86 struct re_pattern_buffer *bufp;
87 struct re_registers *regp;
88 char *translate;
89 {
90 CONST char *val;
91 Lisp_Object dummy;
92
93 if (EQ (pattern, last_regexp)
94 && translate == bufp->translate)
95 return;
96
97 last_regexp = Qnil;
98 bufp->translate = translate;
99 val = re_compile_pattern ((char *) XSTRING (pattern)->data,
100 XSTRING (pattern)->size,
101 bufp);
102 if (val)
103 {
104 dummy = build_string (val);
105 while (1)
106 Fsignal (Qinvalid_regexp, Fcons (dummy, Qnil));
107 }
108
109 last_regexp = pattern;
110
111 /* Advise the searching functions about the space we have allocated
112 for register data. */
113 re_set_registers (bufp, regp, regp->num_regs, regp->start, regp->end);
114
115 return;
116 }
117
118 /* Error condition used for failing searches */
119 Lisp_Object Qsearch_failed;
120
121 Lisp_Object
122 signal_failure (arg)
123 Lisp_Object arg;
124 {
125 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
126 return Qnil;
127 }
128 \f
129 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
130 "Return t if text after point matches regular expression PAT.\n\
131 This function modifies the match data that `match-beginning',\n\
132 `match-end' and `match-data' access; save and restore the match\n\
133 data if you want to preserve them.")
134 (string)
135 Lisp_Object string;
136 {
137 Lisp_Object val;
138 unsigned char *p1, *p2;
139 int s1, s2;
140 register int i;
141
142 CHECK_STRING (string, 0);
143 compile_pattern (string, &searchbuf, &search_regs,
144 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
145
146 immediate_quit = 1;
147 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
148
149 /* Get pointers and sizes of the two strings
150 that make up the visible portion of the buffer. */
151
152 p1 = BEGV_ADDR;
153 s1 = GPT - BEGV;
154 p2 = GAP_END_ADDR;
155 s2 = ZV - GPT;
156 if (s1 < 0)
157 {
158 p2 = p1;
159 s2 = ZV - BEGV;
160 s1 = 0;
161 }
162 if (s2 < 0)
163 {
164 s1 = ZV - BEGV;
165 s2 = 0;
166 }
167
168 i = re_match_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
169 point - BEGV, &search_regs,
170 ZV - BEGV);
171 if (i == -2)
172 matcher_overflow ();
173
174 val = (0 <= i ? Qt : Qnil);
175 for (i = 0; i < search_regs.num_regs; i++)
176 if (search_regs.start[i] >= 0)
177 {
178 search_regs.start[i] += BEGV;
179 search_regs.end[i] += BEGV;
180 }
181 search_regs_from_string = 0;
182 immediate_quit = 0;
183 return val;
184 }
185
186 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
187 "Return index of start of first match for REGEXP in STRING, or nil.\n\
188 If third arg START is non-nil, start search at that index in STRING.\n\
189 For index of first char beyond the match, do (match-end 0).\n\
190 `match-end' and `match-beginning' also give indices of substrings\n\
191 matched by parenthesis constructs in the pattern.")
192 (regexp, string, start)
193 Lisp_Object regexp, string, start;
194 {
195 int val;
196 int s;
197
198 CHECK_STRING (regexp, 0);
199 CHECK_STRING (string, 1);
200
201 if (NILP (start))
202 s = 0;
203 else
204 {
205 int len = XSTRING (string)->size;
206
207 CHECK_NUMBER (start, 2);
208 s = XINT (start);
209 if (s < 0 && -s <= len)
210 s = len - s;
211 else if (0 > s || s > len)
212 args_out_of_range (string, start);
213 }
214
215 compile_pattern (regexp, &searchbuf, &search_regs,
216 !NILP (current_buffer->case_fold_search) ? DOWNCASE_TABLE : 0);
217 immediate_quit = 1;
218 val = re_search (&searchbuf, (char *) XSTRING (string)->data,
219 XSTRING (string)->size, s, XSTRING (string)->size - s,
220 &search_regs);
221 immediate_quit = 0;
222 search_regs_from_string = 1;
223 if (val == -2)
224 matcher_overflow ();
225 if (val < 0) return Qnil;
226 return make_number (val);
227 }
228 \f
229 /* Search for COUNT instances of the character TARGET, starting at START.
230 If COUNT is negative, search backwards.
231
232 If we find COUNT instances, set *SHORTAGE to zero, and return the
233 position of the COUNTth character.
234
235 If we don't find COUNT instances before reaching the end of the
236 buffer (or the beginning, if scanning backwards), set *SHORTAGE to
237 the number of TARGETs left unfound, and return the end of the
238 buffer we bumped up against. */
239
240 scan_buffer (target, start, count, shortage)
241 int *shortage, start;
242 register int count, target;
243 {
244 int limit = ((count > 0) ? ZV - 1 : BEGV);
245 int direction = ((count > 0) ? 1 : -1);
246
247 register unsigned char *cursor;
248 unsigned char *base;
249
250 register int ceiling;
251 register unsigned char *ceiling_addr;
252
253 if (shortage != 0)
254 *shortage = 0;
255
256 immediate_quit = 1;
257
258 if (count > 0)
259 while (start != limit + 1)
260 {
261 ceiling = BUFFER_CEILING_OF (start);
262 ceiling = min (limit, ceiling);
263 ceiling_addr = &FETCH_CHAR (ceiling) + 1;
264 base = (cursor = &FETCH_CHAR (start));
265 while (1)
266 {
267 while (*cursor != target && ++cursor != ceiling_addr)
268 ;
269 if (cursor != ceiling_addr)
270 {
271 if (--count == 0)
272 {
273 immediate_quit = 0;
274 return (start + cursor - base + 1);
275 }
276 else
277 if (++cursor == ceiling_addr)
278 break;
279 }
280 else
281 break;
282 }
283 start += cursor - base;
284 }
285 else
286 {
287 start--; /* first character we scan */
288 while (start > limit - 1)
289 { /* we WILL scan under start */
290 ceiling = BUFFER_FLOOR_OF (start);
291 ceiling = max (limit, ceiling);
292 ceiling_addr = &FETCH_CHAR (ceiling) - 1;
293 base = (cursor = &FETCH_CHAR (start));
294 cursor++;
295 while (1)
296 {
297 while (--cursor != ceiling_addr && *cursor != target)
298 ;
299 if (cursor != ceiling_addr)
300 {
301 if (++count == 0)
302 {
303 immediate_quit = 0;
304 return (start + cursor - base + 1);
305 }
306 }
307 else
308 break;
309 }
310 start += cursor - base;
311 }
312 }
313 immediate_quit = 0;
314 if (shortage != 0)
315 *shortage = count * direction;
316 return (start + ((direction == 1 ? 0 : 1)));
317 }
318
319 int
320 find_next_newline (from, cnt)
321 register int from, cnt;
322 {
323 return (scan_buffer ('\n', from, cnt, (int *) 0));
324 }
325 \f
326 DEFUN ("skip-chars-forward", Fskip_chars_forward, Sskip_chars_forward, 1, 2, 0,
327 "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
328 CHARS is like the inside of a `[...]' in a regular expression\n\
329 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
330 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
331 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.")
332 (string, lim)
333 Lisp_Object string, lim;
334 {
335 skip_chars (1, string, lim);
336 return Qnil;
337 }
338
339 DEFUN ("skip-chars-backward", Fskip_chars_backward, Sskip_chars_backward, 1, 2, 0,
340 "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
341 See `skip-chars-forward' for details.")
342 (string, lim)
343 Lisp_Object string, lim;
344 {
345 skip_chars (0, string, lim);
346 return Qnil;
347 }
348
349 skip_chars (forwardp, string, lim)
350 int forwardp;
351 Lisp_Object string, lim;
352 {
353 register unsigned char *p, *pend;
354 register unsigned char c;
355 unsigned char fastmap[0400];
356 int negate = 0;
357 register int i;
358
359 CHECK_STRING (string, 0);
360
361 if (NILP (lim))
362 XSET (lim, Lisp_Int, forwardp ? ZV : BEGV);
363 else
364 CHECK_NUMBER_COERCE_MARKER (lim, 1);
365
366 #if 0 /* This breaks some things... jla. */
367 /* In any case, don't allow scan outside bounds of buffer. */
368 if (XFASTINT (lim) > ZV)
369 XFASTINT (lim) = ZV;
370 if (XFASTINT (lim) < BEGV)
371 XFASTINT (lim) = BEGV;
372 #endif
373
374 p = XSTRING (string)->data;
375 pend = p + XSTRING (string)->size;
376 bzero (fastmap, sizeof fastmap);
377
378 if (p != pend && *p == '^')
379 {
380 negate = 1; p++;
381 }
382
383 /* Find the characters specified and set their elements of fastmap. */
384
385 while (p != pend)
386 {
387 c = *p++;
388 if (c == '\\')
389 {
390 if (p == pend) break;
391 c = *p++;
392 }
393 if (p != pend && *p == '-')
394 {
395 p++;
396 if (p == pend) break;
397 while (c <= *p)
398 {
399 fastmap[c] = 1;
400 c++;
401 }
402 p++;
403 }
404 else
405 fastmap[c] = 1;
406 }
407
408 /* If ^ was the first character, complement the fastmap. */
409
410 if (negate)
411 for (i = 0; i < sizeof fastmap; i++)
412 fastmap[i] ^= 1;
413
414 immediate_quit = 1;
415 if (forwardp)
416 {
417 while (point < XINT (lim) && fastmap[FETCH_CHAR (point)])
418 SET_PT (point + 1);
419 }
420 else
421 {
422 while (point > XINT (lim) && fastmap[FETCH_CHAR (point - 1)])
423 SET_PT (point - 1);
424 }
425 immediate_quit = 0;
426 }
427 \f
428 /* Subroutines of Lisp buffer search functions. */
429
430 static Lisp_Object
431 search_command (string, bound, noerror, count, direction, RE)
432 Lisp_Object string, bound, noerror, count;
433 int direction;
434 int RE;
435 {
436 register int np;
437 int lim;
438 int n = direction;
439
440 if (!NILP (count))
441 {
442 CHECK_NUMBER (count, 3);
443 n *= XINT (count);
444 }
445
446 CHECK_STRING (string, 0);
447 if (NILP (bound))
448 lim = n > 0 ? ZV : BEGV;
449 else
450 {
451 CHECK_NUMBER_COERCE_MARKER (bound, 1);
452 lim = XINT (bound);
453 if (n > 0 ? lim < point : lim > point)
454 error ("Invalid search bound (wrong side of point)");
455 if (lim > ZV)
456 lim = ZV;
457 if (lim < BEGV)
458 lim = BEGV;
459 }
460
461 np = search_buffer (string, point, lim, n, RE,
462 (!NILP (current_buffer->case_fold_search)
463 ? XSTRING (current_buffer->case_canon_table)->data : 0),
464 (!NILP (current_buffer->case_fold_search)
465 ? XSTRING (current_buffer->case_eqv_table)->data : 0));
466 if (np <= 0)
467 {
468 if (NILP (noerror))
469 return signal_failure (string);
470 if (!EQ (noerror, Qt))
471 {
472 if (lim < BEGV || lim > ZV)
473 abort ();
474 SET_PT (lim);
475 }
476 return Qnil;
477 }
478
479 if (np < BEGV || np > ZV)
480 abort ();
481
482 SET_PT (np);
483
484 return make_number (np);
485 }
486 \f
487 /* search for the n'th occurrence of STRING in the current buffer,
488 starting at position POS and stopping at position LIM,
489 treating PAT as a literal string if RE is false or as
490 a regular expression if RE is true.
491
492 If N is positive, searching is forward and LIM must be greater than POS.
493 If N is negative, searching is backward and LIM must be less than POS.
494
495 Returns -x if only N-x occurrences found (x > 0),
496 or else the position at the beginning of the Nth occurrence
497 (if searching backward) or the end (if searching forward). */
498
499 search_buffer (string, pos, lim, n, RE, trt, inverse_trt)
500 Lisp_Object string;
501 int pos;
502 int lim;
503 int n;
504 int RE;
505 register unsigned char *trt;
506 register unsigned char *inverse_trt;
507 {
508 int len = XSTRING (string)->size;
509 unsigned char *base_pat = XSTRING (string)->data;
510 register int *BM_tab;
511 int *BM_tab_base;
512 register int direction = ((n > 0) ? 1 : -1);
513 register int dirlen;
514 int infinity, limit, k, stride_for_teases;
515 register unsigned char *pat, *cursor, *p_limit;
516 register int i, j;
517 unsigned char *p1, *p2;
518 int s1, s2;
519
520 /* Null string is found at starting position. */
521 if (!len)
522 return pos;
523
524 if (RE)
525 compile_pattern (string, &searchbuf, &search_regs, (char *) trt);
526
527 if (RE /* Here we detect whether the */
528 /* generality of an RE search is */
529 /* really needed. */
530 /* first item is "exact match" */
531 && *(searchbuf.buffer) == (char) RE_EXACTN_VALUE
532 && searchbuf.buffer[1] + 2 == searchbuf.used) /*first is ONLY item */
533 {
534 RE = 0; /* can do straight (non RE) search */
535 pat = (base_pat = (unsigned char *) searchbuf.buffer + 2);
536 /* trt already applied */
537 len = searchbuf.used - 2;
538 }
539 else if (!RE)
540 {
541 pat = (unsigned char *) alloca (len);
542
543 for (i = len; i--;) /* Copy the pattern; apply trt */
544 *pat++ = (((int) trt) ? trt [*base_pat++] : *base_pat++);
545 pat -= len; base_pat = pat;
546 }
547
548 if (RE)
549 {
550 immediate_quit = 1; /* Quit immediately if user types ^G,
551 because letting this function finish
552 can take too long. */
553 QUIT; /* Do a pending quit right away,
554 to avoid paradoxical behavior */
555 /* Get pointers and sizes of the two strings
556 that make up the visible portion of the buffer. */
557
558 p1 = BEGV_ADDR;
559 s1 = GPT - BEGV;
560 p2 = GAP_END_ADDR;
561 s2 = ZV - GPT;
562 if (s1 < 0)
563 {
564 p2 = p1;
565 s2 = ZV - BEGV;
566 s1 = 0;
567 }
568 if (s2 < 0)
569 {
570 s1 = ZV - BEGV;
571 s2 = 0;
572 }
573 while (n < 0)
574 {
575 int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
576 pos - BEGV, lim - pos, &search_regs,
577 /* Don't allow match past current point */
578 pos - BEGV);
579 if (val == -2)
580 matcher_overflow ();
581 if (val >= 0)
582 {
583 j = BEGV;
584 for (i = 0; i < search_regs.num_regs; i++)
585 if (search_regs.start[i] >= 0)
586 {
587 search_regs.start[i] += j;
588 search_regs.end[i] += j;
589 }
590 search_regs_from_string = 0;
591 /* Set pos to the new position. */
592 pos = search_regs.start[0];
593 }
594 else
595 {
596 immediate_quit = 0;
597 return (n);
598 }
599 n++;
600 }
601 while (n > 0)
602 {
603 int val = re_search_2 (&searchbuf, (char *) p1, s1, (char *) p2, s2,
604 pos - BEGV, lim - pos, &search_regs,
605 lim - BEGV);
606 if (val == -2)
607 matcher_overflow ();
608 if (val >= 0)
609 {
610 j = BEGV;
611 for (i = 0; i < search_regs.num_regs; i++)
612 if (search_regs.start[i] >= 0)
613 {
614 search_regs.start[i] += j;
615 search_regs.end[i] += j;
616 }
617 search_regs_from_string = 0;
618 pos = search_regs.end[0];
619 }
620 else
621 {
622 immediate_quit = 0;
623 return (0 - n);
624 }
625 n--;
626 }
627 immediate_quit = 0;
628 return (pos);
629 }
630 else /* non-RE case */
631 {
632 #ifdef C_ALLOCA
633 int BM_tab_space[0400];
634 BM_tab = &BM_tab_space[0];
635 #else
636 BM_tab = (int *) alloca (0400 * sizeof (int));
637 #endif
638 /* The general approach is that we are going to maintain that we know */
639 /* the first (closest to the present position, in whatever direction */
640 /* we're searching) character that could possibly be the last */
641 /* (furthest from present position) character of a valid match. We */
642 /* advance the state of our knowledge by looking at that character */
643 /* and seeing whether it indeed matches the last character of the */
644 /* pattern. If it does, we take a closer look. If it does not, we */
645 /* move our pointer (to putative last characters) as far as is */
646 /* logically possible. This amount of movement, which I call a */
647 /* stride, will be the length of the pattern if the actual character */
648 /* appears nowhere in the pattern, otherwise it will be the distance */
649 /* from the last occurrence of that character to the end of the */
650 /* pattern. */
651 /* As a coding trick, an enormous stride is coded into the table for */
652 /* characters that match the last character. This allows use of only */
653 /* a single test, a test for having gone past the end of the */
654 /* permissible match region, to test for both possible matches (when */
655 /* the stride goes past the end immediately) and failure to */
656 /* match (where you get nudged past the end one stride at a time). */
657
658 /* Here we make a "mickey mouse" BM table. The stride of the search */
659 /* is determined only by the last character of the putative match. */
660 /* If that character does not match, we will stride the proper */
661 /* distance to propose a match that superimposes it on the last */
662 /* instance of a character that matches it (per trt), or misses */
663 /* it entirely if there is none. */
664
665 dirlen = len * direction;
666 infinity = dirlen - (lim + pos + len + len) * direction;
667 if (direction < 0)
668 pat = (base_pat += len - 1);
669 BM_tab_base = BM_tab;
670 BM_tab += 0400;
671 j = dirlen; /* to get it in a register */
672 /* A character that does not appear in the pattern induces a */
673 /* stride equal to the pattern length. */
674 while (BM_tab_base != BM_tab)
675 {
676 *--BM_tab = j;
677 *--BM_tab = j;
678 *--BM_tab = j;
679 *--BM_tab = j;
680 }
681 i = 0;
682 while (i != infinity)
683 {
684 j = pat[i]; i += direction;
685 if (i == dirlen) i = infinity;
686 if ((int) trt)
687 {
688 k = (j = trt[j]);
689 if (i == infinity)
690 stride_for_teases = BM_tab[j];
691 BM_tab[j] = dirlen - i;
692 /* A translation table is accompanied by its inverse -- see */
693 /* comment following downcase_table for details */
694 while ((j = inverse_trt[j]) != k)
695 BM_tab[j] = dirlen - i;
696 }
697 else
698 {
699 if (i == infinity)
700 stride_for_teases = BM_tab[j];
701 BM_tab[j] = dirlen - i;
702 }
703 /* stride_for_teases tells how much to stride if we get a */
704 /* match on the far character but are subsequently */
705 /* disappointed, by recording what the stride would have been */
706 /* for that character if the last character had been */
707 /* different. */
708 }
709 infinity = dirlen - infinity;
710 pos += dirlen - ((direction > 0) ? direction : 0);
711 /* loop invariant - pos points at where last char (first char if reverse)
712 of pattern would align in a possible match. */
713 while (n != 0)
714 {
715 if ((lim - pos - (direction > 0)) * direction < 0)
716 return (n * (0 - direction));
717 /* First we do the part we can by pointers (maybe nothing) */
718 QUIT;
719 pat = base_pat;
720 limit = pos - dirlen + direction;
721 limit = ((direction > 0)
722 ? BUFFER_CEILING_OF (limit)
723 : BUFFER_FLOOR_OF (limit));
724 /* LIMIT is now the last (not beyond-last!) value
725 POS can take on without hitting edge of buffer or the gap. */
726 limit = ((direction > 0)
727 ? min (lim - 1, min (limit, pos + 20000))
728 : max (lim, max (limit, pos - 20000)));
729 if ((limit - pos) * direction > 20)
730 {
731 p_limit = &FETCH_CHAR (limit);
732 p2 = (cursor = &FETCH_CHAR (pos));
733 /* In this loop, pos + cursor - p2 is the surrogate for pos */
734 while (1) /* use one cursor setting as long as i can */
735 {
736 if (direction > 0) /* worth duplicating */
737 {
738 /* Use signed comparison if appropriate
739 to make cursor+infinity sure to be > p_limit.
740 Assuming that the buffer lies in a range of addresses
741 that are all "positive" (as ints) or all "negative",
742 either kind of comparison will work as long
743 as we don't step by infinity. So pick the kind
744 that works when we do step by infinity. */
745 if ((int) (p_limit + infinity) > (int) p_limit)
746 while ((int) cursor <= (int) p_limit)
747 cursor += BM_tab[*cursor];
748 else
749 while ((unsigned int) cursor <= (unsigned int) p_limit)
750 cursor += BM_tab[*cursor];
751 }
752 else
753 {
754 if ((int) (p_limit + infinity) < (int) p_limit)
755 while ((int) cursor >= (int) p_limit)
756 cursor += BM_tab[*cursor];
757 else
758 while ((unsigned int) cursor >= (unsigned int) p_limit)
759 cursor += BM_tab[*cursor];
760 }
761 /* If you are here, cursor is beyond the end of the searched region. */
762 /* This can happen if you match on the far character of the pattern, */
763 /* because the "stride" of that character is infinity, a number able */
764 /* to throw you well beyond the end of the search. It can also */
765 /* happen if you fail to match within the permitted region and would */
766 /* otherwise try a character beyond that region */
767 if ((cursor - p_limit) * direction <= len)
768 break; /* a small overrun is genuine */
769 cursor -= infinity; /* large overrun = hit */
770 i = dirlen - direction;
771 if ((int) trt)
772 {
773 while ((i -= direction) + direction != 0)
774 if (pat[i] != trt[*(cursor -= direction)])
775 break;
776 }
777 else
778 {
779 while ((i -= direction) + direction != 0)
780 if (pat[i] != *(cursor -= direction))
781 break;
782 }
783 cursor += dirlen - i - direction; /* fix cursor */
784 if (i + direction == 0)
785 {
786 cursor -= direction;
787
788 /* Make sure we have registers in which to store
789 the match position. */
790 if (search_regs.num_regs == 0)
791 {
792 regoff_t *starts, *ends;
793
794 starts =
795 (regoff_t *) xmalloc (2 * sizeof (regoff_t));
796 ends =
797 (regoff_t *) xmalloc (2 * sizeof (regoff_t));
798 re_set_registers (&searchbuf,
799 &search_regs,
800 2, starts, ends);
801 }
802
803 search_regs.start[0]
804 = pos + cursor - p2 + ((direction > 0)
805 ? 1 - len : 0);
806 search_regs.end[0] = len + search_regs.start[0];
807 search_regs_from_string = 0;
808 if ((n -= direction) != 0)
809 cursor += dirlen; /* to resume search */
810 else
811 return ((direction > 0)
812 ? search_regs.end[0] : search_regs.start[0]);
813 }
814 else
815 cursor += stride_for_teases; /* <sigh> we lose - */
816 }
817 pos += cursor - p2;
818 }
819 else
820 /* Now we'll pick up a clump that has to be done the hard */
821 /* way because it covers a discontinuity */
822 {
823 limit = ((direction > 0)
824 ? BUFFER_CEILING_OF (pos - dirlen + 1)
825 : BUFFER_FLOOR_OF (pos - dirlen - 1));
826 limit = ((direction > 0)
827 ? min (limit + len, lim - 1)
828 : max (limit - len, lim));
829 /* LIMIT is now the last value POS can have
830 and still be valid for a possible match. */
831 while (1)
832 {
833 /* This loop can be coded for space rather than */
834 /* speed because it will usually run only once. */
835 /* (the reach is at most len + 21, and typically */
836 /* does not exceed len) */
837 while ((limit - pos) * direction >= 0)
838 pos += BM_tab[FETCH_CHAR(pos)];
839 /* now run the same tests to distinguish going off the */
840 /* end, a match or a phoney match. */
841 if ((pos - limit) * direction <= len)
842 break; /* ran off the end */
843 /* Found what might be a match.
844 Set POS back to last (first if reverse) char pos. */
845 pos -= infinity;
846 i = dirlen - direction;
847 while ((i -= direction) + direction != 0)
848 {
849 pos -= direction;
850 if (pat[i] != (((int) trt)
851 ? trt[FETCH_CHAR(pos)]
852 : FETCH_CHAR (pos)))
853 break;
854 }
855 /* Above loop has moved POS part or all the way
856 back to the first char pos (last char pos if reverse).
857 Set it once again at the last (first if reverse) char. */
858 pos += dirlen - i- direction;
859 if (i + direction == 0)
860 {
861 pos -= direction;
862
863 /* Make sure we have registers in which to store
864 the match position. */
865 if (search_regs.num_regs == 0)
866 {
867 regoff_t *starts, *ends;
868
869 starts =
870 (regoff_t *) xmalloc (2 * sizeof (regoff_t));
871 ends =
872 (regoff_t *) xmalloc (2 * sizeof (regoff_t));
873 re_set_registers (&searchbuf,
874 &search_regs,
875 2, starts, ends);
876 }
877
878 search_regs.start[0]
879 = pos + ((direction > 0) ? 1 - len : 0);
880 search_regs.end[0] = len + search_regs.start[0];
881 search_regs_from_string = 0;
882 if ((n -= direction) != 0)
883 pos += dirlen; /* to resume search */
884 else
885 return ((direction > 0)
886 ? search_regs.end[0] : search_regs.start[0]);
887 }
888 else
889 pos += stride_for_teases;
890 }
891 }
892 /* We have done one clump. Can we continue? */
893 if ((lim - pos) * direction < 0)
894 return ((0 - n) * direction);
895 }
896 return pos;
897 }
898 }
899 \f
900 /* Given a string of words separated by word delimiters,
901 compute a regexp that matches those exact words
902 separated by arbitrary punctuation. */
903
904 static Lisp_Object
905 wordify (string)
906 Lisp_Object string;
907 {
908 register unsigned char *p, *o;
909 register int i, len, punct_count = 0, word_count = 0;
910 Lisp_Object val;
911
912 CHECK_STRING (string, 0);
913 p = XSTRING (string)->data;
914 len = XSTRING (string)->size;
915
916 for (i = 0; i < len; i++)
917 if (SYNTAX (p[i]) != Sword)
918 {
919 punct_count++;
920 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
921 }
922 if (SYNTAX (p[len-1]) == Sword) word_count++;
923 if (!word_count) return build_string ("");
924
925 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
926
927 o = XSTRING (val)->data;
928 *o++ = '\\';
929 *o++ = 'b';
930
931 for (i = 0; i < len; i++)
932 if (SYNTAX (p[i]) == Sword)
933 *o++ = p[i];
934 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
935 {
936 *o++ = '\\';
937 *o++ = 'W';
938 *o++ = '\\';
939 *o++ = 'W';
940 *o++ = '*';
941 }
942
943 *o++ = '\\';
944 *o++ = 'b';
945
946 return val;
947 }
948 \f
949 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
950 "sSearch backward: ",
951 "Search backward from point for STRING.\n\
952 Set point to the beginning of the occurrence found, and return point.\n\
953 An optional second argument bounds the search; it is a buffer position.\n\
954 The match found must not extend before that position.\n\
955 Optional third argument, if t, means if fail just return nil (no error).\n\
956 If not nil and not t, position at limit of search and return nil.\n\
957 Optional fourth argument is repeat count--search for successive occurrences.\n\
958 See also the functions `match-beginning', `match-end' and `replace-match'.")
959 (string, bound, noerror, count)
960 Lisp_Object string, bound, noerror, count;
961 {
962 return search_command (string, bound, noerror, count, -1, 0);
963 }
964
965 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "sSearch: ",
966 "Search forward from point for STRING.\n\
967 Set point to the end of the occurrence found, and return point.\n\
968 An optional second argument bounds the search; it is a buffer position.\n\
969 The match found must not extend after that position. nil is equivalent\n\
970 to (point-max).\n\
971 Optional third argument, if t, means if fail just return nil (no error).\n\
972 If not nil and not t, move to limit of search and return nil.\n\
973 Optional fourth argument is repeat count--search for successive occurrences.\n\
974 See also the functions `match-beginning', `match-end' and `replace-match'.")
975 (string, bound, noerror, count)
976 Lisp_Object string, bound, noerror, count;
977 {
978 return search_command (string, bound, noerror, count, 1, 0);
979 }
980
981 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
982 "sWord search backward: ",
983 "Search backward from point for STRING, ignoring differences in punctuation.\n\
984 Set point to the beginning of the occurrence found, and return point.\n\
985 An optional second argument bounds the search; it is a buffer position.\n\
986 The match found must not extend before that position.\n\
987 Optional third argument, if t, means if fail just return nil (no error).\n\
988 If not nil and not t, move to limit of search and return nil.\n\
989 Optional fourth argument is repeat count--search for successive occurrences.")
990 (string, bound, noerror, count)
991 Lisp_Object string, bound, noerror, count;
992 {
993 return search_command (wordify (string), bound, noerror, count, -1, 1);
994 }
995
996 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
997 "sWord search: ",
998 "Search forward from point for STRING, ignoring differences in punctuation.\n\
999 Set point to the end of the occurrence found, and return point.\n\
1000 An optional second argument bounds the search; it is a buffer position.\n\
1001 The match found must not extend after that position.\n\
1002 Optional third argument, if t, means if fail just return nil (no error).\n\
1003 If not nil and not t, move to limit of search and return nil.\n\
1004 Optional fourth argument is repeat count--search for successive occurrences.")
1005 (string, bound, noerror, count)
1006 Lisp_Object string, bound, noerror, count;
1007 {
1008 return search_command (wordify (string), bound, noerror, count, 1, 1);
1009 }
1010
1011 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1012 "sRE search backward: ",
1013 "Search backward from point for match for regular expression REGEXP.\n\
1014 Set point to the beginning of the match, and return point.\n\
1015 The match found is the one starting last in the buffer\n\
1016 and yet ending before the place the origin of the search.\n\
1017 An optional second argument bounds the search; it is a buffer position.\n\
1018 The match found must start at or after that position.\n\
1019 Optional third argument, if t, means if fail just return nil (no error).\n\
1020 If not nil and not t, move to limit of search and return nil.\n\
1021 Optional fourth argument is repeat count--search for successive occurrences.\n\
1022 See also the functions `match-beginning', `match-end' and `replace-match'.")
1023 (string, bound, noerror, count)
1024 Lisp_Object string, bound, noerror, count;
1025 {
1026 return search_command (string, bound, noerror, count, -1, 1);
1027 }
1028
1029 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1030 "sRE search: ",
1031 "Search forward from point for regular expression REGEXP.\n\
1032 Set point to the end of the occurrence found, and return point.\n\
1033 An optional second argument bounds the search; it is a buffer position.\n\
1034 The match found must not extend after that position.\n\
1035 Optional third argument, if t, means if fail just return nil (no error).\n\
1036 If not nil and not t, move to limit of search and return nil.\n\
1037 Optional fourth argument is repeat count--search for successive occurrences.\n\
1038 See also the functions `match-beginning', `match-end' and `replace-match'.")
1039 (string, bound, noerror, count)
1040 Lisp_Object string, bound, noerror, count;
1041 {
1042 return search_command (string, bound, noerror, count, 1, 1);
1043 }
1044 \f
1045 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 3, 0,
1046 "Replace text matched by last search with NEWTEXT.\n\
1047 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1048 Otherwise convert to all caps or cap initials, like replaced text.\n\
1049 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1050 Otherwise treat `\\' as special:\n\
1051 `\\&' in NEWTEXT means substitute original matched text.\n\
1052 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1053 If Nth parens didn't match, substitute nothing.\n\
1054 `\\\\' means insert one `\\'.\n\
1055 FIXEDCASE and LITERAL are optional arguments.\n\
1056 Leaves point at end of replacement text.")
1057 (string, fixedcase, literal)
1058 Lisp_Object string, fixedcase, literal;
1059 {
1060 enum { nochange, all_caps, cap_initial } case_action;
1061 register int pos, last;
1062 int some_multiletter_word;
1063 int some_letter = 0;
1064 register int c, prevc;
1065 int inslen;
1066
1067 CHECK_STRING (string, 0);
1068
1069 case_action = nochange; /* We tried an initialization */
1070 /* but some C compilers blew it */
1071
1072 if (search_regs.num_regs <= 0)
1073 error ("replace-match called before any match found");
1074
1075 if (search_regs.start[0] < BEGV
1076 || search_regs.start[0] > search_regs.end[0]
1077 || search_regs.end[0] > ZV)
1078 args_out_of_range(make_number (search_regs.start[0]),
1079 make_number (search_regs.end[0]));
1080
1081 if (NILP (fixedcase))
1082 {
1083 /* Decide how to casify by examining the matched text. */
1084
1085 last = search_regs.end[0];
1086 prevc = '\n';
1087 case_action = all_caps;
1088
1089 /* some_multiletter_word is set nonzero if any original word
1090 is more than one letter long. */
1091 some_multiletter_word = 0;
1092
1093 for (pos = search_regs.start[0]; pos < last; pos++)
1094 {
1095 c = FETCH_CHAR (pos);
1096 if (LOWERCASEP (c))
1097 {
1098 /* Cannot be all caps if any original char is lower case */
1099
1100 case_action = cap_initial;
1101 if (SYNTAX (prevc) != Sword)
1102 {
1103 /* Cannot even be cap initials
1104 if some original initial is lower case */
1105 case_action = nochange;
1106 break;
1107 }
1108 else
1109 some_multiletter_word = 1;
1110 }
1111 else if (!NOCASEP (c))
1112 {
1113 some_letter = 1;
1114 if (!some_multiletter_word && SYNTAX (prevc) == Sword)
1115 some_multiletter_word = 1;
1116 }
1117
1118 prevc = c;
1119 }
1120
1121 /* Do not make new text all caps
1122 if the original text contained only single letter words. */
1123 if (case_action == all_caps && !some_multiletter_word)
1124 case_action = cap_initial;
1125
1126 if (!some_letter) case_action = nochange;
1127 }
1128
1129 SET_PT (search_regs.end[0]);
1130 if (!NILP (literal))
1131 Finsert (1, &string);
1132 else
1133 {
1134 struct gcpro gcpro1;
1135 GCPRO1 (string);
1136
1137 for (pos = 0; pos < XSTRING (string)->size; pos++)
1138 {
1139 c = XSTRING (string)->data[pos];
1140 if (c == '\\')
1141 {
1142 c = XSTRING (string)->data[++pos];
1143 if (c == '&')
1144 Finsert_buffer_substring (Fcurrent_buffer (),
1145 make_number (search_regs.start[0]),
1146 make_number (search_regs.end[0]));
1147 else if (c >= '1' && c <= search_regs.num_regs + '0')
1148 {
1149 if (search_regs.start[c - '0'] >= 1)
1150 Finsert_buffer_substring (Fcurrent_buffer (),
1151 make_number (search_regs.start[c - '0']),
1152 make_number (search_regs.end[c - '0']));
1153 }
1154 else
1155 insert_char (c);
1156 }
1157 else
1158 insert_char (c);
1159 }
1160 UNGCPRO;
1161 }
1162
1163 inslen = point - (search_regs.end[0]);
1164 del_range (search_regs.start[0], search_regs.end[0]);
1165
1166 if (case_action == all_caps)
1167 Fupcase_region (make_number (point - inslen), make_number (point));
1168 else if (case_action == cap_initial)
1169 upcase_initials_region (make_number (point - inslen), make_number (point));
1170 return Qnil;
1171 }
1172 \f
1173 static Lisp_Object
1174 match_limit (num, beginningp)
1175 Lisp_Object num;
1176 int beginningp;
1177 {
1178 register int n;
1179
1180 CHECK_NUMBER (num, 0);
1181 n = XINT (num);
1182 if (n < 0 || n >= search_regs.num_regs)
1183 args_out_of_range (num, make_number (search_regs.num_regs));
1184 if (search_regs.num_regs <= 0
1185 || search_regs.start[n] < 0)
1186 return Qnil;
1187 return (make_number ((beginningp) ? search_regs.start[n]
1188 : search_regs.end[n]));
1189 }
1190
1191 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1192 "Return position of start of text matched by last search.\n\
1193 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1194 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1195 Zero means the entire text matched by the whole regexp or whole string.")
1196 (num)
1197 Lisp_Object num;
1198 {
1199 return match_limit (num, 1);
1200 }
1201
1202 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1203 "Return position of end of text matched by last search.\n\
1204 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1205 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1206 Zero means the entire text matched by the whole regexp or whole string.")
1207 (num)
1208 Lisp_Object num;
1209 {
1210 return match_limit (num, 0);
1211 }
1212
1213 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 0, 0,
1214 "Return a list containing all info on what the last search matched.\n\
1215 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1216 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1217 if the last match was on a buffer; integers or nil if a string was matched.\n\
1218 Use `store-match-data' to reinstate the data in this list.")
1219 ()
1220 {
1221 Lisp_Object *data;
1222 int i, len;
1223
1224 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
1225 * sizeof (Lisp_Object));
1226
1227 len = -1;
1228 for (i = 0; i < search_regs.num_regs; i++)
1229 {
1230 int start = search_regs.start[i];
1231 if (start >= 0)
1232 {
1233 if (search_regs_from_string)
1234 {
1235 XFASTINT (data[2 * i]) = start;
1236 XFASTINT (data[2 * i + 1]) = search_regs.end[i];
1237 }
1238 else
1239 {
1240 data[2 * i] = Fmake_marker ();
1241 Fset_marker (data[2 * i], make_number (start), Qnil);
1242 data[2 * i + 1] = Fmake_marker ();
1243 Fset_marker (data[2 * i + 1],
1244 make_number (search_regs.end[i]), Qnil);
1245 }
1246 len = i;
1247 }
1248 else
1249 data[2 * i] = data [2 * i + 1] = Qnil;
1250 }
1251 return Flist (2 * len + 2, data);
1252 }
1253
1254
1255 DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1256 "Set internal data on last search match from elements of LIST.\n\
1257 LIST should have been created by calling `match-data' previously.")
1258 (list)
1259 register Lisp_Object list;
1260 {
1261 register int i;
1262 register Lisp_Object marker;
1263
1264 if (!CONSP (list) && !NILP (list))
1265 list = wrong_type_argument (Qconsp, list, 0);
1266
1267 /* Allocate registers if they don't already exist. */
1268 {
1269 int length = Flength (list) / 2;
1270
1271 if (length > search_regs.num_regs)
1272 {
1273 if (search_regs.num_regs == 0)
1274 {
1275 search_regs.start
1276 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1277 search_regs.end
1278 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1279 }
1280 else
1281 {
1282 search_regs.start
1283 = (regoff_t *) xrealloc (search_regs.start,
1284 length * sizeof (regoff_t));
1285 search_regs.end
1286 = (regoff_t *) xrealloc (search_regs.end,
1287 length * sizeof (regoff_t));
1288 }
1289
1290 re_set_registers (&searchbuf, &search_regs, length,
1291 search_regs.start, search_regs.end);
1292 }
1293 }
1294
1295 for (i = 0; i < search_regs.num_regs; i++)
1296 {
1297 marker = Fcar (list);
1298 if (NILP (marker))
1299 {
1300 search_regs.start[i] = -1;
1301 list = Fcdr (list);
1302 }
1303 else
1304 {
1305 if (XTYPE (marker) == Lisp_Marker
1306 && XMARKER (marker)->buffer == 0)
1307 XFASTINT (marker) = 0;
1308
1309 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1310 search_regs.start[i] = XINT (marker);
1311 list = Fcdr (list);
1312
1313 marker = Fcar (list);
1314 if (XTYPE (marker) == Lisp_Marker
1315 && XMARKER (marker)->buffer == 0)
1316 XFASTINT (marker) = 0;
1317
1318 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1319 search_regs.end[i] = XINT (marker);
1320 }
1321 list = Fcdr (list);
1322 }
1323
1324 return Qnil;
1325 }
1326
1327 /* Quote a string to inactivate reg-expr chars */
1328
1329 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
1330 "Return a regexp string which matches exactly STRING and nothing else.")
1331 (str)
1332 Lisp_Object str;
1333 {
1334 register unsigned char *in, *out, *end;
1335 register unsigned char *temp;
1336
1337 CHECK_STRING (str, 0);
1338
1339 temp = (unsigned char *) alloca (XSTRING (str)->size * 2);
1340
1341 /* Now copy the data into the new string, inserting escapes. */
1342
1343 in = XSTRING (str)->data;
1344 end = in + XSTRING (str)->size;
1345 out = temp;
1346
1347 for (; in != end; in++)
1348 {
1349 if (*in == '[' || *in == ']'
1350 || *in == '*' || *in == '.' || *in == '\\'
1351 || *in == '?' || *in == '+'
1352 || *in == '^' || *in == '$')
1353 *out++ = '\\';
1354 *out++ = *in;
1355 }
1356
1357 return make_string (temp, out - temp);
1358 }
1359 \f
1360 syms_of_search ()
1361 {
1362 register int i;
1363
1364 searchbuf.allocated = 100;
1365 searchbuf.buffer = (unsigned char *) malloc (searchbuf.allocated);
1366 searchbuf.fastmap = search_fastmap;
1367
1368 Qsearch_failed = intern ("search-failed");
1369 staticpro (&Qsearch_failed);
1370 Qinvalid_regexp = intern ("invalid-regexp");
1371 staticpro (&Qinvalid_regexp);
1372
1373 Fput (Qsearch_failed, Qerror_conditions,
1374 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
1375 Fput (Qsearch_failed, Qerror_message,
1376 build_string ("Search failed"));
1377
1378 Fput (Qinvalid_regexp, Qerror_conditions,
1379 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
1380 Fput (Qinvalid_regexp, Qerror_message,
1381 build_string ("Invalid regexp"));
1382
1383 last_regexp = Qnil;
1384 staticpro (&last_regexp);
1385
1386 defsubr (&Sstring_match);
1387 defsubr (&Slooking_at);
1388 defsubr (&Sskip_chars_forward);
1389 defsubr (&Sskip_chars_backward);
1390 defsubr (&Ssearch_forward);
1391 defsubr (&Ssearch_backward);
1392 defsubr (&Sword_search_forward);
1393 defsubr (&Sword_search_backward);
1394 defsubr (&Sre_search_forward);
1395 defsubr (&Sre_search_backward);
1396 defsubr (&Sreplace_match);
1397 defsubr (&Smatch_beginning);
1398 defsubr (&Smatch_end);
1399 defsubr (&Smatch_data);
1400 defsubr (&Sstore_match_data);
1401 defsubr (&Sregexp_quote);
1402 }