1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1992 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 1, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, 675 Mass Ave, Cambridge, MA 02139, USA. */
26 #include "blockinput.h"
28 #include <sys/types.h>
31 #define max(a, b) ((a) > (b) ? (a) : (b))
32 #define min(a, b) ((a) < (b) ? (a) : (b))
34 /* We compile regexps into this buffer and then use it for searching. */
36 struct re_pattern_buffer searchbuf
;
38 char search_fastmap
[0400];
40 /* Last regexp we compiled */
42 Lisp_Object last_regexp
;
44 /* Every call to re_match, etc., must pass &search_regs as the regs
45 argument unless you can show it is unnecessary (i.e., if re_match
46 is certainly going to be called again before region-around-match can be called).
49 Since the registers are now dynamically allocated, we need to make
50 sure not to refer to the Nth register before checking that it has
51 been allocated by checking search_regs.num_regs.
53 The regex code keeps track of whether it has allocated the search
54 buffer using bits in searchbuf. This means that whenever you
55 compile a new pattern, it completely forgets whether it has
56 allocated any registers, and will allocate new registers the next
57 time you call a searching or matching function. Therefore, we need
58 to call re_set_registers after compiling a new pattern or after
59 setting the match registers, so that the regex functions will be
60 able to free or re-allocate it properly. */
61 static struct re_registers search_regs
;
63 /* The buffer in which the last search was performed, or
64 Qt if the last search was done in a string;
65 Qnil if no searching has been done yet. */
66 static Lisp_Object last_thing_searched
;
68 /* error condition signalled when regexp compile_pattern fails */
70 Lisp_Object Qinvalid_regexp
;
75 error ("Stack overflow in regexp matcher");
84 /* Compile a regexp and signal a Lisp error if anything goes wrong. */
86 compile_pattern (pattern
, bufp
, regp
, translate
)
88 struct re_pattern_buffer
*bufp
;
89 struct re_registers
*regp
;
95 if (EQ (pattern
, last_regexp
)
96 && translate
== bufp
->translate
)
100 bufp
->translate
= translate
;
102 val
= re_compile_pattern ((char *) XSTRING (pattern
)->data
,
103 XSTRING (pattern
)->size
,
108 dummy
= build_string (val
);
110 Fsignal (Qinvalid_regexp
, Fcons (dummy
, Qnil
));
113 last_regexp
= pattern
;
115 /* Advise the searching functions about the space we have allocated
116 for register data. */
119 re_set_registers (bufp
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
125 /* Error condition used for failing searches */
126 Lisp_Object Qsearch_failed
;
132 Fsignal (Qsearch_failed
, Fcons (arg
, Qnil
));
136 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
137 "Return t if text after point matches regular expression PAT.\n\
138 This function modifies the match data that `match-beginning',\n\
139 `match-end' and `match-data' access; save and restore the match\n\
140 data if you want to preserve them.")
145 unsigned char *p1
, *p2
;
149 CHECK_STRING (string
, 0);
150 compile_pattern (string
, &searchbuf
, &search_regs
,
151 !NILP (current_buffer
->case_fold_search
) ? DOWNCASE_TABLE
: 0);
154 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
156 /* Get pointers and sizes of the two strings
157 that make up the visible portion of the buffer. */
176 i
= re_match_2 (&searchbuf
, (char *) p1
, s1
, (char *) p2
, s2
,
177 point
- BEGV
, &search_regs
,
183 val
= (0 <= i
? Qt
: Qnil
);
184 for (i
= 0; i
< search_regs
.num_regs
; i
++)
185 if (search_regs
.start
[i
] >= 0)
187 search_regs
.start
[i
] += BEGV
;
188 search_regs
.end
[i
] += BEGV
;
190 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
195 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
196 "Return index of start of first match for REGEXP in STRING, or nil.\n\
197 If third arg START is non-nil, start search at that index in STRING.\n\
198 For index of first char beyond the match, do (match-end 0).\n\
199 `match-end' and `match-beginning' also give indices of substrings\n\
200 matched by parenthesis constructs in the pattern.")
201 (regexp
, string
, start
)
202 Lisp_Object regexp
, string
, start
;
207 CHECK_STRING (regexp
, 0);
208 CHECK_STRING (string
, 1);
214 int len
= XSTRING (string
)->size
;
216 CHECK_NUMBER (start
, 2);
218 if (s
< 0 && -s
<= len
)
220 else if (0 > s
|| s
> len
)
221 args_out_of_range (string
, start
);
224 compile_pattern (regexp
, &searchbuf
, &search_regs
,
225 !NILP (current_buffer
->case_fold_search
) ? DOWNCASE_TABLE
: 0);
228 val
= re_search (&searchbuf
, (char *) XSTRING (string
)->data
,
229 XSTRING (string
)->size
, s
, XSTRING (string
)->size
- s
,
233 last_thing_searched
= Qt
;
236 if (val
< 0) return Qnil
;
237 return make_number (val
);
240 /* Match REGEXP against STRING, searching all of STRING,
241 and return the index of the match, or negative on failure.
242 This does not clobber the match data. */
245 fast_string_match (regexp
, string
)
246 Lisp_Object regexp
, string
;
250 compile_pattern (regexp
, &searchbuf
, 0, 0);
253 val
= re_search (&searchbuf
, (char *) XSTRING (string
)->data
,
254 XSTRING (string
)->size
, 0, XSTRING (string
)->size
,
261 /* Search for COUNT instances of the character TARGET, starting at START.
262 If COUNT is negative, search backwards.
264 If we find COUNT instances, set *SHORTAGE to zero, and return the
265 position after the COUNTth match. Note that for reverse motion
266 this is not the same as the usual convention for Emacs motion commands.
268 If we don't find COUNT instances before reaching the end of the
269 buffer (or the beginning, if scanning backwards), set *SHORTAGE to
270 the number of TARGETs left unfound, and return the end of the
271 buffer we bumped up against. */
273 scan_buffer (target
, start
, count
, shortage
)
274 int *shortage
, start
;
275 register int count
, target
;
277 int limit
= ((count
> 0) ? ZV
- 1 : BEGV
);
278 int direction
= ((count
> 0) ? 1 : -1);
280 register unsigned char *cursor
;
283 register int ceiling
;
284 register unsigned char *ceiling_addr
;
292 while (start
!= limit
+ 1)
294 ceiling
= BUFFER_CEILING_OF (start
);
295 ceiling
= min (limit
, ceiling
);
296 ceiling_addr
= &FETCH_CHAR (ceiling
) + 1;
297 base
= (cursor
= &FETCH_CHAR (start
));
300 while (*cursor
!= target
&& ++cursor
!= ceiling_addr
)
302 if (cursor
!= ceiling_addr
)
307 return (start
+ cursor
- base
+ 1);
310 if (++cursor
== ceiling_addr
)
316 start
+= cursor
- base
;
320 start
--; /* first character we scan */
321 while (start
> limit
- 1)
322 { /* we WILL scan under start */
323 ceiling
= BUFFER_FLOOR_OF (start
);
324 ceiling
= max (limit
, ceiling
);
325 ceiling_addr
= &FETCH_CHAR (ceiling
) - 1;
326 base
= (cursor
= &FETCH_CHAR (start
));
330 while (--cursor
!= ceiling_addr
&& *cursor
!= target
)
332 if (cursor
!= ceiling_addr
)
337 return (start
+ cursor
- base
+ 1);
343 start
+= cursor
- base
;
348 *shortage
= count
* direction
;
349 return (start
+ ((direction
== 1 ? 0 : 1)));
353 find_next_newline (from
, cnt
)
354 register int from
, cnt
;
356 return (scan_buffer ('\n', from
, cnt
, (int *) 0));
359 Lisp_Object
skip_chars ();
361 DEFUN ("skip-chars-forward", Fskip_chars_forward
, Sskip_chars_forward
, 1, 2, 0,
362 "Move point forward, stopping before a char not in CHARS, or at position LIM.\n\
363 CHARS is like the inside of a `[...]' in a regular expression\n\
364 except that `]' is never special and `\\' quotes `^', `-' or `\\'.\n\
365 Thus, with arg \"a-zA-Z\", this skips letters stopping before first nonletter.\n\
366 With arg \"^a-zA-Z\", skips nonletters stopping before first letter.\n\
367 Returns the distance traveled, either zero or positive.")
369 Lisp_Object string
, lim
;
371 return skip_chars (1, 0, string
, lim
);
374 DEFUN ("skip-chars-backward", Fskip_chars_backward
, Sskip_chars_backward
, 1, 2, 0,
375 "Move point backward, stopping after a char not in CHARS, or at position LIM.\n\
376 See `skip-chars-forward' for details.\n\
377 Returns the distance traveled, either zero or negative.")
379 Lisp_Object string
, lim
;
381 return skip_chars (0, 0, string
, lim
);
384 DEFUN ("skip-syntax-forward", Fskip_syntax_forward
, Sskip_syntax_forward
, 1, 2, 0,
385 "Move point forward across chars in specified syntax classes.\n\
386 SYNTAX is a string of syntax code characters.\n\
387 Stop before a char whose syntax is not in SYNTAX, or at position LIM.\n\
388 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
389 This function returns the distance traveled, either zero or positive.")
391 Lisp_Object syntax
, lim
;
393 return skip_chars (1, 1, syntax
, lim
);
396 DEFUN ("skip-syntax-backward", Fskip_syntax_backward
, Sskip_syntax_backward
, 1, 2, 0,
397 "Move point backward across chars in specified syntax classes.\n\
398 SYNTAX is a string of syntax code characters.\n\
399 Stop on reaching a char whose syntax is not in SYNTAX, or at position LIM.\n\
400 If SYNTAX starts with ^, skip characters whose syntax is NOT in SYNTAX.\n\
401 This function returns the distance traveled, either zero or negative.")
403 Lisp_Object syntax
, lim
;
405 return skip_chars (0, 1, syntax
, lim
);
409 skip_chars (forwardp
, syntaxp
, string
, lim
)
410 int forwardp
, syntaxp
;
411 Lisp_Object string
, lim
;
413 register unsigned char *p
, *pend
;
414 register unsigned char c
;
415 unsigned char fastmap
[0400];
419 CHECK_STRING (string
, 0);
422 XSET (lim
, Lisp_Int
, forwardp
? ZV
: BEGV
);
424 CHECK_NUMBER_COERCE_MARKER (lim
, 1);
426 #if 0 /* This breaks some things... jla. */
427 /* In any case, don't allow scan outside bounds of buffer. */
428 if (XFASTINT (lim
) > ZV
)
430 if (XFASTINT (lim
) < BEGV
)
431 XFASTINT (lim
) = BEGV
;
434 p
= XSTRING (string
)->data
;
435 pend
= p
+ XSTRING (string
)->size
;
436 bzero (fastmap
, sizeof fastmap
);
438 if (p
!= pend
&& *p
== '^')
443 /* Find the characters specified and set their elements of fastmap.
444 If syntaxp, each character counts as itself.
445 Otherwise, handle backslashes and ranges specially */
456 if (p
== pend
) break;
459 if (p
!= pend
&& *p
== '-')
462 if (p
== pend
) break;
475 /* If ^ was the first character, complement the fastmap. */
478 for (i
= 0; i
< sizeof fastmap
; i
++)
482 int start_point
= point
;
490 while (point
< XINT (lim
)
491 && fastmap
[(unsigned char) syntax_code_spec
[(int) SYNTAX (FETCH_CHAR (point
))]])
496 while (point
> XINT (lim
)
497 && fastmap
[(unsigned char) syntax_code_spec
[(int) SYNTAX (FETCH_CHAR (point
- 1))]])
505 while (point
< XINT (lim
) && fastmap
[FETCH_CHAR (point
)])
510 while (point
> XINT (lim
) && fastmap
[FETCH_CHAR (point
- 1)])
516 return make_number (point
- start_point
);
520 /* Subroutines of Lisp buffer search functions. */
523 search_command (string
, bound
, noerror
, count
, direction
, RE
)
524 Lisp_Object string
, bound
, noerror
, count
;
534 CHECK_NUMBER (count
, 3);
538 CHECK_STRING (string
, 0);
540 lim
= n
> 0 ? ZV
: BEGV
;
543 CHECK_NUMBER_COERCE_MARKER (bound
, 1);
545 if (n
> 0 ? lim
< point
: lim
> point
)
546 error ("Invalid search bound (wrong side of point)");
553 np
= search_buffer (string
, point
, lim
, n
, RE
,
554 (!NILP (current_buffer
->case_fold_search
)
555 ? XSTRING (current_buffer
->case_canon_table
)->data
: 0),
556 (!NILP (current_buffer
->case_fold_search
)
557 ? XSTRING (current_buffer
->case_eqv_table
)->data
: 0));
561 return signal_failure (string
);
562 if (!EQ (noerror
, Qt
))
564 if (lim
< BEGV
|| lim
> ZV
)
568 #if 0 /* This would be clean, but maybe programs depend on
569 a value of nil here. */
577 if (np
< BEGV
|| np
> ZV
)
582 return make_number (np
);
585 /* Search for the N'th occurrence of STRING in the current buffer,
586 starting at position POS and stopping at position LIM,
587 treating STRING as a literal string if RE is false or as
588 a regular expression if RE is true.
590 If N is positive, searching is forward and LIM must be greater than POS.
591 If N is negative, searching is backward and LIM must be less than POS.
593 Returns -x if only N-x occurrences found (x > 0),
594 or else the position at the beginning of the Nth occurrence
595 (if searching backward) or the end (if searching forward). */
597 search_buffer (string
, pos
, lim
, n
, RE
, trt
, inverse_trt
)
603 register unsigned char *trt
;
604 register unsigned char *inverse_trt
;
606 int len
= XSTRING (string
)->size
;
607 unsigned char *base_pat
= XSTRING (string
)->data
;
608 register int *BM_tab
;
610 register int direction
= ((n
> 0) ? 1 : -1);
612 int infinity
, limit
, k
, stride_for_teases
;
613 register unsigned char *pat
, *cursor
, *p_limit
;
615 unsigned char *p1
, *p2
;
618 /* Null string is found at starting position. */
623 compile_pattern (string
, &searchbuf
, &search_regs
, (char *) trt
);
625 if (RE
/* Here we detect whether the */
626 /* generality of an RE search is */
628 /* first item is "exact match" */
629 && *(searchbuf
.buffer
) == (char) RE_EXACTN_VALUE
630 && searchbuf
.buffer
[1] + 2 == searchbuf
.used
) /*first is ONLY item */
632 RE
= 0; /* can do straight (non RE) search */
633 pat
= (base_pat
= (unsigned char *) searchbuf
.buffer
+ 2);
634 /* trt already applied */
635 len
= searchbuf
.used
- 2;
639 pat
= (unsigned char *) alloca (len
);
641 for (i
= len
; i
--;) /* Copy the pattern; apply trt */
642 *pat
++ = (((int) trt
) ? trt
[*base_pat
++] : *base_pat
++);
643 pat
-= len
; base_pat
= pat
;
648 immediate_quit
= 1; /* Quit immediately if user types ^G,
649 because letting this function finish
650 can take too long. */
651 QUIT
; /* Do a pending quit right away,
652 to avoid paradoxical behavior */
653 /* Get pointers and sizes of the two strings
654 that make up the visible portion of the buffer. */
675 val
= re_search_2 (&searchbuf
, (char *) p1
, s1
, (char *) p2
, s2
,
676 pos
- BEGV
, lim
- pos
, &search_regs
,
677 /* Don't allow match past current point */
685 for (i
= 0; i
< search_regs
.num_regs
; i
++)
686 if (search_regs
.start
[i
] >= 0)
688 search_regs
.start
[i
] += j
;
689 search_regs
.end
[i
] += j
;
691 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
692 /* Set pos to the new position. */
693 pos
= search_regs
.start
[0];
706 val
= re_search_2 (&searchbuf
, (char *) p1
, s1
, (char *) p2
, s2
,
707 pos
- BEGV
, lim
- pos
, &search_regs
,
715 for (i
= 0; i
< search_regs
.num_regs
; i
++)
716 if (search_regs
.start
[i
] >= 0)
718 search_regs
.start
[i
] += j
;
719 search_regs
.end
[i
] += j
;
721 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
722 pos
= search_regs
.end
[0];
734 else /* non-RE case */
737 int BM_tab_space
[0400];
738 BM_tab
= &BM_tab_space
[0];
740 BM_tab
= (int *) alloca (0400 * sizeof (int));
742 /* The general approach is that we are going to maintain that we know */
743 /* the first (closest to the present position, in whatever direction */
744 /* we're searching) character that could possibly be the last */
745 /* (furthest from present position) character of a valid match. We */
746 /* advance the state of our knowledge by looking at that character */
747 /* and seeing whether it indeed matches the last character of the */
748 /* pattern. If it does, we take a closer look. If it does not, we */
749 /* move our pointer (to putative last characters) as far as is */
750 /* logically possible. This amount of movement, which I call a */
751 /* stride, will be the length of the pattern if the actual character */
752 /* appears nowhere in the pattern, otherwise it will be the distance */
753 /* from the last occurrence of that character to the end of the pattern. */
755 /* As a coding trick, an enormous stride is coded into the table for */
756 /* characters that match the last character. This allows use of only */
757 /* a single test, a test for having gone past the end of the */
758 /* permissible match region, to test for both possible matches (when */
759 /* the stride goes past the end immediately) and failure to */
760 /* match (where you get nudged past the end one stride at a time). */
762 /* Here we make a "mickey mouse" BM table. The stride of the search */
763 /* is determined only by the last character of the putative match. */
764 /* If that character does not match, we will stride the proper */
765 /* distance to propose a match that superimposes it on the last */
766 /* instance of a character that matches it (per trt), or misses */
767 /* it entirely if there is none. */
769 dirlen
= len
* direction
;
770 infinity
= dirlen
- (lim
+ pos
+ len
+ len
) * direction
;
772 pat
= (base_pat
+= len
- 1);
773 BM_tab_base
= BM_tab
;
775 j
= dirlen
; /* to get it in a register */
776 /* A character that does not appear in the pattern induces a */
777 /* stride equal to the pattern length. */
778 while (BM_tab_base
!= BM_tab
)
786 while (i
!= infinity
)
788 j
= pat
[i
]; i
+= direction
;
789 if (i
== dirlen
) i
= infinity
;
794 stride_for_teases
= BM_tab
[j
];
795 BM_tab
[j
] = dirlen
- i
;
796 /* A translation table is accompanied by its inverse -- see */
797 /* comment following downcase_table for details */
798 while ((j
= inverse_trt
[j
]) != k
)
799 BM_tab
[j
] = dirlen
- i
;
804 stride_for_teases
= BM_tab
[j
];
805 BM_tab
[j
] = dirlen
- i
;
807 /* stride_for_teases tells how much to stride if we get a */
808 /* match on the far character but are subsequently */
809 /* disappointed, by recording what the stride would have been */
810 /* for that character if the last character had been different. */
813 infinity
= dirlen
- infinity
;
814 pos
+= dirlen
- ((direction
> 0) ? direction
: 0);
815 /* loop invariant - pos points at where last char (first char if reverse)
816 of pattern would align in a possible match. */
819 if ((lim
- pos
- (direction
> 0)) * direction
< 0)
820 return (n
* (0 - direction
));
821 /* First we do the part we can by pointers (maybe nothing) */
824 limit
= pos
- dirlen
+ direction
;
825 limit
= ((direction
> 0)
826 ? BUFFER_CEILING_OF (limit
)
827 : BUFFER_FLOOR_OF (limit
));
828 /* LIMIT is now the last (not beyond-last!) value
829 POS can take on without hitting edge of buffer or the gap. */
830 limit
= ((direction
> 0)
831 ? min (lim
- 1, min (limit
, pos
+ 20000))
832 : max (lim
, max (limit
, pos
- 20000)));
833 if ((limit
- pos
) * direction
> 20)
835 p_limit
= &FETCH_CHAR (limit
);
836 p2
= (cursor
= &FETCH_CHAR (pos
));
837 /* In this loop, pos + cursor - p2 is the surrogate for pos */
838 while (1) /* use one cursor setting as long as i can */
840 if (direction
> 0) /* worth duplicating */
842 /* Use signed comparison if appropriate
843 to make cursor+infinity sure to be > p_limit.
844 Assuming that the buffer lies in a range of addresses
845 that are all "positive" (as ints) or all "negative",
846 either kind of comparison will work as long
847 as we don't step by infinity. So pick the kind
848 that works when we do step by infinity. */
849 if ((int) (p_limit
+ infinity
) > (int) p_limit
)
850 while ((int) cursor
<= (int) p_limit
)
851 cursor
+= BM_tab
[*cursor
];
853 while ((unsigned int) cursor
<= (unsigned int) p_limit
)
854 cursor
+= BM_tab
[*cursor
];
858 if ((int) (p_limit
+ infinity
) < (int) p_limit
)
859 while ((int) cursor
>= (int) p_limit
)
860 cursor
+= BM_tab
[*cursor
];
862 while ((unsigned int) cursor
>= (unsigned int) p_limit
)
863 cursor
+= BM_tab
[*cursor
];
865 /* If you are here, cursor is beyond the end of the searched region. */
866 /* This can happen if you match on the far character of the pattern, */
867 /* because the "stride" of that character is infinity, a number able */
868 /* to throw you well beyond the end of the search. It can also */
869 /* happen if you fail to match within the permitted region and would */
870 /* otherwise try a character beyond that region */
871 if ((cursor
- p_limit
) * direction
<= len
)
872 break; /* a small overrun is genuine */
873 cursor
-= infinity
; /* large overrun = hit */
874 i
= dirlen
- direction
;
877 while ((i
-= direction
) + direction
!= 0)
878 if (pat
[i
] != trt
[*(cursor
-= direction
)])
883 while ((i
-= direction
) + direction
!= 0)
884 if (pat
[i
] != *(cursor
-= direction
))
887 cursor
+= dirlen
- i
- direction
; /* fix cursor */
888 if (i
+ direction
== 0)
892 /* Make sure we have registers in which to store
893 the match position. */
894 if (search_regs
.num_regs
== 0)
896 regoff_t
*starts
, *ends
;
899 (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
901 (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
903 re_set_registers (&searchbuf
,
910 = pos
+ cursor
- p2
+ ((direction
> 0)
912 search_regs
.end
[0] = len
+ search_regs
.start
[0];
913 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
914 if ((n
-= direction
) != 0)
915 cursor
+= dirlen
; /* to resume search */
917 return ((direction
> 0)
918 ? search_regs
.end
[0] : search_regs
.start
[0]);
921 cursor
+= stride_for_teases
; /* <sigh> we lose - */
926 /* Now we'll pick up a clump that has to be done the hard */
927 /* way because it covers a discontinuity */
929 limit
= ((direction
> 0)
930 ? BUFFER_CEILING_OF (pos
- dirlen
+ 1)
931 : BUFFER_FLOOR_OF (pos
- dirlen
- 1));
932 limit
= ((direction
> 0)
933 ? min (limit
+ len
, lim
- 1)
934 : max (limit
- len
, lim
));
935 /* LIMIT is now the last value POS can have
936 and still be valid for a possible match. */
939 /* This loop can be coded for space rather than */
940 /* speed because it will usually run only once. */
941 /* (the reach is at most len + 21, and typically */
942 /* does not exceed len) */
943 while ((limit
- pos
) * direction
>= 0)
944 pos
+= BM_tab
[FETCH_CHAR(pos
)];
945 /* now run the same tests to distinguish going off the */
946 /* end, a match or a phoney match. */
947 if ((pos
- limit
) * direction
<= len
)
948 break; /* ran off the end */
949 /* Found what might be a match.
950 Set POS back to last (first if reverse) char pos. */
952 i
= dirlen
- direction
;
953 while ((i
-= direction
) + direction
!= 0)
956 if (pat
[i
] != (((int) trt
)
957 ? trt
[FETCH_CHAR(pos
)]
961 /* Above loop has moved POS part or all the way
962 back to the first char pos (last char pos if reverse).
963 Set it once again at the last (first if reverse) char. */
964 pos
+= dirlen
- i
- direction
;
965 if (i
+ direction
== 0)
969 /* Make sure we have registers in which to store
970 the match position. */
971 if (search_regs
.num_regs
== 0)
973 regoff_t
*starts
, *ends
;
976 (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
978 (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
980 re_set_registers (&searchbuf
,
987 = pos
+ ((direction
> 0) ? 1 - len
: 0);
988 search_regs
.end
[0] = len
+ search_regs
.start
[0];
989 XSET (last_thing_searched
, Lisp_Buffer
, current_buffer
);
990 if ((n
-= direction
) != 0)
991 pos
+= dirlen
; /* to resume search */
993 return ((direction
> 0)
994 ? search_regs
.end
[0] : search_regs
.start
[0]);
997 pos
+= stride_for_teases
;
1000 /* We have done one clump. Can we continue? */
1001 if ((lim
- pos
) * direction
< 0)
1002 return ((0 - n
) * direction
);
1008 /* Given a string of words separated by word delimiters,
1009 compute a regexp that matches those exact words
1010 separated by arbitrary punctuation. */
1016 register unsigned char *p
, *o
;
1017 register int i
, len
, punct_count
= 0, word_count
= 0;
1020 CHECK_STRING (string
, 0);
1021 p
= XSTRING (string
)->data
;
1022 len
= XSTRING (string
)->size
;
1024 for (i
= 0; i
< len
; i
++)
1025 if (SYNTAX (p
[i
]) != Sword
)
1028 if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
) word_count
++;
1030 if (SYNTAX (p
[len
-1]) == Sword
) word_count
++;
1031 if (!word_count
) return build_string ("");
1033 val
= make_string (p
, len
- punct_count
+ 5 * (word_count
- 1) + 4);
1035 o
= XSTRING (val
)->data
;
1039 for (i
= 0; i
< len
; i
++)
1040 if (SYNTAX (p
[i
]) == Sword
)
1042 else if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
&& --word_count
)
1057 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
1058 "sSearch backward: ",
1059 "Search backward from point for STRING.\n\
1060 Set point to the beginning of the occurrence found, and return point.\n\
1061 An optional second argument bounds the search; it is a buffer position.\n\
1062 The match found must not extend before that position.\n\
1063 Optional third argument, if t, means if fail just return nil (no error).\n\
1064 If not nil and not t, position at limit of search and return nil.\n\
1065 Optional fourth argument is repeat count--search for successive occurrences.\n\
1066 See also the functions `match-beginning', `match-end' and `replace-match'.")
1067 (string
, bound
, noerror
, count
)
1068 Lisp_Object string
, bound
, noerror
, count
;
1070 return search_command (string
, bound
, noerror
, count
, -1, 0);
1073 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "sSearch: ",
1074 "Search forward from point for STRING.\n\
1075 Set point to the end of the occurrence found, and return point.\n\
1076 An optional second argument bounds the search; it is a buffer position.\n\
1077 The match found must not extend after that position. nil is equivalent\n\
1079 Optional third argument, if t, means if fail just return nil (no error).\n\
1080 If not nil and not t, move to limit of search and return nil.\n\
1081 Optional fourth argument is repeat count--search for successive occurrences.\n\
1082 See also the functions `match-beginning', `match-end' and `replace-match'.")
1083 (string
, bound
, noerror
, count
)
1084 Lisp_Object string
, bound
, noerror
, count
;
1086 return search_command (string
, bound
, noerror
, count
, 1, 0);
1089 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
1090 "sWord search backward: ",
1091 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1092 Set point to the beginning of the occurrence found, and return point.\n\
1093 An optional second argument bounds the search; it is a buffer position.\n\
1094 The match found must not extend before that position.\n\
1095 Optional third argument, if t, means if fail just return nil (no error).\n\
1096 If not nil and not t, move to limit of search and return nil.\n\
1097 Optional fourth argument is repeat count--search for successive occurrences.")
1098 (string
, bound
, noerror
, count
)
1099 Lisp_Object string
, bound
, noerror
, count
;
1101 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1);
1104 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
1106 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1107 Set point to the end of the occurrence found, and return point.\n\
1108 An optional second argument bounds the search; it is a buffer position.\n\
1109 The match found must not extend after that position.\n\
1110 Optional third argument, if t, means if fail just return nil (no error).\n\
1111 If not nil and not t, move to limit of search and return nil.\n\
1112 Optional fourth argument is repeat count--search for successive occurrences.")
1113 (string
, bound
, noerror
, count
)
1114 Lisp_Object string
, bound
, noerror
, count
;
1116 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1);
1119 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
1120 "sRE search backward: ",
1121 "Search backward from point for match for regular expression REGEXP.\n\
1122 Set point to the beginning of the match, and return point.\n\
1123 The match found is the one starting last in the buffer\n\
1124 and yet ending before the place the origin of the search.\n\
1125 An optional second argument bounds the search; it is a buffer position.\n\
1126 The match found must start at or after that position.\n\
1127 Optional third argument, if t, means if fail just return nil (no error).\n\
1128 If not nil and not t, move to limit of search and return nil.\n\
1129 Optional fourth argument is repeat count--search for successive occurrences.\n\
1130 See also the functions `match-beginning', `match-end' and `replace-match'.")
1131 (string
, bound
, noerror
, count
)
1132 Lisp_Object string
, bound
, noerror
, count
;
1134 return search_command (string
, bound
, noerror
, count
, -1, 1);
1137 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
1139 "Search forward from point for regular expression REGEXP.\n\
1140 Set point to the end of the occurrence found, and return point.\n\
1141 An optional second argument bounds the search; it is a buffer position.\n\
1142 The match found must not extend after that position.\n\
1143 Optional third argument, if t, means if fail just return nil (no error).\n\
1144 If not nil and not t, move to limit of search and return nil.\n\
1145 Optional fourth argument is repeat count--search for successive occurrences.\n\
1146 See also the functions `match-beginning', `match-end' and `replace-match'.")
1147 (string
, bound
, noerror
, count
)
1148 Lisp_Object string
, bound
, noerror
, count
;
1150 return search_command (string
, bound
, noerror
, count
, 1, 1);
1153 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 3, 0,
1154 "Replace text matched by last search with NEWTEXT.\n\
1155 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1156 Otherwise convert to all caps or cap initials, like replaced text.\n\
1157 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1158 Otherwise treat `\\' as special:\n\
1159 `\\&' in NEWTEXT means substitute original matched text.\n\
1160 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1161 If Nth parens didn't match, substitute nothing.\n\
1162 `\\\\' means insert one `\\'.\n\
1163 FIXEDCASE and LITERAL are optional arguments.\n\
1164 Leaves point at end of replacement text.")
1165 (string
, fixedcase
, literal
)
1166 Lisp_Object string
, fixedcase
, literal
;
1168 enum { nochange
, all_caps
, cap_initial
} case_action
;
1169 register int pos
, last
;
1170 int some_multiletter_word
;
1172 int some_uppercase_initial
;
1173 register int c
, prevc
;
1176 CHECK_STRING (string
, 0);
1178 case_action
= nochange
; /* We tried an initialization */
1179 /* but some C compilers blew it */
1181 if (search_regs
.num_regs
<= 0)
1182 error ("replace-match called before any match found");
1184 if (search_regs
.start
[0] < BEGV
1185 || search_regs
.start
[0] > search_regs
.end
[0]
1186 || search_regs
.end
[0] > ZV
)
1187 args_out_of_range (make_number (search_regs
.start
[0]),
1188 make_number (search_regs
.end
[0]));
1190 if (NILP (fixedcase
))
1192 /* Decide how to casify by examining the matched text. */
1194 last
= search_regs
.end
[0];
1196 case_action
= all_caps
;
1198 /* some_multiletter_word is set nonzero if any original word
1199 is more than one letter long. */
1200 some_multiletter_word
= 0;
1202 some_uppercase_initial
= 0;
1204 for (pos
= search_regs
.start
[0]; pos
< last
; pos
++)
1206 c
= FETCH_CHAR (pos
);
1209 /* Cannot be all caps if any original char is lower case */
1212 if (SYNTAX (prevc
) != Sword
)
1215 some_multiletter_word
= 1;
1217 else if (!NOCASEP (c
))
1219 if (SYNTAX (prevc
) != Sword
)
1220 some_uppercase_initial
= 1;
1222 some_multiletter_word
= 1;
1228 /* Convert to all caps if the old text is all caps
1229 and has at least one multiletter word. */
1230 if (! some_lowercase
&& some_multiletter_word
)
1231 case_action
= all_caps
;
1232 /* Capitalize each word, if the old text has a capitalized word. */
1233 else if (some_uppercase_initial
)
1234 case_action
= cap_initial
;
1236 case_action
= nochange
;
1239 /* We insert the replacement text before the old text, and then
1240 delete the original text. This means that markers at the
1241 beginning or end of the original will float to the corresponding
1242 position in the replacement. */
1243 SET_PT (search_regs
.start
[0]);
1244 if (!NILP (literal
))
1245 Finsert (1, &string
);
1248 struct gcpro gcpro1
;
1251 for (pos
= 0; pos
< XSTRING (string
)->size
; pos
++)
1253 int offset
= point
- search_regs
.start
[0];
1255 c
= XSTRING (string
)->data
[pos
];
1258 c
= XSTRING (string
)->data
[++pos
];
1260 Finsert_buffer_substring
1261 (Fcurrent_buffer (),
1262 make_number (search_regs
.start
[0] + offset
),
1263 make_number (search_regs
.end
[0] + offset
));
1264 else if (c
>= '1' && c
<= search_regs
.num_regs
+ '0')
1266 if (search_regs
.start
[c
- '0'] >= 1)
1267 Finsert_buffer_substring
1268 (Fcurrent_buffer (),
1269 make_number (search_regs
.start
[c
- '0'] + offset
),
1270 make_number (search_regs
.end
[c
- '0'] + offset
));
1281 inslen
= point
- (search_regs
.start
[0]);
1282 del_range (search_regs
.start
[0] + inslen
, search_regs
.end
[0] + inslen
);
1284 if (case_action
== all_caps
)
1285 Fupcase_region (make_number (point
- inslen
), make_number (point
));
1286 else if (case_action
== cap_initial
)
1287 upcase_initials_region (make_number (point
- inslen
), make_number (point
));
1292 match_limit (num
, beginningp
)
1298 CHECK_NUMBER (num
, 0);
1300 if (n
< 0 || n
>= search_regs
.num_regs
)
1301 args_out_of_range (num
, make_number (search_regs
.num_regs
));
1302 if (search_regs
.num_regs
<= 0
1303 || search_regs
.start
[n
] < 0)
1305 return (make_number ((beginningp
) ? search_regs
.start
[n
]
1306 : search_regs
.end
[n
]));
1309 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
1310 "Return position of start of text matched by last search.\n\
1311 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1312 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1313 Zero means the entire text matched by the whole regexp or whole string.")
1317 return match_limit (num
, 1);
1320 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
1321 "Return position of end of text matched by last search.\n\
1322 ARG, a number, specifies which parenthesized expression in the last regexp.\n\
1323 Value is nil if ARGth pair didn't match, or there were less than ARG pairs.\n\
1324 Zero means the entire text matched by the whole regexp or whole string.")
1328 return match_limit (num
, 0);
1331 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 0, 0,
1332 "Return a list containing all info on what the last search matched.\n\
1333 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1334 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1335 if the last match was on a buffer; integers or nil if a string was matched.\n\
1336 Use `store-match-data' to reinstate the data in this list.")
1342 if (NILP (last_thing_searched
))
1343 error ("match-data called before any match found");
1345 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
)
1346 * sizeof (Lisp_Object
));
1349 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1351 int start
= search_regs
.start
[i
];
1354 if (EQ (last_thing_searched
, Qt
))
1356 XFASTINT (data
[2 * i
]) = start
;
1357 XFASTINT (data
[2 * i
+ 1]) = search_regs
.end
[i
];
1359 else if (XTYPE (last_thing_searched
) == Lisp_Buffer
)
1361 data
[2 * i
] = Fmake_marker ();
1362 Fset_marker (data
[2 * i
],
1363 make_number (start
),
1364 last_thing_searched
);
1365 data
[2 * i
+ 1] = Fmake_marker ();
1366 Fset_marker (data
[2 * i
+ 1],
1367 make_number (search_regs
.end
[i
]),
1368 last_thing_searched
);
1371 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1377 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
1379 return Flist (2 * len
+ 2, data
);
1383 DEFUN ("store-match-data", Fstore_match_data
, Sstore_match_data
, 1, 1, 0,
1384 "Set internal data on last search match from elements of LIST.\n\
1385 LIST should have been created by calling `match-data' previously.")
1387 register Lisp_Object list
;
1390 register Lisp_Object marker
;
1392 if (!CONSP (list
) && !NILP (list
))
1393 list
= wrong_type_argument (Qconsp
, list
);
1395 /* Unless we find a marker with a buffer in LIST, assume that this
1396 match data came from a string. */
1397 last_thing_searched
= Qt
;
1399 /* Allocate registers if they don't already exist. */
1401 int length
= XFASTINT (Flength (list
)) / 2;
1403 if (length
> search_regs
.num_regs
)
1405 if (search_regs
.num_regs
== 0)
1408 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1410 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1415 = (regoff_t
*) xrealloc (search_regs
.start
,
1416 length
* sizeof (regoff_t
));
1418 = (regoff_t
*) xrealloc (search_regs
.end
,
1419 length
* sizeof (regoff_t
));
1423 re_set_registers (&searchbuf
, &search_regs
, length
,
1424 search_regs
.start
, search_regs
.end
);
1429 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1431 marker
= Fcar (list
);
1434 search_regs
.start
[i
] = -1;
1439 if (XTYPE (marker
) == Lisp_Marker
)
1441 if (XMARKER (marker
)->buffer
== 0)
1442 XFASTINT (marker
) = 0;
1444 XSET (last_thing_searched
, Lisp_Buffer
,
1445 XMARKER (marker
)->buffer
);
1448 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
1449 search_regs
.start
[i
] = XINT (marker
);
1452 marker
= Fcar (list
);
1453 if (XTYPE (marker
) == Lisp_Marker
1454 && XMARKER (marker
)->buffer
== 0)
1455 XFASTINT (marker
) = 0;
1457 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
1458 search_regs
.end
[i
] = XINT (marker
);
1466 /* Quote a string to inactivate reg-expr chars */
1468 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
1469 "Return a regexp string which matches exactly STRING and nothing else.")
1473 register unsigned char *in
, *out
, *end
;
1474 register unsigned char *temp
;
1476 CHECK_STRING (str
, 0);
1478 temp
= (unsigned char *) alloca (XSTRING (str
)->size
* 2);
1480 /* Now copy the data into the new string, inserting escapes. */
1482 in
= XSTRING (str
)->data
;
1483 end
= in
+ XSTRING (str
)->size
;
1486 for (; in
!= end
; in
++)
1488 if (*in
== '[' || *in
== ']'
1489 || *in
== '*' || *in
== '.' || *in
== '\\'
1490 || *in
== '?' || *in
== '+'
1491 || *in
== '^' || *in
== '$')
1496 return make_string (temp
, out
- temp
);
1503 searchbuf
.allocated
= 100;
1504 searchbuf
.buffer
= (unsigned char *) malloc (searchbuf
.allocated
);
1505 searchbuf
.fastmap
= search_fastmap
;
1507 Qsearch_failed
= intern ("search-failed");
1508 staticpro (&Qsearch_failed
);
1509 Qinvalid_regexp
= intern ("invalid-regexp");
1510 staticpro (&Qinvalid_regexp
);
1512 Fput (Qsearch_failed
, Qerror_conditions
,
1513 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
1514 Fput (Qsearch_failed
, Qerror_message
,
1515 build_string ("Search failed"));
1517 Fput (Qinvalid_regexp
, Qerror_conditions
,
1518 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
1519 Fput (Qinvalid_regexp
, Qerror_message
,
1520 build_string ("Invalid regexp"));
1523 staticpro (&last_regexp
);
1525 last_thing_searched
= Qnil
;
1526 staticpro (&last_thing_searched
);
1528 defsubr (&Sstring_match
);
1529 defsubr (&Slooking_at
);
1530 defsubr (&Sskip_chars_forward
);
1531 defsubr (&Sskip_chars_backward
);
1532 defsubr (&Sskip_syntax_forward
);
1533 defsubr (&Sskip_syntax_backward
);
1534 defsubr (&Ssearch_forward
);
1535 defsubr (&Ssearch_backward
);
1536 defsubr (&Sword_search_forward
);
1537 defsubr (&Sword_search_backward
);
1538 defsubr (&Sre_search_forward
);
1539 defsubr (&Sre_search_backward
);
1540 defsubr (&Sreplace_match
);
1541 defsubr (&Smatch_beginning
);
1542 defsubr (&Smatch_end
);
1543 defsubr (&Smatch_data
);
1544 defsubr (&Sstore_match_data
);
1545 defsubr (&Sregexp_quote
);