1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
28 #include "region-cache.h"
30 #include "blockinput.h"
31 #include "intervals.h"
33 #include <sys/types.h>
36 #define REGEXP_CACHE_SIZE 20
38 /* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
42 struct regexp_cache
*next
;
44 struct re_pattern_buffer buf
;
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
50 /* The instances of that struct. */
51 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
53 /* The head of the linked list; points to the most recently used buffer. */
54 struct regexp_cache
*searchbuf_head
;
57 /* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
64 been allocated by checking search_regs.num_regs.
66 The regex code keeps track of whether it has allocated the search
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
74 static struct re_registers search_regs
;
76 /* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79 static Lisp_Object last_thing_searched
;
81 /* error condition signaled when regexp compile_pattern fails */
83 Lisp_Object Qinvalid_regexp
;
85 static void set_search_regs ();
86 static void save_search_regs ();
88 static int search_buffer ();
93 error ("Stack overflow in regexp matcher");
102 /* Compile a regexp and signal a Lisp error if anything goes wrong.
103 PATTERN is the pattern to compile.
104 CP is the place to put the result.
105 TRANSLATE is a translation table for ignoring case, or NULL for none.
106 REGP is the structure that says where to store the "register"
107 values that will result from matching this pattern.
108 If it is 0, we should compile the pattern not to record any
109 subexpression bounds.
110 POSIX is nonzero if we want full backtracking (POSIX style)
111 for this pattern. 0 means backtrack only enough to get a valid match.
112 MULTIBYTE is nonzero if we want to handle multibyte characters in
113 PATTERN. 0 means all multibyte characters are recognized just as
114 sequences of binary data. */
117 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
)
118 struct regexp_cache
*cp
;
120 Lisp_Object
*translate
;
121 struct re_registers
*regp
;
129 cp
->buf
.translate
= translate
;
131 cp
->buf
.multibyte
= multibyte
;
133 old
= re_set_syntax (RE_SYNTAX_EMACS
134 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
135 val
= (char *) re_compile_pattern ((char *) XSTRING (pattern
)->data
,
136 XSTRING (pattern
)->size
, &cp
->buf
);
140 Fsignal (Qinvalid_regexp
, Fcons (build_string (val
), Qnil
));
142 cp
->regexp
= Fcopy_sequence (pattern
);
145 /* Compile a regexp if necessary, but first check to see if there's one in
147 PATTERN is the pattern to compile.
148 TRANSLATE is a translation table for ignoring case, or NULL for none.
149 REGP is the structure that says where to store the "register"
150 values that will result from matching this pattern.
151 If it is 0, we should compile the pattern not to record any
152 subexpression bounds.
153 POSIX is nonzero if we want full backtracking (POSIX style)
154 for this pattern. 0 means backtrack only enough to get a valid match. */
156 struct re_pattern_buffer
*
157 compile_pattern (pattern
, regp
, translate
, posix
)
159 struct re_registers
*regp
;
160 Lisp_Object
*translate
;
163 struct regexp_cache
*cp
, **cpp
;
164 /* Should we check it here, or add an argument `multibyte' to this
166 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
168 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
171 if (XSTRING (cp
->regexp
)->size
== XSTRING (pattern
)->size
172 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
173 && cp
->buf
.translate
== translate
174 && cp
->posix
== posix
175 && cp
->buf
.multibyte
== multibyte
)
178 /* If we're at the end of the cache, compile into the last cell. */
181 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
);
186 /* When we get here, cp (aka *cpp) contains the compiled pattern,
187 either because we found it in the cache or because we just compiled it.
188 Move it to the front of the queue to mark it as most recently used. */
190 cp
->next
= searchbuf_head
;
193 /* Advise the searching functions about the space we have allocated
194 for register data. */
196 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
201 /* Error condition used for failing searches */
202 Lisp_Object Qsearch_failed
;
208 Fsignal (Qsearch_failed
, Fcons (arg
, Qnil
));
213 looking_at_1 (string
, posix
)
218 unsigned char *p1
, *p2
;
221 struct re_pattern_buffer
*bufp
;
223 if (running_asynch_code
)
226 CHECK_STRING (string
, 0);
227 bufp
= compile_pattern (string
, &search_regs
,
228 (!NILP (current_buffer
->case_fold_search
)
229 ? XCHAR_TABLE (DOWNCASE_TABLE
)->contents
: 0),
233 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
235 /* Get pointers and sizes of the two strings
236 that make up the visible portion of the buffer. */
239 s1
= GPT_BYTE
- BEGV_BYTE
;
241 s2
= ZV_BYTE
- GPT_BYTE
;
245 s2
= ZV_BYTE
- BEGV_BYTE
;
250 s1
= ZV_BYTE
- BEGV_BYTE
;
254 re_match_object
= Qnil
;
256 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
257 PT_BYTE
- BEGV_BYTE
, &search_regs
,
258 ZV_BYTE
- BEGV_BYTE
);
262 val
= (0 <= i
? Qt
: Qnil
);
264 for (i
= 0; i
< search_regs
.num_regs
; i
++)
265 if (search_regs
.start
[i
] >= 0)
268 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
270 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
272 XSETBUFFER (last_thing_searched
, current_buffer
);
277 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
278 "Return t if text after point matches regular expression REGEXP.\n\
279 This function modifies the match data that `match-beginning',\n\
280 `match-end' and `match-data' access; save and restore the match\n\
281 data if you want to preserve them.")
285 return looking_at_1 (regexp
, 0);
288 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
289 "Return t if text after point matches regular expression REGEXP.\n\
290 Find the longest match, in accord with Posix regular expression rules.\n\
291 This function modifies the match data that `match-beginning',\n\
292 `match-end' and `match-data' access; save and restore the match\n\
293 data if you want to preserve them.")
297 return looking_at_1 (regexp
, 1);
301 string_match_1 (regexp
, string
, start
, posix
)
302 Lisp_Object regexp
, string
, start
;
307 struct re_pattern_buffer
*bufp
;
309 if (running_asynch_code
)
312 CHECK_STRING (regexp
, 0);
313 CHECK_STRING (string
, 1);
319 int len
= XSTRING (string
)->size
;
321 CHECK_NUMBER (start
, 2);
323 if (s
< 0 && -s
<= len
)
325 else if (0 > s
|| s
> len
)
326 args_out_of_range (string
, start
);
329 bufp
= compile_pattern (regexp
, &search_regs
,
330 (!NILP (current_buffer
->case_fold_search
)
331 ? XCHAR_TABLE (DOWNCASE_TABLE
)->contents
: 0),
334 re_match_object
= string
;
336 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
337 XSTRING (string
)->size
, s
, XSTRING (string
)->size
- s
,
340 last_thing_searched
= Qt
;
343 if (val
< 0) return Qnil
;
344 return make_number (val
);
347 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
348 "Return index of start of first match for REGEXP in STRING, or nil.\n\
349 If third arg START is non-nil, start search at that index in STRING.\n\
350 For index of first char beyond the match, do (match-end 0).\n\
351 `match-end' and `match-beginning' also give indices of substrings\n\
352 matched by parenthesis constructs in the pattern.")
353 (regexp
, string
, start
)
354 Lisp_Object regexp
, string
, start
;
356 return string_match_1 (regexp
, string
, start
, 0);
359 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
360 "Return index of start of first match for REGEXP in STRING, or nil.\n\
361 Find the longest match, in accord with Posix regular expression rules.\n\
362 If third arg START is non-nil, start search at that index in STRING.\n\
363 For index of first char beyond the match, do (match-end 0).\n\
364 `match-end' and `match-beginning' also give indices of substrings\n\
365 matched by parenthesis constructs in the pattern.")
366 (regexp
, string
, start
)
367 Lisp_Object regexp
, string
, start
;
369 return string_match_1 (regexp
, string
, start
, 1);
372 /* Match REGEXP against STRING, searching all of STRING,
373 and return the index of the match, or negative on failure.
374 This does not clobber the match data. */
377 fast_string_match (regexp
, string
)
378 Lisp_Object regexp
, string
;
381 struct re_pattern_buffer
*bufp
;
383 bufp
= compile_pattern (regexp
, 0, 0, 0);
385 re_match_object
= string
;
387 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
388 XSTRING (string
)->size
, 0, XSTRING (string
)->size
,
394 /* Match REGEXP against STRING, searching all of STRING ignoring case,
395 and return the index of the match, or negative on failure.
396 This does not clobber the match data. */
398 extern Lisp_Object Vascii_downcase_table
;
401 fast_c_string_match_ignore_case (regexp
, string
)
406 struct re_pattern_buffer
*bufp
;
407 int len
= strlen (string
);
409 re_match_object
= Qt
;
410 bufp
= compile_pattern (regexp
, 0,
411 XCHAR_TABLE (Vascii_downcase_table
)->contents
, 0);
413 val
= re_search (bufp
, string
, len
, 0, len
, 0);
424 return ((a
> b
) ? a
: b
);
431 return ((a
< b
) ? a
: b
);
435 /* The newline cache: remembering which sections of text have no newlines. */
437 /* If the user has requested newline caching, make sure it's on.
438 Otherwise, make sure it's off.
439 This is our cheezy way of associating an action with the change of
440 state of a buffer-local variable. */
442 newline_cache_on_off (buf
)
445 if (NILP (buf
->cache_long_line_scans
))
447 /* It should be off. */
448 if (buf
->newline_cache
)
450 free_region_cache (buf
->newline_cache
);
451 buf
->newline_cache
= 0;
456 /* It should be on. */
457 if (buf
->newline_cache
== 0)
458 buf
->newline_cache
= new_region_cache ();
463 /* Search for COUNT instances of the character TARGET between START and END.
465 If COUNT is positive, search forwards; END must be >= START.
466 If COUNT is negative, search backwards for the -COUNTth instance;
467 END must be <= START.
468 If COUNT is zero, do anything you please; run rogue, for all I care.
470 If END is zero, use BEGV or ZV instead, as appropriate for the
471 direction indicated by COUNT.
473 If we find COUNT instances, set *SHORTAGE to zero, and return the
474 position after the COUNTth match. Note that for reverse motion
475 this is not the same as the usual convention for Emacs motion commands.
477 If we don't find COUNT instances before reaching END, set *SHORTAGE
478 to the number of TARGETs left unfound, and return END.
480 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
481 except when inside redisplay. */
483 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
490 struct region_cache
*newline_cache
;
501 if (! end
) end
= BEGV
;
504 newline_cache_on_off (current_buffer
);
505 newline_cache
= current_buffer
->newline_cache
;
510 immediate_quit
= allow_quit
;
515 /* Our innermost scanning loop is very simple; it doesn't know
516 about gaps, buffer ends, or the newline cache. ceiling is
517 the position of the last character before the next such
518 obstacle --- the last character the dumb search loop should
520 int ceiling_byte
= CHAR_TO_BYTE (end
) - 1;
521 int start_byte
= CHAR_TO_BYTE (start
);
523 /* If we're looking for a newline, consult the newline cache
524 to see where we can avoid some scanning. */
525 if (target
== '\n' && newline_cache
)
529 while (region_cache_forward
530 (current_buffer
, newline_cache
, start_byte
, &next_change
))
531 start_byte
= next_change
;
532 immediate_quit
= allow_quit
;
534 /* START should never be after END. */
535 if (start_byte
> ceiling_byte
)
536 start_byte
= ceiling_byte
;
538 /* Now the text after start is an unknown region, and
539 next_change is the position of the next known region. */
540 ceiling_byte
= min (next_change
- 1, ceiling_byte
);
543 /* The dumb loop can only scan text stored in contiguous
544 bytes. BUFFER_CEILING_OF returns the last character
545 position that is contiguous, so the ceiling is the
546 position after that. */
547 ceiling_byte
= min (BUFFER_CEILING_OF (start_byte
), ceiling_byte
);
550 /* The termination address of the dumb loop. */
551 register unsigned char *ceiling_addr
552 = BYTE_POS_ADDR (ceiling_byte
) + 1;
553 register unsigned char *cursor
554 = BYTE_POS_ADDR (start_byte
);
555 unsigned char *base
= cursor
;
557 while (cursor
< ceiling_addr
)
559 unsigned char *scan_start
= cursor
;
562 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
565 /* If we're looking for newlines, cache the fact that
566 the region from start to cursor is free of them. */
567 if (target
== '\n' && newline_cache
)
568 know_region_cache (current_buffer
, newline_cache
,
569 start_byte
+ scan_start
- base
,
570 start_byte
+ cursor
- base
);
572 /* Did we find the target character? */
573 if (cursor
< ceiling_addr
)
578 return BYTE_TO_CHAR (start_byte
+ cursor
- base
+ 1);
584 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
590 /* The last character to check before the next obstacle. */
591 int ceiling_byte
= CHAR_TO_BYTE (end
);
592 int start_byte
= CHAR_TO_BYTE (start
);
594 /* Consult the newline cache, if appropriate. */
595 if (target
== '\n' && newline_cache
)
599 while (region_cache_backward
600 (current_buffer
, newline_cache
, start_byte
, &next_change
))
601 start_byte
= next_change
;
602 immediate_quit
= allow_quit
;
604 /* Start should never be at or before end. */
605 if (start_byte
<= ceiling_byte
)
606 start_byte
= ceiling_byte
+ 1;
608 /* Now the text before start is an unknown region, and
609 next_change is the position of the next known region. */
610 ceiling_byte
= max (next_change
, ceiling_byte
);
613 /* Stop scanning before the gap. */
614 ceiling_byte
= max (BUFFER_FLOOR_OF (start_byte
- 1), ceiling_byte
);
617 /* The termination address of the dumb loop. */
618 register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte
);
619 register unsigned char *cursor
= BYTE_POS_ADDR (start_byte
- 1);
620 unsigned char *base
= cursor
;
622 while (cursor
>= ceiling_addr
)
624 unsigned char *scan_start
= cursor
;
626 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
629 /* If we're looking for newlines, cache the fact that
630 the region from after the cursor to start is free of them. */
631 if (target
== '\n' && newline_cache
)
632 know_region_cache (current_buffer
, newline_cache
,
633 start_byte
+ cursor
- base
,
634 start_byte
+ scan_start
- base
);
636 /* Did we find the target character? */
637 if (cursor
>= ceiling_addr
)
642 return BYTE_TO_CHAR (start_byte
+ cursor
- base
);
648 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
654 *shortage
= count
* direction
;
658 /* Search for COUNT instances of a line boundary, which means either a
659 newline or (if selective display enabled) a carriage return.
660 Start at START. If COUNT is negative, search backwards.
662 We report the resulting position by calling TEMP_SET_PT_BOTH.
664 If we find COUNT instances. we position after (always after,
665 even if scanning backwards) the COUNTth match, and return 0.
667 If we don't find COUNT instances before reaching the end of the
668 buffer (or the beginning, if scanning backwards), we return
669 the number of line boundaries left unfound, and position at
670 the limit we bumped up against.
672 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
673 except in special cases. */
676 scan_newline (start
, start_byte
, limit
, limit_byte
, count
, allow_quit
)
677 int start
, start_byte
;
678 int limit
, limit_byte
;
682 int direction
= ((count
> 0) ? 1 : -1);
684 register unsigned char *cursor
;
687 register int ceiling
;
688 register unsigned char *ceiling_addr
;
690 int old_immediate_quit
= immediate_quit
;
692 /* If we are not in selective display mode,
693 check only for newlines. */
694 int selective_display
= (!NILP (current_buffer
->selective_display
)
695 && !INTEGERP (current_buffer
->selective_display
));
697 /* The code that follows is like scan_buffer
698 but checks for either newline or carriage return. */
703 start_byte
= CHAR_TO_BYTE (start
);
707 while (start_byte
< limit_byte
)
709 ceiling
= BUFFER_CEILING_OF (start_byte
);
710 ceiling
= min (limit_byte
- 1, ceiling
);
711 ceiling_addr
= BYTE_POS_ADDR (ceiling
) + 1;
712 base
= (cursor
= BYTE_POS_ADDR (start_byte
));
715 while (*cursor
!= '\n' && ++cursor
!= ceiling_addr
)
718 if (cursor
!= ceiling_addr
)
722 immediate_quit
= old_immediate_quit
;
723 start_byte
= start_byte
+ cursor
- base
+ 1;
724 start
= BYTE_TO_CHAR (start_byte
);
725 TEMP_SET_PT_BOTH (start
, start_byte
);
729 if (++cursor
== ceiling_addr
)
735 start_byte
+= cursor
- base
;
740 int start_byte
= CHAR_TO_BYTE (start
);
741 while (start_byte
> limit_byte
)
743 ceiling
= BUFFER_FLOOR_OF (start_byte
- 1);
744 ceiling
= max (limit_byte
, ceiling
);
745 ceiling_addr
= BYTE_POS_ADDR (ceiling
) - 1;
746 base
= (cursor
= BYTE_POS_ADDR (start_byte
- 1) + 1);
749 while (--cursor
!= ceiling_addr
&& *cursor
!= '\n')
752 if (cursor
!= ceiling_addr
)
756 immediate_quit
= old_immediate_quit
;
757 /* Return the position AFTER the match we found. */
758 start_byte
= start_byte
+ cursor
- base
+ 1;
759 start
= BYTE_TO_CHAR (start_byte
);
760 TEMP_SET_PT_BOTH (start
, start_byte
);
767 /* Here we add 1 to compensate for the last decrement
768 of CURSOR, which took it past the valid range. */
769 start_byte
+= cursor
- base
+ 1;
773 TEMP_SET_PT_BOTH (limit
, limit_byte
);
774 immediate_quit
= old_immediate_quit
;
776 return count
* direction
;
780 find_next_newline_no_quit (from
, cnt
)
781 register int from
, cnt
;
783 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
786 /* Like find_next_newline, but returns position before the newline,
787 not after, and only search up to TO. This isn't just
788 find_next_newline (...)-1, because you might hit TO. */
791 find_before_next_newline (from
, to
, cnt
)
795 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
803 /* Subroutines of Lisp buffer search functions. */
806 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
807 Lisp_Object string
, bound
, noerror
, count
;
818 CHECK_NUMBER (count
, 3);
822 CHECK_STRING (string
, 0);
824 lim
= n
> 0 ? ZV
: BEGV
;
827 CHECK_NUMBER_COERCE_MARKER (bound
, 1);
829 if (n
> 0 ? lim
< PT
: lim
> PT
)
830 error ("Invalid search bound (wrong side of point)");
837 np
= search_buffer (string
, PT
, lim
, n
, RE
,
838 (!NILP (current_buffer
->case_fold_search
)
839 ? XCHAR_TABLE (current_buffer
->case_canon_table
)->contents
841 (!NILP (current_buffer
->case_fold_search
)
842 ? XCHAR_TABLE (current_buffer
->case_eqv_table
)->contents
848 return signal_failure (string
);
849 if (!EQ (noerror
, Qt
))
851 if (lim
< BEGV
|| lim
> ZV
)
855 #if 0 /* This would be clean, but maybe programs depend on
856 a value of nil here. */
864 if (np
< BEGV
|| np
> ZV
)
869 return make_number (np
);
872 /* Return 1 if REGEXP it matches just one constant string. */
875 trivial_regexp_p (regexp
)
878 int len
= XSTRING (regexp
)->size
;
879 unsigned char *s
= XSTRING (regexp
)->data
;
885 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
892 case '|': case '(': case ')': case '`': case '\'': case 'b':
893 case 'B': case '<': case '>': case 'w': case 'W': case 's':
895 case 'c': case 'C': /* for categoryspec and notcategoryspec */
896 case '1': case '2': case '3': case '4': case '5':
897 case '6': case '7': case '8': case '9':
905 /* Search for the n'th occurrence of STRING in the current buffer,
906 starting at position POS and stopping at position LIM,
907 treating STRING as a literal string if RE is false or as
908 a regular expression if RE is true.
910 If N is positive, searching is forward and LIM must be greater than POS.
911 If N is negative, searching is backward and LIM must be less than POS.
913 Returns -x if only N-x occurrences found (x > 0),
914 or else the position at the beginning of the Nth occurrence
915 (if searching backward) or the end (if searching forward).
917 POSIX is nonzero if we want full backtracking (POSIX style)
918 for this pattern. 0 means backtrack only enough to get a valid match. */
921 search_buffer (string
, pos
, lim
, n
, RE
, trt
, inverse_trt
, posix
)
928 Lisp_Object
*inverse_trt
;
931 int len
= XSTRING (string
)->size
;
932 unsigned char *base_pat
= XSTRING (string
)->data
;
933 register int *BM_tab
;
935 register int direction
= ((n
> 0) ? 1 : -1);
937 int infinity
, limit
, k
, stride_for_teases
;
938 register unsigned char *pat
, *cursor
, *p_limit
;
940 unsigned char *p1
, *p2
;
943 if (running_asynch_code
)
946 /* Null string is found at starting position. */
949 set_search_regs (pos
, 0);
953 /* Searching 0 times means don't move. */
957 if (RE
&& !trivial_regexp_p (string
))
959 struct re_pattern_buffer
*bufp
;
961 bufp
= compile_pattern (string
, &search_regs
, trt
, posix
);
963 immediate_quit
= 1; /* Quit immediately if user types ^G,
964 because letting this function finish
965 can take too long. */
966 QUIT
; /* Do a pending quit right away,
967 to avoid paradoxical behavior */
968 /* Get pointers and sizes of the two strings
969 that make up the visible portion of the buffer. */
972 s1
= GPT_BYTE
- BEGV_BYTE
;
974 s2
= ZV_BYTE
- GPT_BYTE
;
978 s2
= ZV_BYTE
- BEGV_BYTE
;
983 s1
= ZV_BYTE
- BEGV_BYTE
;
986 re_match_object
= Qnil
;
991 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
992 pos
- BEGV
, lim
- pos
, &search_regs
,
993 /* Don't allow match past current point */
1001 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1002 if (search_regs
.start
[i
] >= 0)
1004 search_regs
.start
[i
]
1005 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1007 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1009 XSETBUFFER (last_thing_searched
, current_buffer
);
1010 /* Set pos to the new position. */
1011 pos
= search_regs
.start
[0];
1023 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1024 pos
- BEGV
, lim
- pos
, &search_regs
,
1028 matcher_overflow ();
1032 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1033 if (search_regs
.start
[i
] >= 0)
1035 search_regs
.start
[i
]
1036 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1038 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1040 XSETBUFFER (last_thing_searched
, current_buffer
);
1041 pos
= search_regs
.end
[0];
1053 else /* non-RE case */
1055 int pos_byte
= CHAR_TO_BYTE (pos
);
1056 int lim_byte
= CHAR_TO_BYTE (lim
);
1058 int BM_tab_space
[0400];
1059 BM_tab
= &BM_tab_space
[0];
1061 BM_tab
= (int *) alloca (0400 * sizeof (int));
1064 unsigned char *patbuf
= (unsigned char *) alloca (len
);
1068 /* If we got here and the RE flag is set, it's because we're
1069 dealing with a regexp known to be trivial, so the backslash
1070 just quotes the next character. */
1071 if (RE
&& *base_pat
== '\\')
1076 *pat
++ = (trt
? XINT (trt
[*base_pat
++]) : *base_pat
++);
1079 pat
= base_pat
= patbuf
;
1081 /* The general approach is that we are going to maintain that we know */
1082 /* the first (closest to the present position, in whatever direction */
1083 /* we're searching) character that could possibly be the last */
1084 /* (furthest from present position) character of a valid match. We */
1085 /* advance the state of our knowledge by looking at that character */
1086 /* and seeing whether it indeed matches the last character of the */
1087 /* pattern. If it does, we take a closer look. If it does not, we */
1088 /* move our pointer (to putative last characters) as far as is */
1089 /* logically possible. This amount of movement, which I call a */
1090 /* stride, will be the length of the pattern if the actual character */
1091 /* appears nowhere in the pattern, otherwise it will be the distance */
1092 /* from the last occurrence of that character to the end of the */
1094 /* As a coding trick, an enormous stride is coded into the table for */
1095 /* characters that match the last character. This allows use of only */
1096 /* a single test, a test for having gone past the end of the */
1097 /* permissible match region, to test for both possible matches (when */
1098 /* the stride goes past the end immediately) and failure to */
1099 /* match (where you get nudged past the end one stride at a time). */
1101 /* Here we make a "mickey mouse" BM table. The stride of the search */
1102 /* is determined only by the last character of the putative match. */
1103 /* If that character does not match, we will stride the proper */
1104 /* distance to propose a match that superimposes it on the last */
1105 /* instance of a character that matches it (per trt), or misses */
1106 /* it entirely if there is none. */
1108 dirlen
= len
* direction
;
1109 infinity
= dirlen
- (lim_byte
+ pos_byte
+ len
+ len
) * direction
;
1111 pat
= (base_pat
+= len
- 1);
1112 BM_tab_base
= BM_tab
;
1114 j
= dirlen
; /* to get it in a register */
1115 /* A character that does not appear in the pattern induces a */
1116 /* stride equal to the pattern length. */
1117 while (BM_tab_base
!= BM_tab
)
1125 while (i
!= infinity
)
1127 j
= pat
[i
]; i
+= direction
;
1128 if (i
== dirlen
) i
= infinity
;
1131 k
= (j
= XINT (trt
[j
]));
1133 stride_for_teases
= BM_tab
[j
];
1134 BM_tab
[j
] = dirlen
- i
;
1135 /* A translation table is accompanied by its inverse -- see */
1136 /* comment following downcase_table for details */
1137 while ((j
= (unsigned char) XINT (inverse_trt
[j
])) != k
)
1138 BM_tab
[j
] = dirlen
- i
;
1143 stride_for_teases
= BM_tab
[j
];
1144 BM_tab
[j
] = dirlen
- i
;
1146 /* stride_for_teases tells how much to stride if we get a */
1147 /* match on the far character but are subsequently */
1148 /* disappointed, by recording what the stride would have been */
1149 /* for that character if the last character had been */
1152 infinity
= dirlen
- infinity
;
1153 pos_byte
+= dirlen
- ((direction
> 0) ? direction
: 0);
1154 /* loop invariant - POS_BYTE points at where last char (first
1155 char if reverse) of pattern would align in a possible match. */
1158 /* It's been reported that some (broken) compiler thinks that
1159 Boolean expressions in an arithmetic context are unsigned.
1160 Using an explicit ?1:0 prevents this. */
1161 if ((lim_byte
- pos_byte
- ((direction
> 0) ? 1 : 0)) * direction
1163 return (n
* (0 - direction
));
1164 /* First we do the part we can by pointers (maybe nothing) */
1167 limit
= pos_byte
- dirlen
+ direction
;
1168 limit
= ((direction
> 0)
1169 ? BUFFER_CEILING_OF (limit
)
1170 : BUFFER_FLOOR_OF (limit
));
1171 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1172 can take on without hitting edge of buffer or the gap. */
1173 limit
= ((direction
> 0)
1174 ? min (lim_byte
- 1, min (limit
, pos_byte
+ 20000))
1175 : max (lim_byte
, max (limit
, pos_byte
- 20000)));
1176 if ((limit
- pos_byte
) * direction
> 20)
1178 p_limit
= BYTE_POS_ADDR (limit
);
1179 p2
= (cursor
= BYTE_POS_ADDR (pos_byte
));
1180 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1181 while (1) /* use one cursor setting as long as i can */
1183 if (direction
> 0) /* worth duplicating */
1185 /* Use signed comparison if appropriate
1186 to make cursor+infinity sure to be > p_limit.
1187 Assuming that the buffer lies in a range of addresses
1188 that are all "positive" (as ints) or all "negative",
1189 either kind of comparison will work as long
1190 as we don't step by infinity. So pick the kind
1191 that works when we do step by infinity. */
1192 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1193 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1194 cursor
+= BM_tab
[*cursor
];
1196 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1197 cursor
+= BM_tab
[*cursor
];
1201 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1202 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1203 cursor
+= BM_tab
[*cursor
];
1205 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1206 cursor
+= BM_tab
[*cursor
];
1208 /* If you are here, cursor is beyond the end of the searched region. */
1209 /* This can happen if you match on the far character of the pattern, */
1210 /* because the "stride" of that character is infinity, a number able */
1211 /* to throw you well beyond the end of the search. It can also */
1212 /* happen if you fail to match within the permitted region and would */
1213 /* otherwise try a character beyond that region */
1214 if ((cursor
- p_limit
) * direction
<= len
)
1215 break; /* a small overrun is genuine */
1216 cursor
-= infinity
; /* large overrun = hit */
1217 i
= dirlen
- direction
;
1220 while ((i
-= direction
) + direction
!= 0)
1221 if (pat
[i
] != XINT (trt
[*(cursor
-= direction
)]))
1226 while ((i
-= direction
) + direction
!= 0)
1227 if (pat
[i
] != *(cursor
-= direction
))
1230 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1231 if (i
+ direction
== 0)
1233 cursor
-= direction
;
1235 set_search_regs (pos_byte
+ cursor
- p2
+ ((direction
> 0)
1239 if ((n
-= direction
) != 0)
1240 cursor
+= dirlen
; /* to resume search */
1242 return ((direction
> 0)
1243 ? search_regs
.end
[0] : search_regs
.start
[0]);
1246 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1248 pos_byte
+= cursor
- p2
;
1251 /* Now we'll pick up a clump that has to be done the hard */
1252 /* way because it covers a discontinuity */
1254 limit
= ((direction
> 0)
1255 ? BUFFER_CEILING_OF (pos_byte
- dirlen
+ 1)
1256 : BUFFER_FLOOR_OF (pos_byte
- dirlen
- 1));
1257 limit
= ((direction
> 0)
1258 ? min (limit
+ len
, lim_byte
- 1)
1259 : max (limit
- len
, lim_byte
));
1260 /* LIMIT is now the last value POS_BYTE can have
1261 and still be valid for a possible match. */
1264 /* This loop can be coded for space rather than */
1265 /* speed because it will usually run only once. */
1266 /* (the reach is at most len + 21, and typically */
1267 /* does not exceed len) */
1268 while ((limit
- pos_byte
) * direction
>= 0)
1269 pos_byte
+= BM_tab
[FETCH_BYTE (pos_byte
)];
1270 /* now run the same tests to distinguish going off the */
1271 /* end, a match or a phony match. */
1272 if ((pos_byte
- limit
) * direction
<= len
)
1273 break; /* ran off the end */
1274 /* Found what might be a match.
1275 Set POS_BYTE back to last (first if reverse) pos. */
1276 pos_byte
-= infinity
;
1277 i
= dirlen
- direction
;
1278 while ((i
-= direction
) + direction
!= 0)
1280 pos_byte
-= direction
;
1281 if (pat
[i
] != (trt
!= 0
1282 ? XINT (trt
[FETCH_BYTE (pos_byte
)])
1283 : FETCH_BYTE (pos_byte
)))
1286 /* Above loop has moved POS_BYTE part or all the way
1287 back to the first pos (last pos if reverse).
1288 Set it once again at the last (first if reverse) char. */
1289 pos_byte
+= dirlen
- i
- direction
;
1290 if (i
+ direction
== 0)
1292 pos_byte
-= direction
;
1294 set_search_regs (pos_byte
+ ((direction
> 0) ? 1 - len
: 0),
1297 if ((n
-= direction
) != 0)
1298 pos_byte
+= dirlen
; /* to resume search */
1300 return ((direction
> 0)
1301 ? search_regs
.end
[0] : search_regs
.start
[0]);
1304 pos_byte
+= stride_for_teases
;
1307 /* We have done one clump. Can we continue? */
1308 if ((lim_byte
- pos_byte
) * direction
< 0)
1309 return ((0 - n
) * direction
);
1311 return BYTE_TO_CHAR (pos_byte
);
1315 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1316 for a match just found in the current buffer. */
1319 set_search_regs (beg_byte
, nbytes
)
1320 int beg_byte
, nbytes
;
1322 /* Make sure we have registers in which to store
1323 the match position. */
1324 if (search_regs
.num_regs
== 0)
1326 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1327 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1328 search_regs
.num_regs
= 2;
1331 search_regs
.start
[0] = BYTE_TO_CHAR (beg_byte
);
1332 search_regs
.end
[0] = BYTE_TO_CHAR (beg_byte
+ nbytes
);
1333 XSETBUFFER (last_thing_searched
, current_buffer
);
1336 /* Given a string of words separated by word delimiters,
1337 compute a regexp that matches those exact words
1338 separated by arbitrary punctuation. */
1344 register unsigned char *p
, *o
;
1345 register int i
, len
, punct_count
= 0, word_count
= 0;
1348 CHECK_STRING (string
, 0);
1349 p
= XSTRING (string
)->data
;
1350 len
= XSTRING (string
)->size
;
1352 for (i
= 0; i
< len
; i
++)
1353 if (SYNTAX (p
[i
]) != Sword
)
1356 if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
) word_count
++;
1358 if (SYNTAX (p
[len
-1]) == Sword
) word_count
++;
1359 if (!word_count
) return build_string ("");
1361 val
= make_string (p
, len
- punct_count
+ 5 * (word_count
- 1) + 4);
1363 o
= XSTRING (val
)->data
;
1367 for (i
= 0; i
< len
; i
++)
1368 if (SYNTAX (p
[i
]) == Sword
)
1370 else if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
&& --word_count
)
1385 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
1386 "MSearch backward: ",
1387 "Search backward from point for STRING.\n\
1388 Set point to the beginning of the occurrence found, and return point.\n\
1389 An optional second argument bounds the search; it is a buffer position.\n\
1390 The match found must not extend before that position.\n\
1391 Optional third argument, if t, means if fail just return nil (no error).\n\
1392 If not nil and not t, position at limit of search and return nil.\n\
1393 Optional fourth argument is repeat count--search for successive occurrences.\n\
1394 See also the functions `match-beginning', `match-end' and `replace-match'.")
1395 (string
, bound
, noerror
, count
)
1396 Lisp_Object string
, bound
, noerror
, count
;
1398 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
1401 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
1402 "Search forward from point for STRING.\n\
1403 Set point to the end of the occurrence found, and return point.\n\
1404 An optional second argument bounds the search; it is a buffer position.\n\
1405 The match found must not extend after that position. nil is equivalent\n\
1407 Optional third argument, if t, means if fail just return nil (no error).\n\
1408 If not nil and not t, move to limit of search and return nil.\n\
1409 Optional fourth argument is repeat count--search for successive occurrences.\n\
1410 See also the functions `match-beginning', `match-end' and `replace-match'.")
1411 (string
, bound
, noerror
, count
)
1412 Lisp_Object string
, bound
, noerror
, count
;
1414 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
1417 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
1418 "sWord search backward: ",
1419 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1420 Set point to the beginning of the occurrence found, and return point.\n\
1421 An optional second argument bounds the search; it is a buffer position.\n\
1422 The match found must not extend before that position.\n\
1423 Optional third argument, if t, means if fail just return nil (no error).\n\
1424 If not nil and not t, move to limit of search and return nil.\n\
1425 Optional fourth argument is repeat count--search for successive occurrences.")
1426 (string
, bound
, noerror
, count
)
1427 Lisp_Object string
, bound
, noerror
, count
;
1429 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
1432 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
1434 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1435 Set point to the end of the occurrence found, and return point.\n\
1436 An optional second argument bounds the search; it is a buffer position.\n\
1437 The match found must not extend after that position.\n\
1438 Optional third argument, if t, means if fail just return nil (no error).\n\
1439 If not nil and not t, move to limit of search and return nil.\n\
1440 Optional fourth argument is repeat count--search for successive occurrences.")
1441 (string
, bound
, noerror
, count
)
1442 Lisp_Object string
, bound
, noerror
, count
;
1444 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
1447 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
1448 "sRE search backward: ",
1449 "Search backward from point for match for regular expression REGEXP.\n\
1450 Set point to the beginning of the match, and return point.\n\
1451 The match found is the one starting last in the buffer\n\
1452 and yet ending before the origin of the search.\n\
1453 An optional second argument bounds the search; it is a buffer position.\n\
1454 The match found must start at or after that position.\n\
1455 Optional third argument, if t, means if fail just return nil (no error).\n\
1456 If not nil and not t, move to limit of search and return nil.\n\
1457 Optional fourth argument is repeat count--search for successive occurrences.\n\
1458 See also the functions `match-beginning', `match-end' and `replace-match'.")
1459 (regexp
, bound
, noerror
, count
)
1460 Lisp_Object regexp
, bound
, noerror
, count
;
1462 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
1465 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
1467 "Search forward from point for regular expression REGEXP.\n\
1468 Set point to the end of the occurrence found, and return point.\n\
1469 An optional second argument bounds the search; it is a buffer position.\n\
1470 The match found must not extend after that position.\n\
1471 Optional third argument, if t, means if fail just return nil (no error).\n\
1472 If not nil and not t, move to limit of search and return nil.\n\
1473 Optional fourth argument is repeat count--search for successive occurrences.\n\
1474 See also the functions `match-beginning', `match-end' and `replace-match'.")
1475 (regexp
, bound
, noerror
, count
)
1476 Lisp_Object regexp
, bound
, noerror
, count
;
1478 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
1481 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
1482 "sPosix search backward: ",
1483 "Search backward from point for match for regular expression REGEXP.\n\
1484 Find the longest match in accord with Posix regular expression rules.\n\
1485 Set point to the beginning of the match, and return point.\n\
1486 The match found is the one starting last in the buffer\n\
1487 and yet ending before the origin of the search.\n\
1488 An optional second argument bounds the search; it is a buffer position.\n\
1489 The match found must start at or after that position.\n\
1490 Optional third argument, if t, means if fail just return nil (no error).\n\
1491 If not nil and not t, move to limit of search and return nil.\n\
1492 Optional fourth argument is repeat count--search for successive occurrences.\n\
1493 See also the functions `match-beginning', `match-end' and `replace-match'.")
1494 (regexp
, bound
, noerror
, count
)
1495 Lisp_Object regexp
, bound
, noerror
, count
;
1497 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
1500 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
1502 "Search forward from point for regular expression REGEXP.\n\
1503 Find the longest match in accord with Posix regular expression rules.\n\
1504 Set point to the end of the occurrence found, and return point.\n\
1505 An optional second argument bounds the search; it is a buffer position.\n\
1506 The match found must not extend after that position.\n\
1507 Optional third argument, if t, means if fail just return nil (no error).\n\
1508 If not nil and not t, move to limit of search and return nil.\n\
1509 Optional fourth argument is repeat count--search for successive occurrences.\n\
1510 See also the functions `match-beginning', `match-end' and `replace-match'.")
1511 (regexp
, bound
, noerror
, count
)
1512 Lisp_Object regexp
, bound
, noerror
, count
;
1514 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
1517 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
1518 "Replace text matched by last search with NEWTEXT.\n\
1519 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1520 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1521 based on the replaced text.\n\
1522 If the replaced text has only capital letters\n\
1523 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1524 If the replaced text has at least one word starting with a capital letter,\n\
1525 then capitalize each word in NEWTEXT.\n\n\
1526 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1527 Otherwise treat `\\' as special:\n\
1528 `\\&' in NEWTEXT means substitute original matched text.\n\
1529 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1530 If Nth parens didn't match, substitute nothing.\n\
1531 `\\\\' means insert one `\\'.\n\
1532 FIXEDCASE and LITERAL are optional arguments.\n\
1533 Leaves point at end of replacement text.\n\
1535 The optional fourth argument STRING can be a string to modify.\n\
1536 In that case, this function creates and returns a new string\n\
1537 which is made by replacing the part of STRING that was matched.\n\
1539 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
1540 It says to replace just that subexpression instead of the whole match.\n\
1541 This is useful only after a regular expression search or match\n\
1542 since only regular expressions have distinguished subexpressions.")
1543 (newtext
, fixedcase
, literal
, string
, subexp
)
1544 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
1546 enum { nochange
, all_caps
, cap_initial
} case_action
;
1547 register int pos
, last
;
1548 int some_multiletter_word
;
1551 int some_nonuppercase_initial
;
1552 register int c
, prevc
;
1555 int opoint
, newpoint
;
1557 CHECK_STRING (newtext
, 0);
1559 if (! NILP (string
))
1560 CHECK_STRING (string
, 4);
1562 case_action
= nochange
; /* We tried an initialization */
1563 /* but some C compilers blew it */
1565 if (search_regs
.num_regs
<= 0)
1566 error ("replace-match called before any match found");
1572 CHECK_NUMBER (subexp
, 3);
1573 sub
= XINT (subexp
);
1574 if (sub
< 0 || sub
>= search_regs
.num_regs
)
1575 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
1580 if (search_regs
.start
[sub
] < BEGV
1581 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
1582 || search_regs
.end
[sub
] > ZV
)
1583 args_out_of_range (make_number (search_regs
.start
[sub
]),
1584 make_number (search_regs
.end
[sub
]));
1588 if (search_regs
.start
[sub
] < 0
1589 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
1590 || search_regs
.end
[sub
] > XSTRING (string
)->size
)
1591 args_out_of_range (make_number (search_regs
.start
[sub
]),
1592 make_number (search_regs
.end
[sub
]));
1595 if (NILP (fixedcase
))
1598 /* Decide how to casify by examining the matched text. */
1601 last
= CHAR_TO_BYTE (search_regs
.end
[sub
]);
1603 last
= search_regs
.end
[sub
];
1606 beg
= CHAR_TO_BYTE (search_regs
.start
[sub
]);
1608 beg
= search_regs
.start
[sub
];
1611 case_action
= all_caps
;
1613 /* some_multiletter_word is set nonzero if any original word
1614 is more than one letter long. */
1615 some_multiletter_word
= 0;
1617 some_nonuppercase_initial
= 0;
1620 for (pos
= beg
; pos
< last
; pos
++)
1623 c
= FETCH_BYTE (pos
);
1625 c
= XSTRING (string
)->data
[pos
];
1629 /* Cannot be all caps if any original char is lower case */
1632 if (SYNTAX (prevc
) != Sword
)
1633 some_nonuppercase_initial
= 1;
1635 some_multiletter_word
= 1;
1637 else if (!NOCASEP (c
))
1640 if (SYNTAX (prevc
) != Sword
)
1643 some_multiletter_word
= 1;
1647 /* If the initial is a caseless word constituent,
1648 treat that like a lowercase initial. */
1649 if (SYNTAX (prevc
) != Sword
)
1650 some_nonuppercase_initial
= 1;
1656 /* Convert to all caps if the old text is all caps
1657 and has at least one multiletter word. */
1658 if (! some_lowercase
&& some_multiletter_word
)
1659 case_action
= all_caps
;
1660 /* Capitalize each word, if the old text has all capitalized words. */
1661 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
1662 case_action
= cap_initial
;
1663 else if (!some_nonuppercase_initial
&& some_uppercase
)
1664 /* Should x -> yz, operating on X, give Yz or YZ?
1665 We'll assume the latter. */
1666 case_action
= all_caps
;
1668 case_action
= nochange
;
1671 /* Do replacement in a string. */
1674 Lisp_Object before
, after
;
1676 before
= Fsubstring (string
, make_number (0),
1677 make_number (search_regs
.start
[sub
]));
1678 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
1680 /* Substitute parts of the match into NEWTEXT
1685 /* We build up the substituted string in ACCUM. */
1691 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
1695 int delbackslash
= 0;
1697 c
= XSTRING (newtext
)->data
[pos
];
1700 c
= XSTRING (newtext
)->data
[++pos
];
1703 substart
= search_regs
.start
[sub
];
1704 subend
= search_regs
.end
[sub
];
1706 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
1708 if (search_regs
.start
[c
- '0'] >= 0)
1710 substart
= search_regs
.start
[c
- '0'];
1711 subend
= search_regs
.end
[c
- '0'];
1717 error ("Invalid use of `\\' in replacement text");
1721 if (pos
- 1 != lastpos
+ 1)
1722 middle
= Fsubstring (newtext
,
1723 make_number (lastpos
+ 1),
1724 make_number (pos
- 1));
1727 accum
= concat3 (accum
, middle
,
1728 Fsubstring (string
, make_number (substart
),
1729 make_number (subend
)));
1732 else if (delbackslash
)
1734 middle
= Fsubstring (newtext
, make_number (lastpos
+ 1),
1736 accum
= concat2 (accum
, middle
);
1741 if (pos
!= lastpos
+ 1)
1742 middle
= Fsubstring (newtext
, make_number (lastpos
+ 1),
1747 newtext
= concat2 (accum
, middle
);
1750 /* Do case substitution in NEWTEXT if desired. */
1751 if (case_action
== all_caps
)
1752 newtext
= Fupcase (newtext
);
1753 else if (case_action
== cap_initial
)
1754 newtext
= Fupcase_initials (newtext
);
1756 return concat3 (before
, newtext
, after
);
1759 /* Record point, the move (quietly) to the start of the match. */
1760 if (PT
> search_regs
.start
[sub
])
1765 TEMP_SET_PT (search_regs
.start
[sub
]);
1767 /* We insert the replacement text before the old text, and then
1768 delete the original text. This means that markers at the
1769 beginning or end of the original will float to the corresponding
1770 position in the replacement. */
1771 if (!NILP (literal
))
1772 Finsert_and_inherit (1, &newtext
);
1775 struct gcpro gcpro1
;
1778 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
1780 int offset
= PT
- search_regs
.start
[sub
];
1782 c
= XSTRING (newtext
)->data
[pos
];
1785 c
= XSTRING (newtext
)->data
[++pos
];
1787 Finsert_buffer_substring
1788 (Fcurrent_buffer (),
1789 make_number (search_regs
.start
[sub
] + offset
),
1790 make_number (search_regs
.end
[sub
] + offset
));
1791 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
1793 if (search_regs
.start
[c
- '0'] >= 1)
1794 Finsert_buffer_substring
1795 (Fcurrent_buffer (),
1796 make_number (search_regs
.start
[c
- '0'] + offset
),
1797 make_number (search_regs
.end
[c
- '0'] + offset
));
1802 error ("Invalid use of `\\' in replacement text");
1810 inslen
= PT
- (search_regs
.start
[sub
]);
1811 del_range (search_regs
.start
[sub
] + inslen
, search_regs
.end
[sub
] + inslen
);
1813 if (case_action
== all_caps
)
1814 Fupcase_region (make_number (PT
- inslen
), make_number (PT
));
1815 else if (case_action
== cap_initial
)
1816 Fupcase_initials_region (make_number (PT
- inslen
), make_number (PT
));
1820 /* Put point back where it was in the text. */
1822 TEMP_SET_PT (opoint
+ ZV
);
1824 TEMP_SET_PT (opoint
);
1826 /* Now move point "officially" to the start of the inserted replacement. */
1827 move_if_not_intangible (newpoint
);
1833 match_limit (num
, beginningp
)
1839 CHECK_NUMBER (num
, 0);
1841 if (n
< 0 || n
>= search_regs
.num_regs
)
1842 args_out_of_range (num
, make_number (search_regs
.num_regs
));
1843 if (search_regs
.num_regs
<= 0
1844 || search_regs
.start
[n
] < 0)
1846 return (make_number ((beginningp
) ? search_regs
.start
[n
]
1847 : search_regs
.end
[n
]));
1850 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
1851 "Return position of start of text matched by last search.\n\
1852 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1854 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1856 Zero means the entire text matched by the whole regexp or whole string.")
1860 return match_limit (subexp
, 1);
1863 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
1864 "Return position of end of text matched by last search.\n\
1865 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1867 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1869 Zero means the entire text matched by the whole regexp or whole string.")
1873 return match_limit (subexp
, 0);
1876 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 2, 0,
1877 "Return a list containing all info on what the last search matched.\n\
1878 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1879 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1880 if the last match was on a buffer; integers or nil if a string was matched.\n\
1881 Use `store-match-data' to reinstate the data in this list.\n\
1883 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
1884 \(rather than markers) to represent buffer positions.\n\
1885 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
1886 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
1888 Lisp_Object integers
, reuse
;
1890 Lisp_Object tail
, prev
;
1894 if (NILP (last_thing_searched
))
1897 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
)
1898 * sizeof (Lisp_Object
));
1901 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1903 int start
= search_regs
.start
[i
];
1906 if (EQ (last_thing_searched
, Qt
)
1907 || ! NILP (integers
))
1909 XSETFASTINT (data
[2 * i
], start
);
1910 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
1912 else if (BUFFERP (last_thing_searched
))
1914 data
[2 * i
] = Fmake_marker ();
1915 Fset_marker (data
[2 * i
],
1916 make_number (start
),
1917 last_thing_searched
);
1918 data
[2 * i
+ 1] = Fmake_marker ();
1919 Fset_marker (data
[2 * i
+ 1],
1920 make_number (search_regs
.end
[i
]),
1921 last_thing_searched
);
1924 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1930 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
1933 /* If REUSE is not usable, cons up the values and return them. */
1934 if (! CONSP (reuse
))
1935 return Flist (2 * len
+ 2, data
);
1937 /* If REUSE is a list, store as many value elements as will fit
1938 into the elements of REUSE. */
1939 for (i
= 0, tail
= reuse
; CONSP (tail
);
1940 i
++, tail
= XCONS (tail
)->cdr
)
1942 if (i
< 2 * len
+ 2)
1943 XCONS (tail
)->car
= data
[i
];
1945 XCONS (tail
)->car
= Qnil
;
1949 /* If we couldn't fit all value elements into REUSE,
1950 cons up the rest of them and add them to the end of REUSE. */
1951 if (i
< 2 * len
+ 2)
1952 XCONS (prev
)->cdr
= Flist (2 * len
+ 2 - i
, data
+ i
);
1958 DEFUN ("store-match-data", Fstore_match_data
, Sstore_match_data
, 1, 1, 0,
1959 "Set internal data on last search match from elements of LIST.\n\
1960 LIST should have been created by calling `match-data' previously.")
1962 register Lisp_Object list
;
1965 register Lisp_Object marker
;
1967 if (running_asynch_code
)
1968 save_search_regs ();
1970 if (!CONSP (list
) && !NILP (list
))
1971 list
= wrong_type_argument (Qconsp
, list
);
1973 /* Unless we find a marker with a buffer in LIST, assume that this
1974 match data came from a string. */
1975 last_thing_searched
= Qt
;
1977 /* Allocate registers if they don't already exist. */
1979 int length
= XFASTINT (Flength (list
)) / 2;
1981 if (length
> search_regs
.num_regs
)
1983 if (search_regs
.num_regs
== 0)
1986 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1988 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
1993 = (regoff_t
*) xrealloc (search_regs
.start
,
1994 length
* sizeof (regoff_t
));
1996 = (regoff_t
*) xrealloc (search_regs
.end
,
1997 length
* sizeof (regoff_t
));
2000 search_regs
.num_regs
= length
;
2004 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2006 marker
= Fcar (list
);
2009 search_regs
.start
[i
] = -1;
2014 if (MARKERP (marker
))
2016 if (XMARKER (marker
)->buffer
== 0)
2017 XSETFASTINT (marker
, 0);
2019 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
2022 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
2023 search_regs
.start
[i
] = XINT (marker
);
2026 marker
= Fcar (list
);
2027 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
2028 XSETFASTINT (marker
, 0);
2030 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
2031 search_regs
.end
[i
] = XINT (marker
);
2039 /* If non-zero the match data have been saved in saved_search_regs
2040 during the execution of a sentinel or filter. */
2041 static int search_regs_saved
;
2042 static struct re_registers saved_search_regs
;
2044 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2045 if asynchronous code (filter or sentinel) is running. */
2049 if (!search_regs_saved
)
2051 saved_search_regs
.num_regs
= search_regs
.num_regs
;
2052 saved_search_regs
.start
= search_regs
.start
;
2053 saved_search_regs
.end
= search_regs
.end
;
2054 search_regs
.num_regs
= 0;
2055 search_regs
.start
= 0;
2056 search_regs
.end
= 0;
2058 search_regs_saved
= 1;
2062 /* Called upon exit from filters and sentinels. */
2064 restore_match_data ()
2066 if (search_regs_saved
)
2068 if (search_regs
.num_regs
> 0)
2070 xfree (search_regs
.start
);
2071 xfree (search_regs
.end
);
2073 search_regs
.num_regs
= saved_search_regs
.num_regs
;
2074 search_regs
.start
= saved_search_regs
.start
;
2075 search_regs
.end
= saved_search_regs
.end
;
2077 search_regs_saved
= 0;
2081 /* Quote a string to inactivate reg-expr chars */
2083 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
2084 "Return a regexp string which matches exactly STRING and nothing else.")
2088 register unsigned char *in
, *out
, *end
;
2089 register unsigned char *temp
;
2091 CHECK_STRING (string
, 0);
2093 temp
= (unsigned char *) alloca (XSTRING (string
)->size
* 2);
2095 /* Now copy the data into the new string, inserting escapes. */
2097 in
= XSTRING (string
)->data
;
2098 end
= in
+ XSTRING (string
)->size
;
2101 for (; in
!= end
; in
++)
2103 if (*in
== '[' || *in
== ']'
2104 || *in
== '*' || *in
== '.' || *in
== '\\'
2105 || *in
== '?' || *in
== '+'
2106 || *in
== '^' || *in
== '$')
2111 return make_string (temp
, out
- temp
);
2118 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
2120 searchbufs
[i
].buf
.allocated
= 100;
2121 searchbufs
[i
].buf
.buffer
= (unsigned char *) malloc (100);
2122 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
2123 searchbufs
[i
].regexp
= Qnil
;
2124 staticpro (&searchbufs
[i
].regexp
);
2125 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
2127 searchbuf_head
= &searchbufs
[0];
2129 Qsearch_failed
= intern ("search-failed");
2130 staticpro (&Qsearch_failed
);
2131 Qinvalid_regexp
= intern ("invalid-regexp");
2132 staticpro (&Qinvalid_regexp
);
2134 Fput (Qsearch_failed
, Qerror_conditions
,
2135 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
2136 Fput (Qsearch_failed
, Qerror_message
,
2137 build_string ("Search failed"));
2139 Fput (Qinvalid_regexp
, Qerror_conditions
,
2140 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
2141 Fput (Qinvalid_regexp
, Qerror_message
,
2142 build_string ("Invalid regexp"));
2144 last_thing_searched
= Qnil
;
2145 staticpro (&last_thing_searched
);
2147 defsubr (&Slooking_at
);
2148 defsubr (&Sposix_looking_at
);
2149 defsubr (&Sstring_match
);
2150 defsubr (&Sposix_string_match
);
2151 defsubr (&Ssearch_forward
);
2152 defsubr (&Ssearch_backward
);
2153 defsubr (&Sword_search_forward
);
2154 defsubr (&Sword_search_backward
);
2155 defsubr (&Sre_search_forward
);
2156 defsubr (&Sre_search_backward
);
2157 defsubr (&Sposix_search_forward
);
2158 defsubr (&Sposix_search_backward
);
2159 defsubr (&Sreplace_match
);
2160 defsubr (&Smatch_beginning
);
2161 defsubr (&Smatch_end
);
2162 defsubr (&Smatch_data
);
2163 defsubr (&Sstore_match_data
);
2164 defsubr (&Sregexp_quote
);