1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
28 #include "region-cache.h"
30 #include "blockinput.h"
31 #include "intervals.h"
33 #include <sys/types.h>
36 #define REGEXP_CACHE_SIZE 20
38 /* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
42 struct regexp_cache
*next
;
44 struct re_pattern_buffer buf
;
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
50 /* The instances of that struct. */
51 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
53 /* The head of the linked list; points to the most recently used buffer. */
54 struct regexp_cache
*searchbuf_head
;
57 /* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
64 been allocated by checking search_regs.num_regs.
66 The regex code keeps track of whether it has allocated the search
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
74 static struct re_registers search_regs
;
76 /* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79 static Lisp_Object last_thing_searched
;
81 /* error condition signaled when regexp compile_pattern fails */
83 Lisp_Object Qinvalid_regexp
;
85 static void set_search_regs ();
86 static void save_search_regs ();
87 static int simple_search ();
88 static int boyer_moore ();
89 static int search_buffer ();
94 error ("Stack overflow in regexp matcher");
103 /* Compile a regexp and signal a Lisp error if anything goes wrong.
104 PATTERN is the pattern to compile.
105 CP is the place to put the result.
106 TRANSLATE is a translation table for ignoring case, or nil for none.
107 REGP is the structure that says where to store the "register"
108 values that will result from matching this pattern.
109 If it is 0, we should compile the pattern not to record any
110 subexpression bounds.
111 POSIX is nonzero if we want full backtracking (POSIX style)
112 for this pattern. 0 means backtrack only enough to get a valid match.
113 MULTIBYTE is nonzero if we want to handle multibyte characters in
114 PATTERN. 0 means all multibyte characters are recognized just as
115 sequences of binary data. */
118 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
)
119 struct regexp_cache
*cp
;
121 Lisp_Object translate
;
122 struct re_registers
*regp
;
127 int raw_pattern_size
;
131 /* MULTIBYTE says whether the text to be searched is multibyte.
132 We must convert PATTERN to match that, or we will not really
133 find things right. */
135 if (multibyte
== STRING_MULTIBYTE (pattern
))
137 raw_pattern
= (char *) XSTRING (pattern
)->data
;
138 raw_pattern_size
= STRING_BYTES (XSTRING (pattern
));
142 raw_pattern_size
= count_size_as_multibyte (XSTRING (pattern
)->data
,
143 XSTRING (pattern
)->size
);
144 raw_pattern
= (char *) alloca (raw_pattern_size
+ 1);
145 copy_text (XSTRING (pattern
)->data
, raw_pattern
,
146 XSTRING (pattern
)->size
, 0, 1);
150 /* Converting multibyte to single-byte.
152 ??? Perhaps this conversion should be done in a special way
153 by subtracting nonascii-insert-offset from each non-ASCII char,
154 so that only the multibyte chars which really correspond to
155 the chosen single-byte character set can possibly match. */
156 raw_pattern_size
= XSTRING (pattern
)->size
;
157 raw_pattern
= (char *) alloca (raw_pattern_size
+ 1);
158 copy_text (XSTRING (pattern
)->data
, raw_pattern
,
159 STRING_BYTES (XSTRING (pattern
)), 1, 0);
163 cp
->buf
.translate
= (! NILP (translate
) ? translate
: 0);
165 cp
->buf
.multibyte
= multibyte
;
167 old
= re_set_syntax (RE_SYNTAX_EMACS
168 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
169 val
= (char *) re_compile_pattern (raw_pattern
, raw_pattern_size
, &cp
->buf
);
173 Fsignal (Qinvalid_regexp
, Fcons (build_string (val
), Qnil
));
175 cp
->regexp
= Fcopy_sequence (pattern
);
178 /* Compile a regexp if necessary, but first check to see if there's one in
180 PATTERN is the pattern to compile.
181 TRANSLATE is a translation table for ignoring case, or nil for none.
182 REGP is the structure that says where to store the "register"
183 values that will result from matching this pattern.
184 If it is 0, we should compile the pattern not to record any
185 subexpression bounds.
186 POSIX is nonzero if we want full backtracking (POSIX style)
187 for this pattern. 0 means backtrack only enough to get a valid match. */
189 struct re_pattern_buffer
*
190 compile_pattern (pattern
, regp
, translate
, posix
, multibyte
)
192 struct re_registers
*regp
;
193 Lisp_Object translate
;
194 int posix
, multibyte
;
196 struct regexp_cache
*cp
, **cpp
;
198 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
201 if (XSTRING (cp
->regexp
)->size
== XSTRING (pattern
)->size
202 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
203 && cp
->buf
.translate
== (! NILP (translate
) ? translate
: 0)
204 && cp
->posix
== posix
205 && cp
->buf
.multibyte
== multibyte
)
208 /* If we're at the end of the cache, compile into the last cell. */
211 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
);
216 /* When we get here, cp (aka *cpp) contains the compiled pattern,
217 either because we found it in the cache or because we just compiled it.
218 Move it to the front of the queue to mark it as most recently used. */
220 cp
->next
= searchbuf_head
;
223 /* Advise the searching functions about the space we have allocated
224 for register data. */
226 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
231 /* Error condition used for failing searches */
232 Lisp_Object Qsearch_failed
;
238 Fsignal (Qsearch_failed
, Fcons (arg
, Qnil
));
243 looking_at_1 (string
, posix
)
248 unsigned char *p1
, *p2
;
251 struct re_pattern_buffer
*bufp
;
253 if (running_asynch_code
)
256 CHECK_STRING (string
, 0);
257 bufp
= compile_pattern (string
, &search_regs
,
258 (!NILP (current_buffer
->case_fold_search
)
259 ? DOWNCASE_TABLE
: Qnil
),
261 !NILP (current_buffer
->enable_multibyte_characters
));
264 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
266 /* Get pointers and sizes of the two strings
267 that make up the visible portion of the buffer. */
270 s1
= GPT_BYTE
- BEGV_BYTE
;
272 s2
= ZV_BYTE
- GPT_BYTE
;
276 s2
= ZV_BYTE
- BEGV_BYTE
;
281 s1
= ZV_BYTE
- BEGV_BYTE
;
285 re_match_object
= Qnil
;
287 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
288 PT_BYTE
- BEGV_BYTE
, &search_regs
,
289 ZV_BYTE
- BEGV_BYTE
);
293 val
= (0 <= i
? Qt
: Qnil
);
295 for (i
= 0; i
< search_regs
.num_regs
; i
++)
296 if (search_regs
.start
[i
] >= 0)
299 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
301 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
303 XSETBUFFER (last_thing_searched
, current_buffer
);
308 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
309 "Return t if text after point matches regular expression REGEXP.\n\
310 This function modifies the match data that `match-beginning',\n\
311 `match-end' and `match-data' access; save and restore the match\n\
312 data if you want to preserve them.")
316 return looking_at_1 (regexp
, 0);
319 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
320 "Return t if text after point matches regular expression REGEXP.\n\
321 Find the longest match, in accord with Posix regular expression rules.\n\
322 This function modifies the match data that `match-beginning',\n\
323 `match-end' and `match-data' access; save and restore the match\n\
324 data if you want to preserve them.")
328 return looking_at_1 (regexp
, 1);
332 string_match_1 (regexp
, string
, start
, posix
)
333 Lisp_Object regexp
, string
, start
;
337 struct re_pattern_buffer
*bufp
;
341 if (running_asynch_code
)
344 CHECK_STRING (regexp
, 0);
345 CHECK_STRING (string
, 1);
348 pos
= 0, pos_byte
= 0;
351 int len
= XSTRING (string
)->size
;
353 CHECK_NUMBER (start
, 2);
355 if (pos
< 0 && -pos
<= len
)
357 else if (0 > pos
|| pos
> len
)
358 args_out_of_range (string
, start
);
359 pos_byte
= string_char_to_byte (string
, pos
);
362 bufp
= compile_pattern (regexp
, &search_regs
,
363 (!NILP (current_buffer
->case_fold_search
)
364 ? DOWNCASE_TABLE
: Qnil
),
366 STRING_MULTIBYTE (string
));
368 re_match_object
= string
;
370 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
371 STRING_BYTES (XSTRING (string
)), pos_byte
,
372 STRING_BYTES (XSTRING (string
)) - pos_byte
,
375 last_thing_searched
= Qt
;
378 if (val
< 0) return Qnil
;
380 for (i
= 0; i
< search_regs
.num_regs
; i
++)
381 if (search_regs
.start
[i
] >= 0)
384 = string_byte_to_char (string
, search_regs
.start
[i
]);
386 = string_byte_to_char (string
, search_regs
.end
[i
]);
389 return make_number (string_byte_to_char (string
, val
));
392 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
393 "Return index of start of first match for REGEXP in STRING, or nil.\n\
394 If third arg START is non-nil, start search at that index in STRING.\n\
395 For index of first char beyond the match, do (match-end 0).\n\
396 `match-end' and `match-beginning' also give indices of substrings\n\
397 matched by parenthesis constructs in the pattern.")
398 (regexp
, string
, start
)
399 Lisp_Object regexp
, string
, start
;
401 return string_match_1 (regexp
, string
, start
, 0);
404 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
405 "Return index of start of first match for REGEXP in STRING, or nil.\n\
406 Find the longest match, in accord with Posix regular expression rules.\n\
407 If third arg START is non-nil, start search at that index in STRING.\n\
408 For index of first char beyond the match, do (match-end 0).\n\
409 `match-end' and `match-beginning' also give indices of substrings\n\
410 matched by parenthesis constructs in the pattern.")
411 (regexp
, string
, start
)
412 Lisp_Object regexp
, string
, start
;
414 return string_match_1 (regexp
, string
, start
, 1);
417 /* Match REGEXP against STRING, searching all of STRING,
418 and return the index of the match, or negative on failure.
419 This does not clobber the match data. */
422 fast_string_match (regexp
, string
)
423 Lisp_Object regexp
, string
;
426 struct re_pattern_buffer
*bufp
;
428 bufp
= compile_pattern (regexp
, 0, Qnil
,
429 0, STRING_MULTIBYTE (string
));
431 re_match_object
= string
;
433 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
434 STRING_BYTES (XSTRING (string
)), 0,
435 STRING_BYTES (XSTRING (string
)), 0);
440 /* Match REGEXP against STRING, searching all of STRING ignoring case,
441 and return the index of the match, or negative on failure.
442 This does not clobber the match data.
443 We assume that STRING contains single-byte characters. */
445 extern Lisp_Object Vascii_downcase_table
;
448 fast_c_string_match_ignore_case (regexp
, string
)
453 struct re_pattern_buffer
*bufp
;
454 int len
= strlen (string
);
456 regexp
= string_make_unibyte (regexp
);
457 re_match_object
= Qt
;
458 bufp
= compile_pattern (regexp
, 0,
459 Vascii_downcase_table
, 0,
462 val
= re_search (bufp
, string
, len
, 0, len
, 0);
473 return ((a
> b
) ? a
: b
);
480 return ((a
< b
) ? a
: b
);
484 /* The newline cache: remembering which sections of text have no newlines. */
486 /* If the user has requested newline caching, make sure it's on.
487 Otherwise, make sure it's off.
488 This is our cheezy way of associating an action with the change of
489 state of a buffer-local variable. */
491 newline_cache_on_off (buf
)
494 if (NILP (buf
->cache_long_line_scans
))
496 /* It should be off. */
497 if (buf
->newline_cache
)
499 free_region_cache (buf
->newline_cache
);
500 buf
->newline_cache
= 0;
505 /* It should be on. */
506 if (buf
->newline_cache
== 0)
507 buf
->newline_cache
= new_region_cache ();
512 /* Search for COUNT instances of the character TARGET between START and END.
514 If COUNT is positive, search forwards; END must be >= START.
515 If COUNT is negative, search backwards for the -COUNTth instance;
516 END must be <= START.
517 If COUNT is zero, do anything you please; run rogue, for all I care.
519 If END is zero, use BEGV or ZV instead, as appropriate for the
520 direction indicated by COUNT.
522 If we find COUNT instances, set *SHORTAGE to zero, and return the
523 position after the COUNTth match. Note that for reverse motion
524 this is not the same as the usual convention for Emacs motion commands.
526 If we don't find COUNT instances before reaching END, set *SHORTAGE
527 to the number of TARGETs left unfound, and return END.
529 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
530 except when inside redisplay. */
532 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
539 struct region_cache
*newline_cache
;
550 if (! end
) end
= BEGV
;
553 newline_cache_on_off (current_buffer
);
554 newline_cache
= current_buffer
->newline_cache
;
559 immediate_quit
= allow_quit
;
564 /* Our innermost scanning loop is very simple; it doesn't know
565 about gaps, buffer ends, or the newline cache. ceiling is
566 the position of the last character before the next such
567 obstacle --- the last character the dumb search loop should
569 int ceiling_byte
= CHAR_TO_BYTE (end
) - 1;
570 int start_byte
= CHAR_TO_BYTE (start
);
572 /* If we're looking for a newline, consult the newline cache
573 to see where we can avoid some scanning. */
574 if (target
== '\n' && newline_cache
)
578 while (region_cache_forward
579 (current_buffer
, newline_cache
, start_byte
, &next_change
))
580 start_byte
= next_change
;
581 immediate_quit
= allow_quit
;
583 /* START should never be after END. */
584 if (start_byte
> ceiling_byte
)
585 start_byte
= ceiling_byte
;
587 /* Now the text after start is an unknown region, and
588 next_change is the position of the next known region. */
589 ceiling_byte
= min (next_change
- 1, ceiling_byte
);
592 /* The dumb loop can only scan text stored in contiguous
593 bytes. BUFFER_CEILING_OF returns the last character
594 position that is contiguous, so the ceiling is the
595 position after that. */
596 ceiling_byte
= min (BUFFER_CEILING_OF (start_byte
), ceiling_byte
);
599 /* The termination address of the dumb loop. */
600 register unsigned char *ceiling_addr
601 = BYTE_POS_ADDR (ceiling_byte
) + 1;
602 register unsigned char *cursor
603 = BYTE_POS_ADDR (start_byte
);
604 unsigned char *base
= cursor
;
606 while (cursor
< ceiling_addr
)
608 unsigned char *scan_start
= cursor
;
611 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
614 /* If we're looking for newlines, cache the fact that
615 the region from start to cursor is free of them. */
616 if (target
== '\n' && newline_cache
)
617 know_region_cache (current_buffer
, newline_cache
,
618 start_byte
+ scan_start
- base
,
619 start_byte
+ cursor
- base
);
621 /* Did we find the target character? */
622 if (cursor
< ceiling_addr
)
627 return BYTE_TO_CHAR (start_byte
+ cursor
- base
+ 1);
633 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
639 /* The last character to check before the next obstacle. */
640 int ceiling_byte
= CHAR_TO_BYTE (end
);
641 int start_byte
= CHAR_TO_BYTE (start
);
643 /* Consult the newline cache, if appropriate. */
644 if (target
== '\n' && newline_cache
)
648 while (region_cache_backward
649 (current_buffer
, newline_cache
, start_byte
, &next_change
))
650 start_byte
= next_change
;
651 immediate_quit
= allow_quit
;
653 /* Start should never be at or before end. */
654 if (start_byte
<= ceiling_byte
)
655 start_byte
= ceiling_byte
+ 1;
657 /* Now the text before start is an unknown region, and
658 next_change is the position of the next known region. */
659 ceiling_byte
= max (next_change
, ceiling_byte
);
662 /* Stop scanning before the gap. */
663 ceiling_byte
= max (BUFFER_FLOOR_OF (start_byte
- 1), ceiling_byte
);
666 /* The termination address of the dumb loop. */
667 register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte
);
668 register unsigned char *cursor
= BYTE_POS_ADDR (start_byte
- 1);
669 unsigned char *base
= cursor
;
671 while (cursor
>= ceiling_addr
)
673 unsigned char *scan_start
= cursor
;
675 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
678 /* If we're looking for newlines, cache the fact that
679 the region from after the cursor to start is free of them. */
680 if (target
== '\n' && newline_cache
)
681 know_region_cache (current_buffer
, newline_cache
,
682 start_byte
+ cursor
- base
,
683 start_byte
+ scan_start
- base
);
685 /* Did we find the target character? */
686 if (cursor
>= ceiling_addr
)
691 return BYTE_TO_CHAR (start_byte
+ cursor
- base
);
697 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
703 *shortage
= count
* direction
;
707 /* Search for COUNT instances of a line boundary, which means either a
708 newline or (if selective display enabled) a carriage return.
709 Start at START. If COUNT is negative, search backwards.
711 We report the resulting position by calling TEMP_SET_PT_BOTH.
713 If we find COUNT instances. we position after (always after,
714 even if scanning backwards) the COUNTth match, and return 0.
716 If we don't find COUNT instances before reaching the end of the
717 buffer (or the beginning, if scanning backwards), we return
718 the number of line boundaries left unfound, and position at
719 the limit we bumped up against.
721 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
722 except in special cases. */
725 scan_newline (start
, start_byte
, limit
, limit_byte
, count
, allow_quit
)
726 int start
, start_byte
;
727 int limit
, limit_byte
;
731 int direction
= ((count
> 0) ? 1 : -1);
733 register unsigned char *cursor
;
736 register int ceiling
;
737 register unsigned char *ceiling_addr
;
739 int old_immediate_quit
= immediate_quit
;
741 /* If we are not in selective display mode,
742 check only for newlines. */
743 int selective_display
= (!NILP (current_buffer
->selective_display
)
744 && !INTEGERP (current_buffer
->selective_display
));
746 /* The code that follows is like scan_buffer
747 but checks for either newline or carriage return. */
752 start_byte
= CHAR_TO_BYTE (start
);
756 while (start_byte
< limit_byte
)
758 ceiling
= BUFFER_CEILING_OF (start_byte
);
759 ceiling
= min (limit_byte
- 1, ceiling
);
760 ceiling_addr
= BYTE_POS_ADDR (ceiling
) + 1;
761 base
= (cursor
= BYTE_POS_ADDR (start_byte
));
764 while (*cursor
!= '\n' && ++cursor
!= ceiling_addr
)
767 if (cursor
!= ceiling_addr
)
771 immediate_quit
= old_immediate_quit
;
772 start_byte
= start_byte
+ cursor
- base
+ 1;
773 start
= BYTE_TO_CHAR (start_byte
);
774 TEMP_SET_PT_BOTH (start
, start_byte
);
778 if (++cursor
== ceiling_addr
)
784 start_byte
+= cursor
- base
;
789 while (start_byte
> limit_byte
)
791 ceiling
= BUFFER_FLOOR_OF (start_byte
- 1);
792 ceiling
= max (limit_byte
, ceiling
);
793 ceiling_addr
= BYTE_POS_ADDR (ceiling
) - 1;
794 base
= (cursor
= BYTE_POS_ADDR (start_byte
- 1) + 1);
797 while (--cursor
!= ceiling_addr
&& *cursor
!= '\n')
800 if (cursor
!= ceiling_addr
)
804 immediate_quit
= old_immediate_quit
;
805 /* Return the position AFTER the match we found. */
806 start_byte
= start_byte
+ cursor
- base
+ 1;
807 start
= BYTE_TO_CHAR (start_byte
);
808 TEMP_SET_PT_BOTH (start
, start_byte
);
815 /* Here we add 1 to compensate for the last decrement
816 of CURSOR, which took it past the valid range. */
817 start_byte
+= cursor
- base
+ 1;
821 TEMP_SET_PT_BOTH (limit
, limit_byte
);
822 immediate_quit
= old_immediate_quit
;
824 return count
* direction
;
828 find_next_newline_no_quit (from
, cnt
)
829 register int from
, cnt
;
831 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
834 /* Like find_next_newline, but returns position before the newline,
835 not after, and only search up to TO. This isn't just
836 find_next_newline (...)-1, because you might hit TO. */
839 find_before_next_newline (from
, to
, cnt
)
843 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
851 /* Subroutines of Lisp buffer search functions. */
854 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
855 Lisp_Object string
, bound
, noerror
, count
;
866 CHECK_NUMBER (count
, 3);
870 CHECK_STRING (string
, 0);
874 lim
= ZV
, lim_byte
= ZV_BYTE
;
876 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
880 CHECK_NUMBER_COERCE_MARKER (bound
, 1);
882 if (n
> 0 ? lim
< PT
: lim
> PT
)
883 error ("Invalid search bound (wrong side of point)");
885 lim
= ZV
, lim_byte
= ZV_BYTE
;
887 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
889 lim_byte
= CHAR_TO_BYTE (lim
);
892 np
= search_buffer (string
, PT
, PT_BYTE
, lim
, lim_byte
, n
, RE
,
893 (!NILP (current_buffer
->case_fold_search
)
894 ? current_buffer
->case_canon_table
896 (!NILP (current_buffer
->case_fold_search
)
897 ? current_buffer
->case_eqv_table
903 return signal_failure (string
);
904 if (!EQ (noerror
, Qt
))
906 if (lim
< BEGV
|| lim
> ZV
)
908 SET_PT_BOTH (lim
, lim_byte
);
910 #if 0 /* This would be clean, but maybe programs depend on
911 a value of nil here. */
919 if (np
< BEGV
|| np
> ZV
)
924 return make_number (np
);
927 /* Return 1 if REGEXP it matches just one constant string. */
930 trivial_regexp_p (regexp
)
933 int len
= STRING_BYTES (XSTRING (regexp
));
934 unsigned char *s
= XSTRING (regexp
)->data
;
940 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
947 case '|': case '(': case ')': case '`': case '\'': case 'b':
948 case 'B': case '<': case '>': case 'w': case 'W': case 's':
950 case 'c': case 'C': /* for categoryspec and notcategoryspec */
951 case '1': case '2': case '3': case '4': case '5':
952 case '6': case '7': case '8': case '9':
960 /* Search for the n'th occurrence of STRING in the current buffer,
961 starting at position POS and stopping at position LIM,
962 treating STRING as a literal string if RE is false or as
963 a regular expression if RE is true.
965 If N is positive, searching is forward and LIM must be greater than POS.
966 If N is negative, searching is backward and LIM must be less than POS.
968 Returns -x if x occurrences remain to be found (x > 0),
969 or else the position at the beginning of the Nth occurrence
970 (if searching backward) or the end (if searching forward).
972 POSIX is nonzero if we want full backtracking (POSIX style)
973 for this pattern. 0 means backtrack only enough to get a valid match. */
975 #define TRANSLATE(out, trt, d) \
981 temp = Faref (trt, make_number (d)); \
982 if (INTEGERP (temp)) \
993 search_buffer (string
, pos
, pos_byte
, lim
, lim_byte
, n
,
994 RE
, trt
, inverse_trt
, posix
)
1003 Lisp_Object inverse_trt
;
1006 int len
= XSTRING (string
)->size
;
1007 int len_byte
= STRING_BYTES (XSTRING (string
));
1010 if (running_asynch_code
)
1011 save_search_regs ();
1013 /* Null string is found at starting position. */
1016 set_search_regs (pos
, 0);
1020 /* Searching 0 times means don't move. */
1024 if (RE
&& !trivial_regexp_p (string
))
1026 unsigned char *p1
, *p2
;
1028 struct re_pattern_buffer
*bufp
;
1030 bufp
= compile_pattern (string
, &search_regs
, trt
, posix
,
1031 !NILP (current_buffer
->enable_multibyte_characters
));
1033 immediate_quit
= 1; /* Quit immediately if user types ^G,
1034 because letting this function finish
1035 can take too long. */
1036 QUIT
; /* Do a pending quit right away,
1037 to avoid paradoxical behavior */
1038 /* Get pointers and sizes of the two strings
1039 that make up the visible portion of the buffer. */
1042 s1
= GPT_BYTE
- BEGV_BYTE
;
1044 s2
= ZV_BYTE
- GPT_BYTE
;
1048 s2
= ZV_BYTE
- BEGV_BYTE
;
1053 s1
= ZV_BYTE
- BEGV_BYTE
;
1056 re_match_object
= Qnil
;
1061 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1062 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1064 /* Don't allow match past current point */
1065 pos_byte
- BEGV_BYTE
);
1068 matcher_overflow ();
1072 pos_byte
= search_regs
.start
[0] + BEGV_BYTE
;
1073 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1074 if (search_regs
.start
[i
] >= 0)
1076 search_regs
.start
[i
]
1077 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1079 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1081 XSETBUFFER (last_thing_searched
, current_buffer
);
1082 /* Set pos to the new position. */
1083 pos
= search_regs
.start
[0];
1095 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1096 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1098 lim_byte
- BEGV_BYTE
);
1101 matcher_overflow ();
1105 pos_byte
= search_regs
.end
[0] + BEGV_BYTE
;
1106 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1107 if (search_regs
.start
[i
] >= 0)
1109 search_regs
.start
[i
]
1110 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1112 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1114 XSETBUFFER (last_thing_searched
, current_buffer
);
1115 pos
= search_regs
.end
[0];
1127 else /* non-RE case */
1129 unsigned char *raw_pattern
, *pat
;
1130 int raw_pattern_size
;
1131 int raw_pattern_size_byte
;
1132 unsigned char *patbuf
;
1133 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1134 unsigned char *base_pat
= XSTRING (string
)->data
;
1135 int charset_base
= -1;
1138 /* MULTIBYTE says whether the text to be searched is multibyte.
1139 We must convert PATTERN to match that, or we will not really
1140 find things right. */
1142 if (multibyte
== STRING_MULTIBYTE (string
))
1144 raw_pattern
= (char *) XSTRING (string
)->data
;
1145 raw_pattern_size
= XSTRING (string
)->size
;
1146 raw_pattern_size_byte
= STRING_BYTES (XSTRING (string
));
1150 raw_pattern_size
= XSTRING (string
)->size
;
1151 raw_pattern_size_byte
1152 = count_size_as_multibyte (XSTRING (string
)->data
,
1154 raw_pattern
= (char *) alloca (raw_pattern_size_byte
+ 1);
1155 copy_text (XSTRING (string
)->data
, raw_pattern
,
1156 XSTRING (string
)->size
, 0, 1);
1160 /* Converting multibyte to single-byte.
1162 ??? Perhaps this conversion should be done in a special way
1163 by subtracting nonascii-insert-offset from each non-ASCII char,
1164 so that only the multibyte chars which really correspond to
1165 the chosen single-byte character set can possibly match. */
1166 raw_pattern_size
= XSTRING (string
)->size
;
1167 raw_pattern_size_byte
= XSTRING (string
)->size
;
1168 raw_pattern
= (char *) alloca (raw_pattern_size
+ 1);
1169 copy_text (XSTRING (string
)->data
, raw_pattern
,
1170 STRING_BYTES (XSTRING (string
)), 1, 0);
1173 /* Copy and optionally translate the pattern. */
1174 len
= raw_pattern_size
;
1175 len_byte
= raw_pattern_size_byte
;
1176 patbuf
= (unsigned char *) alloca (len_byte
);
1178 base_pat
= raw_pattern
;
1183 unsigned char workbuf
[4], *str
;
1184 int c
, translated
, inverse
;
1185 int in_charlen
, charlen
;
1187 /* If we got here and the RE flag is set, it's because we're
1188 dealing with a regexp known to be trivial, so the backslash
1189 just quotes the next character. */
1190 if (RE
&& *base_pat
== '\\')
1197 c
= STRING_CHAR_AND_LENGTH (base_pat
, len_byte
, in_charlen
);
1198 /* Translate the character, if requested. */
1199 TRANSLATE (translated
, trt
, c
);
1200 /* If translation changed the byte-length, go back
1201 to the original character. */
1202 charlen
= CHAR_STRING (translated
, workbuf
, str
);
1203 if (in_charlen
!= charlen
)
1206 charlen
= CHAR_STRING (c
, workbuf
, str
);
1209 TRANSLATE (inverse
, inverse_trt
, c
);
1211 /* Did this char actually get translated?
1212 Would any other char get translated into it? */
1213 if (translated
!= c
|| inverse
!= c
)
1215 /* Keep track of which character set row
1216 contains the characters that need translation. */
1217 int charset_base_code
= c
& ~0xff;
1218 if (charset_base
== -1)
1219 charset_base
= charset_base_code
;
1220 else if (charset_base
!= charset_base_code
)
1221 /* If two different rows appear, needing translation,
1222 then we cannot use boyer_moore search. */
1224 /* ??? Handa: this must do simple = 0
1225 if c is a composite character. */
1228 /* Store this character into the translated pattern. */
1229 bcopy (str
, pat
, charlen
);
1231 base_pat
+= in_charlen
;
1232 len_byte
-= in_charlen
;
1239 int c
, translated
, inverse
;
1241 /* If we got here and the RE flag is set, it's because we're
1242 dealing with a regexp known to be trivial, so the backslash
1243 just quotes the next character. */
1244 if (RE
&& *base_pat
== '\\')
1250 TRANSLATE (translated
, trt
, c
);
1251 TRANSLATE (inverse
, inverse_trt
, c
);
1253 /* Did this char actually get translated?
1254 Would any other char get translated into it? */
1255 if (translated
!= c
|| inverse
!= c
)
1257 /* Keep track of which character set row
1258 contains the characters that need translation. */
1259 int charset_base_code
= c
& ~0xff;
1260 if (charset_base
== -1)
1261 charset_base
= charset_base_code
;
1262 else if (charset_base
!= charset_base_code
)
1263 /* If two different rows appear, needing translation,
1264 then we cannot use boyer_moore search. */
1267 *pat
++ = translated
;
1271 len_byte
= pat
- patbuf
;
1272 len
= raw_pattern_size
;
1273 pat
= base_pat
= patbuf
;
1276 return boyer_moore (n
, pat
, len
, len_byte
, trt
, inverse_trt
,
1277 pos
, pos_byte
, lim
, lim_byte
,
1280 return simple_search (n
, pat
, len
, len_byte
, trt
,
1281 pos
, pos_byte
, lim
, lim_byte
);
1285 /* Do a simple string search N times for the string PAT,
1286 whose length is LEN/LEN_BYTE,
1287 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1288 TRT is the translation table.
1290 Return the character position where the match is found.
1291 Otherwise, if M matches remained to be found, return -M.
1293 This kind of search works regardless of what is in PAT and
1294 regardless of what is in TRT. It is used in cases where
1295 boyer_moore cannot work. */
1298 simple_search (n
, pat
, len
, len_byte
, trt
, pos
, pos_byte
, lim
, lim_byte
)
1306 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1307 int forward
= n
> 0;
1309 if (lim
> pos
&& multibyte
)
1314 /* Try matching at position POS. */
1316 int this_pos_byte
= pos_byte
;
1318 int this_len_byte
= len_byte
;
1319 unsigned char *p
= pat
;
1320 if (pos
+ len
> lim
)
1323 while (this_len
> 0)
1325 int charlen
, buf_charlen
;
1328 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1329 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1330 ZV_BYTE
- this_pos_byte
,
1332 TRANSLATE (buf_ch
, trt
, buf_ch
);
1334 if (buf_ch
!= pat_ch
)
1337 this_len_byte
-= charlen
;
1341 this_pos_byte
+= buf_charlen
;
1348 pos_byte
+= len_byte
;
1352 INC_BOTH (pos
, pos_byte
);
1362 /* Try matching at position POS. */
1365 unsigned char *p
= pat
;
1367 if (pos
+ len
> lim
)
1370 while (this_len
> 0)
1373 int buf_ch
= FETCH_BYTE (this_pos
);
1374 TRANSLATE (buf_ch
, trt
, buf_ch
);
1376 if (buf_ch
!= pat_ch
)
1394 /* Backwards search. */
1395 else if (lim
< pos
&& multibyte
)
1400 /* Try matching at position POS. */
1401 int this_pos
= pos
- len
;
1402 int this_pos_byte
= pos_byte
- len_byte
;
1404 int this_len_byte
= len_byte
;
1405 unsigned char *p
= pat
;
1407 if (pos
- len
< lim
)
1410 while (this_len
> 0)
1412 int charlen
, buf_charlen
;
1415 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1416 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1417 ZV_BYTE
- this_pos_byte
,
1419 TRANSLATE (buf_ch
, trt
, buf_ch
);
1421 if (buf_ch
!= pat_ch
)
1424 this_len_byte
-= charlen
;
1427 this_pos_byte
+= buf_charlen
;
1434 pos_byte
-= len_byte
;
1438 DEC_BOTH (pos
, pos_byte
);
1448 /* Try matching at position POS. */
1449 int this_pos
= pos
- len
;
1451 unsigned char *p
= pat
;
1453 if (pos
- len
< lim
)
1456 while (this_len
> 0)
1459 int buf_ch
= FETCH_BYTE (this_pos
);
1460 TRANSLATE (buf_ch
, trt
, buf_ch
);
1462 if (buf_ch
!= pat_ch
)
1484 set_search_regs ((multibyte
? pos_byte
: pos
) - len_byte
, len_byte
);
1486 set_search_regs (multibyte
? pos_byte
: pos
, len_byte
);
1496 /* Do Boyer-Moore search N times for the string PAT,
1497 whose length is LEN/LEN_BYTE,
1498 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1499 DIRECTION says which direction we search in.
1500 TRT and INVERSE_TRT are translation tables.
1502 This kind of search works if all the characters in PAT that have
1503 nontrivial translation are the same aside from the last byte. This
1504 makes it possible to translate just the last byte of a character,
1505 and do so after just a simple test of the context.
1507 If that criterion is not satisfied, do not call this function. */
1510 boyer_moore (n
, base_pat
, len
, len_byte
, trt
, inverse_trt
,
1511 pos
, pos_byte
, lim
, lim_byte
, charset_base
)
1513 unsigned char *base_pat
;
1516 Lisp_Object inverse_trt
;
1521 int direction
= ((n
> 0) ? 1 : -1);
1522 register int dirlen
;
1523 int infinity
, limit
, k
, stride_for_teases
;
1524 register int *BM_tab
;
1526 register unsigned char *cursor
, *p_limit
;
1529 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1531 unsigned char simple_translate
[0400];
1532 int translate_prev_byte
;
1533 int translate_anteprev_byte
;
1536 int BM_tab_space
[0400];
1537 BM_tab
= &BM_tab_space
[0];
1539 BM_tab
= (int *) alloca (0400 * sizeof (int));
1541 /* The general approach is that we are going to maintain that we know */
1542 /* the first (closest to the present position, in whatever direction */
1543 /* we're searching) character that could possibly be the last */
1544 /* (furthest from present position) character of a valid match. We */
1545 /* advance the state of our knowledge by looking at that character */
1546 /* and seeing whether it indeed matches the last character of the */
1547 /* pattern. If it does, we take a closer look. If it does not, we */
1548 /* move our pointer (to putative last characters) as far as is */
1549 /* logically possible. This amount of movement, which I call a */
1550 /* stride, will be the length of the pattern if the actual character */
1551 /* appears nowhere in the pattern, otherwise it will be the distance */
1552 /* from the last occurrence of that character to the end of the */
1554 /* As a coding trick, an enormous stride is coded into the table for */
1555 /* characters that match the last character. This allows use of only */
1556 /* a single test, a test for having gone past the end of the */
1557 /* permissible match region, to test for both possible matches (when */
1558 /* the stride goes past the end immediately) and failure to */
1559 /* match (where you get nudged past the end one stride at a time). */
1561 /* Here we make a "mickey mouse" BM table. The stride of the search */
1562 /* is determined only by the last character of the putative match. */
1563 /* If that character does not match, we will stride the proper */
1564 /* distance to propose a match that superimposes it on the last */
1565 /* instance of a character that matches it (per trt), or misses */
1566 /* it entirely if there is none. */
1568 dirlen
= len_byte
* direction
;
1569 infinity
= dirlen
- (lim_byte
+ pos_byte
+ len_byte
+ len_byte
) * direction
;
1571 pat
= (base_pat
+= len_byte
- 1);
1574 BM_tab_base
= BM_tab
;
1576 j
= dirlen
; /* to get it in a register */
1577 /* A character that does not appear in the pattern induces a */
1578 /* stride equal to the pattern length. */
1579 while (BM_tab_base
!= BM_tab
)
1587 /* We use this for translation, instead of TRT itself.
1588 We fill this in to handle the characters that actually
1589 occur in the pattern. Others don't matter anyway! */
1590 bzero (simple_translate
, sizeof simple_translate
);
1591 for (i
= 0; i
< 0400; i
++)
1592 simple_translate
[i
] = i
;
1595 while (i
!= infinity
)
1597 unsigned char *ptr
= pat
+ i
;
1605 int this_translated
= 1;
1608 && (ptr
+ 1 == pat
+ len_byte
|| CHAR_HEAD_P (ptr
[1])))
1610 unsigned char *charstart
= ptr
;
1611 while (! CHAR_HEAD_P (*charstart
))
1613 untranslated
= STRING_CHAR (charstart
, ptr
- charstart
+ 1);
1614 if (charset_base
== (untranslated
& ~0xff))
1616 TRANSLATE (ch
, trt
, untranslated
);
1617 if (! CHAR_HEAD_P (*ptr
))
1619 translate_prev_byte
= ptr
[-1];
1620 if (! CHAR_HEAD_P (translate_prev_byte
))
1621 translate_anteprev_byte
= ptr
[-2];
1626 this_translated
= 0;
1630 else if (!multibyte
)
1631 TRANSLATE (ch
, trt
, *ptr
);
1635 this_translated
= 0;
1639 j
= ((unsigned char) ch
) | 0200;
1641 j
= (unsigned char) ch
;
1644 stride_for_teases
= BM_tab
[j
];
1646 BM_tab
[j
] = dirlen
- i
;
1647 /* A translation table is accompanied by its inverse -- see */
1648 /* comment following downcase_table for details */
1649 if (this_translated
)
1651 int starting_ch
= ch
;
1655 TRANSLATE (ch
, inverse_trt
, ch
);
1657 j
= ((unsigned char) ch
) | 0200;
1659 j
= (unsigned char) ch
;
1661 /* For all the characters that map into CH,
1662 set up simple_translate to map the last byte
1664 simple_translate
[j
] = starting_j
;
1665 if (ch
== starting_ch
)
1667 BM_tab
[j
] = dirlen
- i
;
1676 stride_for_teases
= BM_tab
[j
];
1677 BM_tab
[j
] = dirlen
- i
;
1679 /* stride_for_teases tells how much to stride if we get a */
1680 /* match on the far character but are subsequently */
1681 /* disappointed, by recording what the stride would have been */
1682 /* for that character if the last character had been */
1685 infinity
= dirlen
- infinity
;
1686 pos_byte
+= dirlen
- ((direction
> 0) ? direction
: 0);
1687 /* loop invariant - POS_BYTE points at where last char (first
1688 char if reverse) of pattern would align in a possible match. */
1692 unsigned char *tail_end_ptr
;
1694 /* It's been reported that some (broken) compiler thinks that
1695 Boolean expressions in an arithmetic context are unsigned.
1696 Using an explicit ?1:0 prevents this. */
1697 if ((lim_byte
- pos_byte
- ((direction
> 0) ? 1 : 0)) * direction
1699 return (n
* (0 - direction
));
1700 /* First we do the part we can by pointers (maybe nothing) */
1703 limit
= pos_byte
- dirlen
+ direction
;
1704 limit
= ((direction
> 0)
1705 ? BUFFER_CEILING_OF (limit
)
1706 : BUFFER_FLOOR_OF (limit
));
1707 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1708 can take on without hitting edge of buffer or the gap. */
1709 limit
= ((direction
> 0)
1710 ? min (lim_byte
- 1, min (limit
, pos_byte
+ 20000))
1711 : max (lim_byte
, max (limit
, pos_byte
- 20000)));
1712 tail_end
= BUFFER_CEILING_OF (pos_byte
) + 1;
1713 tail_end_ptr
= BYTE_POS_ADDR (tail_end
);
1715 if ((limit
- pos_byte
) * direction
> 20)
1719 p_limit
= BYTE_POS_ADDR (limit
);
1720 p2
= (cursor
= BYTE_POS_ADDR (pos_byte
));
1721 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1722 while (1) /* use one cursor setting as long as i can */
1724 if (direction
> 0) /* worth duplicating */
1726 /* Use signed comparison if appropriate
1727 to make cursor+infinity sure to be > p_limit.
1728 Assuming that the buffer lies in a range of addresses
1729 that are all "positive" (as ints) or all "negative",
1730 either kind of comparison will work as long
1731 as we don't step by infinity. So pick the kind
1732 that works when we do step by infinity. */
1733 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1734 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1735 cursor
+= BM_tab
[*cursor
];
1737 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1738 cursor
+= BM_tab
[*cursor
];
1742 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1743 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1744 cursor
+= BM_tab
[*cursor
];
1746 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1747 cursor
+= BM_tab
[*cursor
];
1749 /* If you are here, cursor is beyond the end of the searched region. */
1750 /* This can happen if you match on the far character of the pattern, */
1751 /* because the "stride" of that character is infinity, a number able */
1752 /* to throw you well beyond the end of the search. It can also */
1753 /* happen if you fail to match within the permitted region and would */
1754 /* otherwise try a character beyond that region */
1755 if ((cursor
- p_limit
) * direction
<= len_byte
)
1756 break; /* a small overrun is genuine */
1757 cursor
-= infinity
; /* large overrun = hit */
1758 i
= dirlen
- direction
;
1761 while ((i
-= direction
) + direction
!= 0)
1764 cursor
-= direction
;
1765 /* Translate only the last byte of a character. */
1767 || ((cursor
== tail_end_ptr
1768 || CHAR_HEAD_P (cursor
[1]))
1769 && (CHAR_HEAD_P (cursor
[0])
1770 || (translate_prev_byte
== cursor
[-1]
1771 && (CHAR_HEAD_P (translate_prev_byte
)
1772 || translate_anteprev_byte
== cursor
[-2])))))
1773 ch
= simple_translate
[*cursor
];
1782 while ((i
-= direction
) + direction
!= 0)
1784 cursor
-= direction
;
1785 if (pat
[i
] != *cursor
)
1789 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1790 if (i
+ direction
== 0)
1794 cursor
-= direction
;
1796 position
= pos_byte
+ cursor
- p2
+ ((direction
> 0)
1797 ? 1 - len_byte
: 0);
1798 set_search_regs (position
, len_byte
);
1800 if ((n
-= direction
) != 0)
1801 cursor
+= dirlen
; /* to resume search */
1803 return ((direction
> 0)
1804 ? search_regs
.end
[0] : search_regs
.start
[0]);
1807 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1809 pos_byte
+= cursor
- p2
;
1812 /* Now we'll pick up a clump that has to be done the hard */
1813 /* way because it covers a discontinuity */
1815 limit
= ((direction
> 0)
1816 ? BUFFER_CEILING_OF (pos_byte
- dirlen
+ 1)
1817 : BUFFER_FLOOR_OF (pos_byte
- dirlen
- 1));
1818 limit
= ((direction
> 0)
1819 ? min (limit
+ len_byte
, lim_byte
- 1)
1820 : max (limit
- len_byte
, lim_byte
));
1821 /* LIMIT is now the last value POS_BYTE can have
1822 and still be valid for a possible match. */
1825 /* This loop can be coded for space rather than */
1826 /* speed because it will usually run only once. */
1827 /* (the reach is at most len + 21, and typically */
1828 /* does not exceed len) */
1829 while ((limit
- pos_byte
) * direction
>= 0)
1830 pos_byte
+= BM_tab
[FETCH_BYTE (pos_byte
)];
1831 /* now run the same tests to distinguish going off the */
1832 /* end, a match or a phony match. */
1833 if ((pos_byte
- limit
) * direction
<= len_byte
)
1834 break; /* ran off the end */
1835 /* Found what might be a match.
1836 Set POS_BYTE back to last (first if reverse) pos. */
1837 pos_byte
-= infinity
;
1838 i
= dirlen
- direction
;
1839 while ((i
-= direction
) + direction
!= 0)
1843 pos_byte
-= direction
;
1844 ptr
= BYTE_POS_ADDR (pos_byte
);
1845 /* Translate only the last byte of a character. */
1847 || ((ptr
== tail_end_ptr
1848 || CHAR_HEAD_P (ptr
[1]))
1849 && (CHAR_HEAD_P (ptr
[0])
1850 || (translate_prev_byte
== ptr
[-1]
1851 && (CHAR_HEAD_P (translate_prev_byte
)
1852 || translate_anteprev_byte
== ptr
[-2])))))
1853 ch
= simple_translate
[*ptr
];
1859 /* Above loop has moved POS_BYTE part or all the way
1860 back to the first pos (last pos if reverse).
1861 Set it once again at the last (first if reverse) char. */
1862 pos_byte
+= dirlen
- i
- direction
;
1863 if (i
+ direction
== 0)
1866 pos_byte
-= direction
;
1868 position
= pos_byte
+ ((direction
> 0) ? 1 - len_byte
: 0);
1870 set_search_regs (position
, len_byte
);
1872 if ((n
-= direction
) != 0)
1873 pos_byte
+= dirlen
; /* to resume search */
1875 return ((direction
> 0)
1876 ? search_regs
.end
[0] : search_regs
.start
[0]);
1879 pos_byte
+= stride_for_teases
;
1882 /* We have done one clump. Can we continue? */
1883 if ((lim_byte
- pos_byte
) * direction
< 0)
1884 return ((0 - n
) * direction
);
1886 return BYTE_TO_CHAR (pos_byte
);
1889 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1890 for a match just found in the current buffer. */
1893 set_search_regs (beg_byte
, nbytes
)
1894 int beg_byte
, nbytes
;
1896 /* Make sure we have registers in which to store
1897 the match position. */
1898 if (search_regs
.num_regs
== 0)
1900 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1901 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1902 search_regs
.num_regs
= 2;
1905 search_regs
.start
[0] = BYTE_TO_CHAR (beg_byte
);
1906 search_regs
.end
[0] = BYTE_TO_CHAR (beg_byte
+ nbytes
);
1907 XSETBUFFER (last_thing_searched
, current_buffer
);
1910 /* Given a string of words separated by word delimiters,
1911 compute a regexp that matches those exact words
1912 separated by arbitrary punctuation. */
1918 register unsigned char *p
, *o
;
1919 register int i
, i_byte
, len
, punct_count
= 0, word_count
= 0;
1924 CHECK_STRING (string
, 0);
1925 p
= XSTRING (string
)->data
;
1926 len
= XSTRING (string
)->size
;
1928 for (i
= 0, i_byte
= 0; i
< len
; )
1932 if (STRING_MULTIBYTE (string
))
1933 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
1935 c
= XSTRING (string
)->data
[i
++];
1937 if (SYNTAX (c
) != Sword
)
1940 if (i
> 0 && SYNTAX (prev_c
) == Sword
)
1947 if (SYNTAX (prev_c
) == Sword
)
1950 return build_string ("");
1952 adjust
= - punct_count
+ 5 * (word_count
- 1) + 4;
1953 val
= make_uninit_multibyte_string (len
+ adjust
,
1954 STRING_BYTES (XSTRING (string
)) + adjust
);
1956 o
= XSTRING (val
)->data
;
1960 for (i
= 0; i
< STRING_BYTES (XSTRING (val
)); i
++)
1961 if (SYNTAX (p
[i
]) == Sword
)
1963 else if (i
> 0 && SYNTAX (p
[i
-1]) == Sword
&& --word_count
)
1978 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
1979 "MSearch backward: ",
1980 "Search backward from point for STRING.\n\
1981 Set point to the beginning of the occurrence found, and return point.\n\
1982 An optional second argument bounds the search; it is a buffer position.\n\
1983 The match found must not extend before that position.\n\
1984 Optional third argument, if t, means if fail just return nil (no error).\n\
1985 If not nil and not t, position at limit of search and return nil.\n\
1986 Optional fourth argument is repeat count--search for successive occurrences.\n\
1987 See also the functions `match-beginning', `match-end' and `replace-match'.")
1988 (string
, bound
, noerror
, count
)
1989 Lisp_Object string
, bound
, noerror
, count
;
1991 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
1994 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
1995 "Search forward from point for STRING.\n\
1996 Set point to the end of the occurrence found, and return point.\n\
1997 An optional second argument bounds the search; it is a buffer position.\n\
1998 The match found must not extend after that position. nil is equivalent\n\
2000 Optional third argument, if t, means if fail just return nil (no error).\n\
2001 If not nil and not t, move to limit of search and return nil.\n\
2002 Optional fourth argument is repeat count--search for successive occurrences.\n\
2003 See also the functions `match-beginning', `match-end' and `replace-match'.")
2004 (string
, bound
, noerror
, count
)
2005 Lisp_Object string
, bound
, noerror
, count
;
2007 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
2010 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
2011 "sWord search backward: ",
2012 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2013 Set point to the beginning of the occurrence found, and return point.\n\
2014 An optional second argument bounds the search; it is a buffer position.\n\
2015 The match found must not extend before that position.\n\
2016 Optional third argument, if t, means if fail just return nil (no error).\n\
2017 If not nil and not t, move to limit of search and return nil.\n\
2018 Optional fourth argument is repeat count--search for successive occurrences.")
2019 (string
, bound
, noerror
, count
)
2020 Lisp_Object string
, bound
, noerror
, count
;
2022 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
2025 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
2027 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2028 Set point to the end of the occurrence found, and return point.\n\
2029 An optional second argument bounds the search; it is a buffer position.\n\
2030 The match found must not extend after that position.\n\
2031 Optional third argument, if t, means if fail just return nil (no error).\n\
2032 If not nil and not t, move to limit of search and return nil.\n\
2033 Optional fourth argument is repeat count--search for successive occurrences.")
2034 (string
, bound
, noerror
, count
)
2035 Lisp_Object string
, bound
, noerror
, count
;
2037 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
2040 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
2041 "sRE search backward: ",
2042 "Search backward from point for match for regular expression REGEXP.\n\
2043 Set point to the beginning of the match, and return point.\n\
2044 The match found is the one starting last in the buffer\n\
2045 and yet ending before the origin of the search.\n\
2046 An optional second argument bounds the search; it is a buffer position.\n\
2047 The match found must start at or after that position.\n\
2048 Optional third argument, if t, means if fail just return nil (no error).\n\
2049 If not nil and not t, move to limit of search and return nil.\n\
2050 Optional fourth argument is repeat count--search for successive occurrences.\n\
2051 See also the functions `match-beginning', `match-end' and `replace-match'.")
2052 (regexp
, bound
, noerror
, count
)
2053 Lisp_Object regexp
, bound
, noerror
, count
;
2055 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
2058 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
2060 "Search forward from point for regular expression REGEXP.\n\
2061 Set point to the end of the occurrence found, and return point.\n\
2062 An optional second argument bounds the search; it is a buffer position.\n\
2063 The match found must not extend after that position.\n\
2064 Optional third argument, if t, means if fail just return nil (no error).\n\
2065 If not nil and not t, move to limit of search and return nil.\n\
2066 Optional fourth argument is repeat count--search for successive occurrences.\n\
2067 See also the functions `match-beginning', `match-end' and `replace-match'.")
2068 (regexp
, bound
, noerror
, count
)
2069 Lisp_Object regexp
, bound
, noerror
, count
;
2071 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
2074 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
2075 "sPosix search backward: ",
2076 "Search backward from point for match for regular expression REGEXP.\n\
2077 Find the longest match in accord with Posix regular expression rules.\n\
2078 Set point to the beginning of the match, and return point.\n\
2079 The match found is the one starting last in the buffer\n\
2080 and yet ending before the origin of the search.\n\
2081 An optional second argument bounds the search; it is a buffer position.\n\
2082 The match found must start at or after that position.\n\
2083 Optional third argument, if t, means if fail just return nil (no error).\n\
2084 If not nil and not t, move to limit of search and return nil.\n\
2085 Optional fourth argument is repeat count--search for successive occurrences.\n\
2086 See also the functions `match-beginning', `match-end' and `replace-match'.")
2087 (regexp
, bound
, noerror
, count
)
2088 Lisp_Object regexp
, bound
, noerror
, count
;
2090 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
2093 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
2095 "Search forward from point for regular expression REGEXP.\n\
2096 Find the longest match in accord with Posix regular expression rules.\n\
2097 Set point to the end of the occurrence found, and return point.\n\
2098 An optional second argument bounds the search; it is a buffer position.\n\
2099 The match found must not extend after that position.\n\
2100 Optional third argument, if t, means if fail just return nil (no error).\n\
2101 If not nil and not t, move to limit of search and return nil.\n\
2102 Optional fourth argument is repeat count--search for successive occurrences.\n\
2103 See also the functions `match-beginning', `match-end' and `replace-match'.")
2104 (regexp
, bound
, noerror
, count
)
2105 Lisp_Object regexp
, bound
, noerror
, count
;
2107 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
2110 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
2111 "Replace text matched by last search with NEWTEXT.\n\
2112 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2113 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2114 based on the replaced text.\n\
2115 If the replaced text has only capital letters\n\
2116 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2117 If the replaced text has at least one word starting with a capital letter,\n\
2118 then capitalize each word in NEWTEXT.\n\n\
2119 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2120 Otherwise treat `\\' as special:\n\
2121 `\\&' in NEWTEXT means substitute original matched text.\n\
2122 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2123 If Nth parens didn't match, substitute nothing.\n\
2124 `\\\\' means insert one `\\'.\n\
2125 FIXEDCASE and LITERAL are optional arguments.\n\
2126 Leaves point at end of replacement text.\n\
2128 The optional fourth argument STRING can be a string to modify.\n\
2129 In that case, this function creates and returns a new string\n\
2130 which is made by replacing the part of STRING that was matched.\n\
2132 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2133 It says to replace just that subexpression instead of the whole match.\n\
2134 This is useful only after a regular expression search or match\n\
2135 since only regular expressions have distinguished subexpressions.")
2136 (newtext
, fixedcase
, literal
, string
, subexp
)
2137 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
2139 enum { nochange
, all_caps
, cap_initial
} case_action
;
2140 register int pos
, last
;
2141 int some_multiletter_word
;
2144 int some_nonuppercase_initial
;
2145 register int c
, prevc
;
2148 int opoint
, newpoint
;
2150 CHECK_STRING (newtext
, 0);
2152 if (! NILP (string
))
2153 CHECK_STRING (string
, 4);
2155 case_action
= nochange
; /* We tried an initialization */
2156 /* but some C compilers blew it */
2158 if (search_regs
.num_regs
<= 0)
2159 error ("replace-match called before any match found");
2165 CHECK_NUMBER (subexp
, 3);
2166 sub
= XINT (subexp
);
2167 if (sub
< 0 || sub
>= search_regs
.num_regs
)
2168 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
2173 if (search_regs
.start
[sub
] < BEGV
2174 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2175 || search_regs
.end
[sub
] > ZV
)
2176 args_out_of_range (make_number (search_regs
.start
[sub
]),
2177 make_number (search_regs
.end
[sub
]));
2181 if (search_regs
.start
[sub
] < 0
2182 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2183 || search_regs
.end
[sub
] > XSTRING (string
)->size
)
2184 args_out_of_range (make_number (search_regs
.start
[sub
]),
2185 make_number (search_regs
.end
[sub
]));
2188 if (NILP (fixedcase
))
2191 /* Decide how to casify by examining the matched text. */
2194 last
= CHAR_TO_BYTE (search_regs
.end
[sub
]);
2196 last
= search_regs
.end
[sub
];
2199 beg
= CHAR_TO_BYTE (search_regs
.start
[sub
]);
2201 beg
= search_regs
.start
[sub
];
2204 case_action
= all_caps
;
2206 /* some_multiletter_word is set nonzero if any original word
2207 is more than one letter long. */
2208 some_multiletter_word
= 0;
2210 some_nonuppercase_initial
= 0;
2213 for (pos
= beg
; pos
< last
; pos
++)
2216 c
= FETCH_BYTE (pos
);
2218 c
= XSTRING (string
)->data
[pos
];
2222 /* Cannot be all caps if any original char is lower case */
2225 if (SYNTAX (prevc
) != Sword
)
2226 some_nonuppercase_initial
= 1;
2228 some_multiletter_word
= 1;
2230 else if (!NOCASEP (c
))
2233 if (SYNTAX (prevc
) != Sword
)
2236 some_multiletter_word
= 1;
2240 /* If the initial is a caseless word constituent,
2241 treat that like a lowercase initial. */
2242 if (SYNTAX (prevc
) != Sword
)
2243 some_nonuppercase_initial
= 1;
2249 /* Convert to all caps if the old text is all caps
2250 and has at least one multiletter word. */
2251 if (! some_lowercase
&& some_multiletter_word
)
2252 case_action
= all_caps
;
2253 /* Capitalize each word, if the old text has all capitalized words. */
2254 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
2255 case_action
= cap_initial
;
2256 else if (!some_nonuppercase_initial
&& some_uppercase
)
2257 /* Should x -> yz, operating on X, give Yz or YZ?
2258 We'll assume the latter. */
2259 case_action
= all_caps
;
2261 case_action
= nochange
;
2264 /* Do replacement in a string. */
2267 Lisp_Object before
, after
;
2269 before
= Fsubstring (string
, make_number (0),
2270 make_number (search_regs
.start
[sub
]));
2271 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
2273 /* Substitute parts of the match into NEWTEXT
2278 int lastpos_byte
= -1;
2279 /* We build up the substituted string in ACCUM. */
2286 for (pos_byte
= 0, pos
= 0; pos_byte
< STRING_BYTES (XSTRING (newtext
));)
2290 int delbackslash
= 0;
2292 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2296 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2299 substart
= search_regs
.start
[sub
];
2300 subend
= search_regs
.end
[sub
];
2302 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2304 if (search_regs
.start
[c
- '0'] >= 0)
2306 substart
= search_regs
.start
[c
- '0'];
2307 subend
= search_regs
.end
[c
- '0'];
2313 error ("Invalid use of `\\' in replacement text");
2317 if (pos
- 1 != lastpos
+ 1)
2318 middle
= substring_both (newtext
, lastpos
+ 1,
2320 pos
- 1, pos_byte
- 1);
2323 accum
= concat3 (accum
, middle
,
2325 make_number (substart
),
2326 make_number (subend
)));
2328 lastpos_byte
= pos_byte
;
2330 else if (delbackslash
)
2332 middle
= substring_both (newtext
, lastpos
+ 1,
2336 accum
= concat2 (accum
, middle
);
2338 lastpos_byte
= pos_byte
;
2342 if (pos
!= lastpos
+ 1)
2343 middle
= substring_both (newtext
, lastpos
+ 1,
2349 newtext
= concat2 (accum
, middle
);
2352 /* Do case substitution in NEWTEXT if desired. */
2353 if (case_action
== all_caps
)
2354 newtext
= Fupcase (newtext
);
2355 else if (case_action
== cap_initial
)
2356 newtext
= Fupcase_initials (newtext
);
2358 return concat3 (before
, newtext
, after
);
2361 /* Record point, the move (quietly) to the start of the match. */
2362 if (PT
> search_regs
.start
[sub
])
2367 TEMP_SET_PT (search_regs
.start
[sub
]);
2369 /* We insert the replacement text before the old text, and then
2370 delete the original text. This means that markers at the
2371 beginning or end of the original will float to the corresponding
2372 position in the replacement. */
2373 if (!NILP (literal
))
2374 Finsert_and_inherit (1, &newtext
);
2377 struct gcpro gcpro1
;
2380 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
2382 int offset
= PT
- search_regs
.start
[sub
];
2384 c
= XSTRING (newtext
)->data
[pos
];
2387 c
= XSTRING (newtext
)->data
[++pos
];
2389 Finsert_buffer_substring
2390 (Fcurrent_buffer (),
2391 make_number (search_regs
.start
[sub
] + offset
),
2392 make_number (search_regs
.end
[sub
] + offset
));
2393 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2395 if (search_regs
.start
[c
- '0'] >= 1)
2396 Finsert_buffer_substring
2397 (Fcurrent_buffer (),
2398 make_number (search_regs
.start
[c
- '0'] + offset
),
2399 make_number (search_regs
.end
[c
- '0'] + offset
));
2404 error ("Invalid use of `\\' in replacement text");
2412 inslen
= PT
- (search_regs
.start
[sub
]);
2413 del_range (search_regs
.start
[sub
] + inslen
, search_regs
.end
[sub
] + inslen
);
2415 if (case_action
== all_caps
)
2416 Fupcase_region (make_number (PT
- inslen
), make_number (PT
));
2417 else if (case_action
== cap_initial
)
2418 Fupcase_initials_region (make_number (PT
- inslen
), make_number (PT
));
2422 /* Put point back where it was in the text. */
2424 TEMP_SET_PT (opoint
+ ZV
);
2426 TEMP_SET_PT (opoint
);
2428 /* Now move point "officially" to the start of the inserted replacement. */
2429 move_if_not_intangible (newpoint
);
2435 match_limit (num
, beginningp
)
2441 CHECK_NUMBER (num
, 0);
2443 if (n
< 0 || n
>= search_regs
.num_regs
)
2444 args_out_of_range (num
, make_number (search_regs
.num_regs
));
2445 if (search_regs
.num_regs
<= 0
2446 || search_regs
.start
[n
] < 0)
2448 return (make_number ((beginningp
) ? search_regs
.start
[n
]
2449 : search_regs
.end
[n
]));
2452 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
2453 "Return position of start of text matched by last search.\n\
2454 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2456 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2458 Zero means the entire text matched by the whole regexp or whole string.")
2462 return match_limit (subexp
, 1);
2465 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
2466 "Return position of end of text matched by last search.\n\
2467 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2469 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2471 Zero means the entire text matched by the whole regexp or whole string.")
2475 return match_limit (subexp
, 0);
2478 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 2, 0,
2479 "Return a list containing all info on what the last search matched.\n\
2480 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2481 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2482 if the last match was on a buffer; integers or nil if a string was matched.\n\
2483 Use `store-match-data' to reinstate the data in this list.\n\
2485 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2486 \(rather than markers) to represent buffer positions.\n\
2487 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2488 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2490 Lisp_Object integers
, reuse
;
2492 Lisp_Object tail
, prev
;
2496 if (NILP (last_thing_searched
))
2499 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
)
2500 * sizeof (Lisp_Object
));
2503 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2505 int start
= search_regs
.start
[i
];
2508 if (EQ (last_thing_searched
, Qt
)
2509 || ! NILP (integers
))
2511 XSETFASTINT (data
[2 * i
], start
);
2512 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
2514 else if (BUFFERP (last_thing_searched
))
2516 data
[2 * i
] = Fmake_marker ();
2517 Fset_marker (data
[2 * i
],
2518 make_number (start
),
2519 last_thing_searched
);
2520 data
[2 * i
+ 1] = Fmake_marker ();
2521 Fset_marker (data
[2 * i
+ 1],
2522 make_number (search_regs
.end
[i
]),
2523 last_thing_searched
);
2526 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2532 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
2535 /* If REUSE is not usable, cons up the values and return them. */
2536 if (! CONSP (reuse
))
2537 return Flist (2 * len
+ 2, data
);
2539 /* If REUSE is a list, store as many value elements as will fit
2540 into the elements of REUSE. */
2541 for (i
= 0, tail
= reuse
; CONSP (tail
);
2542 i
++, tail
= XCONS (tail
)->cdr
)
2544 if (i
< 2 * len
+ 2)
2545 XCONS (tail
)->car
= data
[i
];
2547 XCONS (tail
)->car
= Qnil
;
2551 /* If we couldn't fit all value elements into REUSE,
2552 cons up the rest of them and add them to the end of REUSE. */
2553 if (i
< 2 * len
+ 2)
2554 XCONS (prev
)->cdr
= Flist (2 * len
+ 2 - i
, data
+ i
);
2560 DEFUN ("set-match-data", Fset_match_data
, Sset_match_data
, 1, 1, 0,
2561 "Set internal data on last search match from elements of LIST.\n\
2562 LIST should have been created by calling `match-data' previously.")
2564 register Lisp_Object list
;
2567 register Lisp_Object marker
;
2569 if (running_asynch_code
)
2570 save_search_regs ();
2572 if (!CONSP (list
) && !NILP (list
))
2573 list
= wrong_type_argument (Qconsp
, list
);
2575 /* Unless we find a marker with a buffer in LIST, assume that this
2576 match data came from a string. */
2577 last_thing_searched
= Qt
;
2579 /* Allocate registers if they don't already exist. */
2581 int length
= XFASTINT (Flength (list
)) / 2;
2583 if (length
> search_regs
.num_regs
)
2585 if (search_regs
.num_regs
== 0)
2588 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2590 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2595 = (regoff_t
*) xrealloc (search_regs
.start
,
2596 length
* sizeof (regoff_t
));
2598 = (regoff_t
*) xrealloc (search_regs
.end
,
2599 length
* sizeof (regoff_t
));
2602 search_regs
.num_regs
= length
;
2606 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2608 marker
= Fcar (list
);
2611 search_regs
.start
[i
] = -1;
2616 if (MARKERP (marker
))
2618 if (XMARKER (marker
)->buffer
== 0)
2619 XSETFASTINT (marker
, 0);
2621 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
2624 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
2625 search_regs
.start
[i
] = XINT (marker
);
2628 marker
= Fcar (list
);
2629 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
2630 XSETFASTINT (marker
, 0);
2632 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
2633 search_regs
.end
[i
] = XINT (marker
);
2641 /* If non-zero the match data have been saved in saved_search_regs
2642 during the execution of a sentinel or filter. */
2643 static int search_regs_saved
;
2644 static struct re_registers saved_search_regs
;
2646 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2647 if asynchronous code (filter or sentinel) is running. */
2651 if (!search_regs_saved
)
2653 saved_search_regs
.num_regs
= search_regs
.num_regs
;
2654 saved_search_regs
.start
= search_regs
.start
;
2655 saved_search_regs
.end
= search_regs
.end
;
2656 search_regs
.num_regs
= 0;
2657 search_regs
.start
= 0;
2658 search_regs
.end
= 0;
2660 search_regs_saved
= 1;
2664 /* Called upon exit from filters and sentinels. */
2666 restore_match_data ()
2668 if (search_regs_saved
)
2670 if (search_regs
.num_regs
> 0)
2672 xfree (search_regs
.start
);
2673 xfree (search_regs
.end
);
2675 search_regs
.num_regs
= saved_search_regs
.num_regs
;
2676 search_regs
.start
= saved_search_regs
.start
;
2677 search_regs
.end
= saved_search_regs
.end
;
2679 search_regs_saved
= 0;
2683 /* Quote a string to inactivate reg-expr chars */
2685 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
2686 "Return a regexp string which matches exactly STRING and nothing else.")
2690 register unsigned char *in
, *out
, *end
;
2691 register unsigned char *temp
;
2692 int backslashes_added
= 0;
2694 CHECK_STRING (string
, 0);
2696 temp
= (unsigned char *) alloca (STRING_BYTES (XSTRING (string
)) * 2);
2698 /* Now copy the data into the new string, inserting escapes. */
2700 in
= XSTRING (string
)->data
;
2701 end
= in
+ STRING_BYTES (XSTRING (string
));
2704 for (; in
!= end
; in
++)
2706 if (*in
== '[' || *in
== ']'
2707 || *in
== '*' || *in
== '.' || *in
== '\\'
2708 || *in
== '?' || *in
== '+'
2709 || *in
== '^' || *in
== '$')
2710 *out
++ = '\\', backslashes_added
++;
2714 return make_specified_string (temp
,
2715 XSTRING (string
)->size
+ backslashes_added
,
2717 STRING_MULTIBYTE (string
));
2724 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
2726 searchbufs
[i
].buf
.allocated
= 100;
2727 searchbufs
[i
].buf
.buffer
= (unsigned char *) malloc (100);
2728 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
2729 searchbufs
[i
].regexp
= Qnil
;
2730 staticpro (&searchbufs
[i
].regexp
);
2731 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
2733 searchbuf_head
= &searchbufs
[0];
2735 Qsearch_failed
= intern ("search-failed");
2736 staticpro (&Qsearch_failed
);
2737 Qinvalid_regexp
= intern ("invalid-regexp");
2738 staticpro (&Qinvalid_regexp
);
2740 Fput (Qsearch_failed
, Qerror_conditions
,
2741 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
2742 Fput (Qsearch_failed
, Qerror_message
,
2743 build_string ("Search failed"));
2745 Fput (Qinvalid_regexp
, Qerror_conditions
,
2746 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
2747 Fput (Qinvalid_regexp
, Qerror_message
,
2748 build_string ("Invalid regexp"));
2750 last_thing_searched
= Qnil
;
2751 staticpro (&last_thing_searched
);
2753 defsubr (&Slooking_at
);
2754 defsubr (&Sposix_looking_at
);
2755 defsubr (&Sstring_match
);
2756 defsubr (&Sposix_string_match
);
2757 defsubr (&Ssearch_forward
);
2758 defsubr (&Ssearch_backward
);
2759 defsubr (&Sword_search_forward
);
2760 defsubr (&Sword_search_backward
);
2761 defsubr (&Sre_search_forward
);
2762 defsubr (&Sre_search_backward
);
2763 defsubr (&Sposix_search_forward
);
2764 defsubr (&Sposix_search_backward
);
2765 defsubr (&Sreplace_match
);
2766 defsubr (&Smatch_beginning
);
2767 defsubr (&Smatch_end
);
2768 defsubr (&Smatch_data
);
2769 defsubr (&Sset_match_data
);
2770 defsubr (&Sregexp_quote
);