1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
31 #include "region-cache.h"
33 #include "blockinput.h"
34 #include "intervals.h"
36 #include <sys/types.h>
/* Yield the smaller of A and B.  Each argument appears twice in the
   expansion, so an argument with side effects may be evaluated twice.  */
39 #define min(a, b) ((a) < (b) ? (a) : (b))
/* Yield the larger of A and B.  As with min, an argument may be
   evaluated twice.  */
40 #define max(a, b) ((a) > (b) ? (a) : (b))
/* Number of elements in the searchbufs array of struct regexp_cache.  */
42 #define REGEXP_CACHE_SIZE 20
44 /* If the regexp is non-nil, then the buffer contains the compiled form
45 of that regexp, suitable for searching. */
48 struct regexp_cache
*next
;
50 struct re_pattern_buffer buf
;
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache
*searchbuf_head
;
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match can be called).
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs
;
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched
;
87 /* error condition signaled when regexp compile_pattern fails */
89 Lisp_Object Qinvalid_regexp
;
/* Forward declarations of file-local helpers that are called further
   down in this file.  They are written in the old style, with empty
   parameter lists, so the compiler does not check argument types
   against these declarations.  */
91 static void set_search_regs ();
92 static void save_search_regs ();
93 static int simple_search ();
94 static int boyer_moore ();
95 static int search_buffer ();
100 error ("Stack overflow in regexp matcher");
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data. */
124 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
)
125 struct regexp_cache
*cp
;
127 Lisp_Object translate
;
128 struct re_registers
*regp
;
132 unsigned char *raw_pattern
;
133 int raw_pattern_size
;
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
141 if (multibyte
== STRING_MULTIBYTE (pattern
))
143 raw_pattern
= (unsigned char *) XSTRING (pattern
)->data
;
144 raw_pattern_size
= STRING_BYTES (XSTRING (pattern
));
148 raw_pattern_size
= count_size_as_multibyte (XSTRING (pattern
)->data
,
149 XSTRING (pattern
)->size
);
150 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
151 copy_text (XSTRING (pattern
)->data
, raw_pattern
,
152 XSTRING (pattern
)->size
, 0, 1);
156 /* Converting multibyte to single-byte.
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
162 raw_pattern_size
= XSTRING (pattern
)->size
;
163 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
164 copy_text (XSTRING (pattern
)->data
, raw_pattern
,
165 STRING_BYTES (XSTRING (pattern
)), 1, 0);
169 cp
->buf
.translate
= (! NILP (translate
) ? translate
: make_number (0));
171 cp
->buf
.multibyte
= multibyte
;
173 old
= re_set_syntax (RE_SYNTAX_EMACS
174 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
175 val
= (char *) re_compile_pattern ((char *)raw_pattern
,
176 raw_pattern_size
, &cp
->buf
);
180 Fsignal (Qinvalid_regexp
, Fcons (build_string (val
), Qnil
));
182 cp
->regexp
= Fcopy_sequence (pattern
);
185 /* Compile a regexp if necessary, but first check to see if there's one in the cache.
187 PATTERN is the pattern to compile.
188 TRANSLATE is a translation table for ignoring case, or nil for none.
189 REGP is the structure that says where to store the "register"
190 values that will result from matching this pattern.
191 If it is 0, we should compile the pattern not to record any
192 subexpression bounds.
193 POSIX is nonzero if we want full backtracking (POSIX style)
194 for this pattern. 0 means backtrack only enough to get a valid match. */
196 struct re_pattern_buffer
*
197 compile_pattern (pattern
, regp
, translate
, posix
, multibyte
)
199 struct re_registers
*regp
;
200 Lisp_Object translate
;
201 int posix
, multibyte
;
203 struct regexp_cache
*cp
, **cpp
;
205 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
208 if (XSTRING (cp
->regexp
)->size
== XSTRING (pattern
)->size
209 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
210 && EQ (cp
->buf
.translate
, (! NILP (translate
) ? translate
: make_number (0)))
211 && cp
->posix
== posix
212 && cp
->buf
.multibyte
== multibyte
)
215 /* If we're at the end of the cache, compile into the last cell. */
218 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
);
223 /* When we get here, cp (aka *cpp) contains the compiled pattern,
224 either because we found it in the cache or because we just compiled it.
225 Move it to the front of the queue to mark it as most recently used. */
227 cp
->next
= searchbuf_head
;
230 /* Advise the searching functions about the space we have allocated
231 for register data. */
233 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
238 /* Error condition used for failing searches */
239 Lisp_Object Qsearch_failed
;
245 Fsignal (Qsearch_failed
, Fcons (arg
, Qnil
));
250 looking_at_1 (string
, posix
)
255 unsigned char *p1
, *p2
;
258 struct re_pattern_buffer
*bufp
;
260 if (running_asynch_code
)
263 CHECK_STRING (string
, 0);
264 bufp
= compile_pattern (string
, &search_regs
,
265 (!NILP (current_buffer
->case_fold_search
)
266 ? DOWNCASE_TABLE
: Qnil
),
268 !NILP (current_buffer
->enable_multibyte_characters
));
271 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
273 /* Get pointers and sizes of the two strings
274 that make up the visible portion of the buffer. */
277 s1
= GPT_BYTE
- BEGV_BYTE
;
279 s2
= ZV_BYTE
- GPT_BYTE
;
283 s2
= ZV_BYTE
- BEGV_BYTE
;
288 s1
= ZV_BYTE
- BEGV_BYTE
;
292 re_match_object
= Qnil
;
294 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
295 PT_BYTE
- BEGV_BYTE
, &search_regs
,
296 ZV_BYTE
- BEGV_BYTE
);
300 val
= (0 <= i
? Qt
: Qnil
);
302 for (i
= 0; i
< search_regs
.num_regs
; i
++)
303 if (search_regs
.start
[i
] >= 0)
306 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
308 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
310 XSETBUFFER (last_thing_searched
, current_buffer
);
315 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
316 "Return t if text after point matches regular expression REGEXP.\n\
317 This function modifies the match data that `match-beginning',\n\
318 `match-end' and `match-data' access; save and restore the match\n\
319 data if you want to preserve them.")
323 return looking_at_1 (regexp
, 0);
326 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
327 "Return t if text after point matches regular expression REGEXP.\n\
328 Find the longest match, in accord with Posix regular expression rules.\n\
329 This function modifies the match data that `match-beginning',\n\
330 `match-end' and `match-data' access; save and restore the match\n\
331 data if you want to preserve them.")
335 return looking_at_1 (regexp
, 1);
339 string_match_1 (regexp
, string
, start
, posix
)
340 Lisp_Object regexp
, string
, start
;
344 struct re_pattern_buffer
*bufp
;
348 if (running_asynch_code
)
351 CHECK_STRING (regexp
, 0);
352 CHECK_STRING (string
, 1);
355 pos
= 0, pos_byte
= 0;
358 int len
= XSTRING (string
)->size
;
360 CHECK_NUMBER (start
, 2);
362 if (pos
< 0 && -pos
<= len
)
364 else if (0 > pos
|| pos
> len
)
365 args_out_of_range (string
, start
);
366 pos_byte
= string_char_to_byte (string
, pos
);
369 bufp
= compile_pattern (regexp
, &search_regs
,
370 (!NILP (current_buffer
->case_fold_search
)
371 ? DOWNCASE_TABLE
: Qnil
),
373 STRING_MULTIBYTE (string
));
375 re_match_object
= string
;
377 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
378 STRING_BYTES (XSTRING (string
)), pos_byte
,
379 STRING_BYTES (XSTRING (string
)) - pos_byte
,
382 last_thing_searched
= Qt
;
385 if (val
< 0) return Qnil
;
387 for (i
= 0; i
< search_regs
.num_regs
; i
++)
388 if (search_regs
.start
[i
] >= 0)
391 = string_byte_to_char (string
, search_regs
.start
[i
]);
393 = string_byte_to_char (string
, search_regs
.end
[i
]);
396 return make_number (string_byte_to_char (string
, val
));
399 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
400 "Return index of start of first match for REGEXP in STRING, or nil.\n\
401 If third arg START is non-nil, start search at that index in STRING.\n\
402 For index of first char beyond the match, do (match-end 0).\n\
403 `match-end' and `match-beginning' also give indices of substrings\n\
404 matched by parenthesis constructs in the pattern.")
405 (regexp
, string
, start
)
406 Lisp_Object regexp
, string
, start
;
408 return string_match_1 (regexp
, string
, start
, 0);
411 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
412 "Return index of start of first match for REGEXP in STRING, or nil.\n\
413 Find the longest match, in accord with Posix regular expression rules.\n\
414 If third arg START is non-nil, start search at that index in STRING.\n\
415 For index of first char beyond the match, do (match-end 0).\n\
416 `match-end' and `match-beginning' also give indices of substrings\n\
417 matched by parenthesis constructs in the pattern.")
418 (regexp
, string
, start
)
419 Lisp_Object regexp
, string
, start
;
421 return string_match_1 (regexp
, string
, start
, 1);
424 /* Match REGEXP against STRING, searching all of STRING,
425 and return the index of the match, or negative on failure.
426 This does not clobber the match data. */
429 fast_string_match (regexp
, string
)
430 Lisp_Object regexp
, string
;
433 struct re_pattern_buffer
*bufp
;
435 bufp
= compile_pattern (regexp
, 0, Qnil
,
436 0, STRING_MULTIBYTE (string
));
438 re_match_object
= string
;
440 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
441 STRING_BYTES (XSTRING (string
)), 0,
442 STRING_BYTES (XSTRING (string
)), 0);
447 /* Match REGEXP against STRING, searching all of STRING ignoring case,
448 and return the index of the match, or negative on failure.
449 This does not clobber the match data.
450 We assume that STRING contains single-byte characters. */
452 extern Lisp_Object Vascii_downcase_table
;
455 fast_c_string_match_ignore_case (regexp
, string
)
460 struct re_pattern_buffer
*bufp
;
461 int len
= strlen (string
);
463 regexp
= string_make_unibyte (regexp
);
464 re_match_object
= Qt
;
465 bufp
= compile_pattern (regexp
, 0,
466 Vascii_downcase_table
, 0,
469 val
= re_search (bufp
, string
, len
, 0, len
, 0);
474 /* The newline cache: remembering which sections of text have no newlines. */
476 /* If the user has requested newline caching, make sure it's on.
477 Otherwise, make sure it's off.
478 This is our cheesy way of associating an action with the change of
479 state of a buffer-local variable. */
481 newline_cache_on_off (buf
)
484 if (NILP (buf
->cache_long_line_scans
))
486 /* It should be off. */
487 if (buf
->newline_cache
)
489 free_region_cache (buf
->newline_cache
);
490 buf
->newline_cache
= 0;
495 /* It should be on. */
496 if (buf
->newline_cache
== 0)
497 buf
->newline_cache
= new_region_cache ();
502 /* Search for COUNT instances of the character TARGET between START and END.
504 If COUNT is positive, search forwards; END must be >= START.
505 If COUNT is negative, search backwards for the -COUNTth instance;
506 END must be <= START.
507 If COUNT is zero, do anything you please; run rogue, for all I care.
509 If END is zero, use BEGV or ZV instead, as appropriate for the
510 direction indicated by COUNT.
512 If we find COUNT instances, set *SHORTAGE to zero, and return the
513 position after the COUNTth match. Note that for reverse motion
514 this is not the same as the usual convention for Emacs motion commands.
516 If we don't find COUNT instances before reaching END, set *SHORTAGE
517 to the number of TARGETs left unfound, and return END.
519 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
520 except when inside redisplay. */
523 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
530 struct region_cache
*newline_cache
;
541 if (! end
) end
= BEGV
;
544 newline_cache_on_off (current_buffer
);
545 newline_cache
= current_buffer
->newline_cache
;
550 immediate_quit
= allow_quit
;
555 /* Our innermost scanning loop is very simple; it doesn't know
556 about gaps, buffer ends, or the newline cache. ceiling is
557 the position of the last character before the next such
558 obstacle --- the last character the dumb search loop should examine. */
560 int ceiling_byte
= CHAR_TO_BYTE (end
) - 1;
561 int start_byte
= CHAR_TO_BYTE (start
);
564 /* If we're looking for a newline, consult the newline cache
565 to see where we can avoid some scanning. */
566 if (target
== '\n' && newline_cache
)
570 while (region_cache_forward
571 (current_buffer
, newline_cache
, start_byte
, &next_change
))
572 start_byte
= next_change
;
573 immediate_quit
= allow_quit
;
575 /* START should never be after END. */
576 if (start_byte
> ceiling_byte
)
577 start_byte
= ceiling_byte
;
579 /* Now the text after start is an unknown region, and
580 next_change is the position of the next known region. */
581 ceiling_byte
= min (next_change
- 1, ceiling_byte
);
584 /* The dumb loop can only scan text stored in contiguous
585 bytes. BUFFER_CEILING_OF returns the last character
586 position that is contiguous, so the ceiling is the
587 position after that. */
588 tem
= BUFFER_CEILING_OF (start_byte
);
589 ceiling_byte
= min (tem
, ceiling_byte
);
592 /* The termination address of the dumb loop. */
593 register unsigned char *ceiling_addr
594 = BYTE_POS_ADDR (ceiling_byte
) + 1;
595 register unsigned char *cursor
596 = BYTE_POS_ADDR (start_byte
);
597 unsigned char *base
= cursor
;
599 while (cursor
< ceiling_addr
)
601 unsigned char *scan_start
= cursor
;
604 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
607 /* If we're looking for newlines, cache the fact that
608 the region from start to cursor is free of them. */
609 if (target
== '\n' && newline_cache
)
610 know_region_cache (current_buffer
, newline_cache
,
611 start_byte
+ scan_start
- base
,
612 start_byte
+ cursor
- base
);
614 /* Did we find the target character? */
615 if (cursor
< ceiling_addr
)
620 return BYTE_TO_CHAR (start_byte
+ cursor
- base
+ 1);
626 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
632 /* The last character to check before the next obstacle. */
633 int ceiling_byte
= CHAR_TO_BYTE (end
);
634 int start_byte
= CHAR_TO_BYTE (start
);
637 /* Consult the newline cache, if appropriate. */
638 if (target
== '\n' && newline_cache
)
642 while (region_cache_backward
643 (current_buffer
, newline_cache
, start_byte
, &next_change
))
644 start_byte
= next_change
;
645 immediate_quit
= allow_quit
;
647 /* Start should never be at or before end. */
648 if (start_byte
<= ceiling_byte
)
649 start_byte
= ceiling_byte
+ 1;
651 /* Now the text before start is an unknown region, and
652 next_change is the position of the next known region. */
653 ceiling_byte
= max (next_change
, ceiling_byte
);
656 /* Stop scanning before the gap. */
657 tem
= BUFFER_FLOOR_OF (start_byte
- 1);
658 ceiling_byte
= max (tem
, ceiling_byte
);
661 /* The termination address of the dumb loop. */
662 register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte
);
663 register unsigned char *cursor
= BYTE_POS_ADDR (start_byte
- 1);
664 unsigned char *base
= cursor
;
666 while (cursor
>= ceiling_addr
)
668 unsigned char *scan_start
= cursor
;
670 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
673 /* If we're looking for newlines, cache the fact that
674 the region from after the cursor to start is free of them. */
675 if (target
== '\n' && newline_cache
)
676 know_region_cache (current_buffer
, newline_cache
,
677 start_byte
+ cursor
- base
,
678 start_byte
+ scan_start
- base
);
680 /* Did we find the target character? */
681 if (cursor
>= ceiling_addr
)
686 return BYTE_TO_CHAR (start_byte
+ cursor
- base
);
692 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
698 *shortage
= count
* direction
;
702 /* Search for COUNT instances of a line boundary, which means either a
703 newline or (if selective display enabled) a carriage return.
704 Start at START. If COUNT is negative, search backwards.
706 We report the resulting position by calling TEMP_SET_PT_BOTH.
708 If we find COUNT instances, we position after (always after,
709 even if scanning backwards) the COUNTth match, and return 0.
711 If we don't find COUNT instances before reaching the end of the
712 buffer (or the beginning, if scanning backwards), we return
713 the number of line boundaries left unfound, and position at
714 the limit we bumped up against.
716 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
717 except in special cases. */
720 scan_newline (start
, start_byte
, limit
, limit_byte
, count
, allow_quit
)
721 int start
, start_byte
;
722 int limit
, limit_byte
;
726 int direction
= ((count
> 0) ? 1 : -1);
728 register unsigned char *cursor
;
731 register int ceiling
;
732 register unsigned char *ceiling_addr
;
734 int old_immediate_quit
= immediate_quit
;
736 /* If we are not in selective display mode,
737 check only for newlines. */
738 int selective_display
= (!NILP (current_buffer
->selective_display
)
739 && !INTEGERP (current_buffer
->selective_display
));
741 /* The code that follows is like scan_buffer
742 but checks for either newline or carriage return. */
747 start_byte
= CHAR_TO_BYTE (start
);
751 while (start_byte
< limit_byte
)
753 ceiling
= BUFFER_CEILING_OF (start_byte
);
754 ceiling
= min (limit_byte
- 1, ceiling
);
755 ceiling_addr
= BYTE_POS_ADDR (ceiling
) + 1;
756 base
= (cursor
= BYTE_POS_ADDR (start_byte
));
759 while (*cursor
!= '\n' && ++cursor
!= ceiling_addr
)
762 if (cursor
!= ceiling_addr
)
766 immediate_quit
= old_immediate_quit
;
767 start_byte
= start_byte
+ cursor
- base
+ 1;
768 start
= BYTE_TO_CHAR (start_byte
);
769 TEMP_SET_PT_BOTH (start
, start_byte
);
773 if (++cursor
== ceiling_addr
)
779 start_byte
+= cursor
- base
;
784 while (start_byte
> limit_byte
)
786 ceiling
= BUFFER_FLOOR_OF (start_byte
- 1);
787 ceiling
= max (limit_byte
, ceiling
);
788 ceiling_addr
= BYTE_POS_ADDR (ceiling
) - 1;
789 base
= (cursor
= BYTE_POS_ADDR (start_byte
- 1) + 1);
792 while (--cursor
!= ceiling_addr
&& *cursor
!= '\n')
795 if (cursor
!= ceiling_addr
)
799 immediate_quit
= old_immediate_quit
;
800 /* Return the position AFTER the match we found. */
801 start_byte
= start_byte
+ cursor
- base
+ 1;
802 start
= BYTE_TO_CHAR (start_byte
);
803 TEMP_SET_PT_BOTH (start
, start_byte
);
810 /* Here we add 1 to compensate for the last decrement
811 of CURSOR, which took it past the valid range. */
812 start_byte
+= cursor
- base
+ 1;
816 TEMP_SET_PT_BOTH (limit
, limit_byte
);
817 immediate_quit
= old_immediate_quit
;
819 return count
* direction
;
823 find_next_newline_no_quit (from
, cnt
)
824 register int from
, cnt
;
826 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
829 /* Like find_next_newline, but returns position before the newline,
830 not after, and only search up to TO. This isn't just
831 find_next_newline (...)-1, because you might hit TO. */
834 find_before_next_newline (from
, to
, cnt
)
838 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
846 /* Subroutines of Lisp buffer search functions. */
849 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
850 Lisp_Object string
, bound
, noerror
, count
;
861 CHECK_NUMBER (count
, 3);
865 CHECK_STRING (string
, 0);
869 lim
= ZV
, lim_byte
= ZV_BYTE
;
871 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
875 CHECK_NUMBER_COERCE_MARKER (bound
, 1);
877 if (n
> 0 ? lim
< PT
: lim
> PT
)
878 error ("Invalid search bound (wrong side of point)");
880 lim
= ZV
, lim_byte
= ZV_BYTE
;
882 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
884 lim_byte
= CHAR_TO_BYTE (lim
);
887 np
= search_buffer (string
, PT
, PT_BYTE
, lim
, lim_byte
, n
, RE
,
888 (!NILP (current_buffer
->case_fold_search
)
889 ? current_buffer
->case_canon_table
891 (!NILP (current_buffer
->case_fold_search
)
892 ? current_buffer
->case_eqv_table
898 return signal_failure (string
);
899 if (!EQ (noerror
, Qt
))
901 if (lim
< BEGV
|| lim
> ZV
)
903 SET_PT_BOTH (lim
, lim_byte
);
905 #if 0 /* This would be clean, but maybe programs depend on
906 a value of nil here. */
914 if (np
< BEGV
|| np
> ZV
)
919 return make_number (np
);
922 /* Return 1 if REGEXP matches just one constant string. */
925 trivial_regexp_p (regexp
)
928 int len
= STRING_BYTES (XSTRING (regexp
));
929 unsigned char *s
= XSTRING (regexp
)->data
;
935 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
942 case '|': case '(': case ')': case '`': case '\'': case 'b':
943 case 'B': case '<': case '>': case 'w': case 'W': case 's':
945 case 'c': case 'C': /* for categoryspec and notcategoryspec */
946 case '1': case '2': case '3': case '4': case '5':
947 case '6': case '7': case '8': case '9':
955 /* Search for the n'th occurrence of STRING in the current buffer,
956 starting at position POS and stopping at position LIM,
957 treating STRING as a literal string if RE is false or as
958 a regular expression if RE is true.
960 If N is positive, searching is forward and LIM must be greater than POS.
961 If N is negative, searching is backward and LIM must be less than POS.
963 Returns -x if x occurrences remain to be found (x > 0),
964 or else the position at the beginning of the Nth occurrence
965 (if searching backward) or the end (if searching forward).
967 POSIX is nonzero if we want full backtracking (POSIX style)
968 for this pattern. 0 means backtrack only enough to get a valid match. */
970 #define TRANSLATE(out, trt, d) \
976 temp = Faref (trt, make_number (d)); \
977 if (INTEGERP (temp)) \
988 search_buffer (string
, pos
, pos_byte
, lim
, lim_byte
, n
,
989 RE
, trt
, inverse_trt
, posix
)
998 Lisp_Object inverse_trt
;
1001 int len
= XSTRING (string
)->size
;
1002 int len_byte
= STRING_BYTES (XSTRING (string
));
1005 if (running_asynch_code
)
1006 save_search_regs ();
1008 /* Null string is found at starting position. */
1011 set_search_regs (pos
, 0);
1015 /* Searching 0 times means don't move. */
1019 if (RE
&& !trivial_regexp_p (string
))
1021 unsigned char *p1
, *p2
;
1023 struct re_pattern_buffer
*bufp
;
1025 bufp
= compile_pattern (string
, &search_regs
, trt
, posix
,
1026 !NILP (current_buffer
->enable_multibyte_characters
));
1028 immediate_quit
= 1; /* Quit immediately if user types ^G,
1029 because letting this function finish
1030 can take too long. */
1031 QUIT
; /* Do a pending quit right away,
1032 to avoid paradoxical behavior */
1033 /* Get pointers and sizes of the two strings
1034 that make up the visible portion of the buffer. */
1037 s1
= GPT_BYTE
- BEGV_BYTE
;
1039 s2
= ZV_BYTE
- GPT_BYTE
;
1043 s2
= ZV_BYTE
- BEGV_BYTE
;
1048 s1
= ZV_BYTE
- BEGV_BYTE
;
1051 re_match_object
= Qnil
;
1056 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1057 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1059 /* Don't allow match past current point */
1060 pos_byte
- BEGV_BYTE
);
1063 matcher_overflow ();
1067 pos_byte
= search_regs
.start
[0] + BEGV_BYTE
;
1068 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1069 if (search_regs
.start
[i
] >= 0)
1071 search_regs
.start
[i
]
1072 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1074 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1076 XSETBUFFER (last_thing_searched
, current_buffer
);
1077 /* Set pos to the new position. */
1078 pos
= search_regs
.start
[0];
1090 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1091 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1093 lim_byte
- BEGV_BYTE
);
1096 matcher_overflow ();
1100 pos_byte
= search_regs
.end
[0] + BEGV_BYTE
;
1101 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1102 if (search_regs
.start
[i
] >= 0)
1104 search_regs
.start
[i
]
1105 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1107 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1109 XSETBUFFER (last_thing_searched
, current_buffer
);
1110 pos
= search_regs
.end
[0];
1122 else /* non-RE case */
1124 unsigned char *raw_pattern
, *pat
;
1125 int raw_pattern_size
;
1126 int raw_pattern_size_byte
;
1127 unsigned char *patbuf
;
1128 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1129 unsigned char *base_pat
= XSTRING (string
)->data
;
1130 int charset_base
= -1;
1133 /* MULTIBYTE says whether the text to be searched is multibyte.
1134 We must convert PATTERN to match that, or we will not really
1135 find things right. */
1137 if (multibyte
== STRING_MULTIBYTE (string
))
1139 raw_pattern
= (unsigned char *) XSTRING (string
)->data
;
1140 raw_pattern_size
= XSTRING (string
)->size
;
1141 raw_pattern_size_byte
= STRING_BYTES (XSTRING (string
));
1145 raw_pattern_size
= XSTRING (string
)->size
;
1146 raw_pattern_size_byte
1147 = count_size_as_multibyte (XSTRING (string
)->data
,
1149 raw_pattern
= (unsigned char *) alloca (raw_pattern_size_byte
+ 1);
1150 copy_text (XSTRING (string
)->data
, raw_pattern
,
1151 XSTRING (string
)->size
, 0, 1);
1155 /* Converting multibyte to single-byte.
1157 ??? Perhaps this conversion should be done in a special way
1158 by subtracting nonascii-insert-offset from each non-ASCII char,
1159 so that only the multibyte chars which really correspond to
1160 the chosen single-byte character set can possibly match. */
1161 raw_pattern_size
= XSTRING (string
)->size
;
1162 raw_pattern_size_byte
= XSTRING (string
)->size
;
1163 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
1164 copy_text (XSTRING (string
)->data
, raw_pattern
,
1165 STRING_BYTES (XSTRING (string
)), 1, 0);
1168 /* Copy and optionally translate the pattern. */
1169 len
= raw_pattern_size
;
1170 len_byte
= raw_pattern_size_byte
;
1171 patbuf
= (unsigned char *) alloca (len_byte
);
1173 base_pat
= raw_pattern
;
1178 unsigned char workbuf
[4], *str
;
1179 int c
, translated
, inverse
;
1180 int in_charlen
, charlen
;
1182 /* If we got here and the RE flag is set, it's because we're
1183 dealing with a regexp known to be trivial, so the backslash
1184 just quotes the next character. */
1185 if (RE
&& *base_pat
== '\\')
1192 c
= STRING_CHAR_AND_LENGTH (base_pat
, len_byte
, in_charlen
);
1193 /* Translate the character, if requested. */
1194 TRANSLATE (translated
, trt
, c
);
1195 /* If translation changed the byte-length, go back
1196 to the original character. */
1197 charlen
= CHAR_STRING (translated
, workbuf
, str
);
1198 if (in_charlen
!= charlen
)
1201 charlen
= CHAR_STRING (c
, workbuf
, str
);
1204 TRANSLATE (inverse
, inverse_trt
, c
);
1206 /* Did this char actually get translated?
1207 Would any other char get translated into it? */
1208 if (translated
!= c
|| inverse
!= c
)
1210 /* Keep track of which character set row
1211 contains the characters that need translation. */
1212 int charset_base_code
= c
& ~0xff;
1213 if (charset_base
== -1)
1214 charset_base
= charset_base_code
;
1215 else if (charset_base
!= charset_base_code
)
1216 /* If two different rows appear, needing translation,
1217 then we cannot use boyer_moore search. */
1219 /* ??? Handa: this must do simple = 0
1220 if c is a composite character. */
1223 /* Store this character into the translated pattern. */
1224 bcopy (str
, pat
, charlen
);
1226 base_pat
+= in_charlen
;
1227 len_byte
-= in_charlen
;
1234 int c
, translated
, inverse
;
1236 /* If we got here and the RE flag is set, it's because we're
1237 dealing with a regexp known to be trivial, so the backslash
1238 just quotes the next character. */
1239 if (RE
&& *base_pat
== '\\')
1245 TRANSLATE (translated
, trt
, c
);
1246 TRANSLATE (inverse
, inverse_trt
, c
);
1248 /* Did this char actually get translated?
1249 Would any other char get translated into it? */
1250 if (translated
!= c
|| inverse
!= c
)
1252 /* Keep track of which character set row
1253 contains the characters that need translation. */
1254 int charset_base_code
= c
& ~0xff;
1255 if (charset_base
== -1)
1256 charset_base
= charset_base_code
;
1257 else if (charset_base
!= charset_base_code
)
1258 /* If two different rows appear, needing translation,
1259 then we cannot use boyer_moore search. */
1262 *pat
++ = translated
;
1266 len_byte
= pat
- patbuf
;
1267 len
= raw_pattern_size
;
1268 pat
= base_pat
= patbuf
;
1271 return boyer_moore (n
, pat
, len
, len_byte
, trt
, inverse_trt
,
1272 pos
, pos_byte
, lim
, lim_byte
,
1275 return simple_search (n
, pat
, len
, len_byte
, trt
,
1276 pos
, pos_byte
, lim
, lim_byte
);
1280 /* Do a simple string search N times for the string PAT,
1281 whose length is LEN/LEN_BYTE,
1282 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1283 TRT is the translation table.
1285 Return the character position where the match is found.
1286 Otherwise, if M matches remain to be found, return -M.
1288 This kind of search works regardless of what is in PAT and
1289 regardless of what is in TRT. It is used in cases where
1290 boyer_moore cannot work. */
1293 simple_search (n
, pat
, len
, len_byte
, trt
, pos
, pos_byte
, lim
, lim_byte
)
1301 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1302 int forward
= n
> 0;
1304 if (lim
> pos
&& multibyte
)
1309 /* Try matching at position POS. */
1311 int this_pos_byte
= pos_byte
;
1313 int this_len_byte
= len_byte
;
1314 unsigned char *p
= pat
;
1315 if (pos
+ len
> lim
)
1318 while (this_len
> 0)
1320 int charlen
, buf_charlen
;
1323 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1324 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1325 ZV_BYTE
- this_pos_byte
,
1327 TRANSLATE (buf_ch
, trt
, buf_ch
);
1329 if (buf_ch
!= pat_ch
)
1332 this_len_byte
-= charlen
;
1336 this_pos_byte
+= buf_charlen
;
1343 pos_byte
+= len_byte
;
1347 INC_BOTH (pos
, pos_byte
);
1357 /* Try matching at position POS. */
1360 unsigned char *p
= pat
;
1362 if (pos
+ len
> lim
)
1365 while (this_len
> 0)
1368 int buf_ch
= FETCH_BYTE (this_pos
);
1369 TRANSLATE (buf_ch
, trt
, buf_ch
);
1371 if (buf_ch
!= pat_ch
)
1389 /* Backwards search. */
1390 else if (lim
< pos
&& multibyte
)
1395 /* Try matching at position POS. */
1396 int this_pos
= pos
- len
;
1397 int this_pos_byte
= pos_byte
- len_byte
;
1399 int this_len_byte
= len_byte
;
1400 unsigned char *p
= pat
;
1402 if (pos
- len
< lim
)
1405 while (this_len
> 0)
1407 int charlen
, buf_charlen
;
1410 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1411 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1412 ZV_BYTE
- this_pos_byte
,
1414 TRANSLATE (buf_ch
, trt
, buf_ch
);
1416 if (buf_ch
!= pat_ch
)
1419 this_len_byte
-= charlen
;
1422 this_pos_byte
+= buf_charlen
;
1429 pos_byte
-= len_byte
;
1433 DEC_BOTH (pos
, pos_byte
);
1443 /* Try matching at position POS. */
1444 int this_pos
= pos
- len
;
1446 unsigned char *p
= pat
;
1448 if (pos
- len
< lim
)
1451 while (this_len
> 0)
1454 int buf_ch
= FETCH_BYTE (this_pos
);
1455 TRANSLATE (buf_ch
, trt
, buf_ch
);
1457 if (buf_ch
!= pat_ch
)
1479 set_search_regs ((multibyte
? pos_byte
: pos
) - len_byte
, len_byte
);
1481 set_search_regs (multibyte
? pos_byte
: pos
, len_byte
);
1491 /* Do Boyer-Moore search N times for the string PAT,
1492 whose length is LEN/LEN_BYTE,
1493 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
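1494 The sign of N says which direction we search in
(the local DIRECTION is 1 if N > 0, otherwise -1).
1495 TRT and INVERSE_TRT are translation tables.
CHARSET_BASE is compared with each multibyte pattern character's code
with its low 8 bits cleared, when deciding whether to translate it.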
1497 This kind of search works if all the characters in PAT that have
1498 nontrivial translation are the same aside from the last byte. This
1499 makes it possible to translate just the last byte of a character,
1500 and do so after just a simple test of the context.
1502 If that criterion is not satisfied, do not call this function. */
1505 boyer_moore (n
, base_pat
, len
, len_byte
, trt
, inverse_trt
,
1506 pos
, pos_byte
, lim
, lim_byte
, charset_base
)
1508 unsigned char *base_pat
;
1511 Lisp_Object inverse_trt
;
1516 int direction
= ((n
> 0) ? 1 : -1);
1517 register int dirlen
;
1518 int infinity
, limit
, k
, stride_for_teases
;
1519 register int *BM_tab
;
1521 register unsigned char *cursor
, *p_limit
;
1523 unsigned char *pat
, *pat_end
;
1524 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1526 unsigned char simple_translate
[0400];
1527 int translate_prev_byte
;
1528 int translate_anteprev_byte
;
1531 int BM_tab_space
[0400];
1532 BM_tab
= &BM_tab_space
[0];
1534 BM_tab
= (int *) alloca (0400 * sizeof (int));
1536 /* The general approach is that we are going to maintain that we know */
1537 /* the first (closest to the present position, in whatever direction */
1538 /* we're searching) character that could possibly be the last */
1539 /* (furthest from present position) character of a valid match. We */
1540 /* advance the state of our knowledge by looking at that character */
1541 /* and seeing whether it indeed matches the last character of the */
1542 /* pattern. If it does, we take a closer look. If it does not, we */
1543 /* move our pointer (to putative last characters) as far as is */
1544 /* logically possible. This amount of movement, which I call a */
1545 /* stride, will be the length of the pattern if the actual character */
1546 /* appears nowhere in the pattern, otherwise it will be the distance */
1547 /* from the last occurrence of that character to the end of the pattern. */
1549 /* As a coding trick, an enormous stride is coded into the table for */
1550 /* characters that match the last character. This allows use of only */
1551 /* a single test, a test for having gone past the end of the */
1552 /* permissible match region, to test for both possible matches (when */
1553 /* the stride goes past the end immediately) and failure to */
1554 /* match (where you get nudged past the end one stride at a time). */
1556 /* Here we make a "mickey mouse" BM table. The stride of the search */
1557 /* is determined only by the last character of the putative match. */
1558 /* If that character does not match, we will stride the proper */
1559 /* distance to propose a match that superimposes it on the last */
1560 /* instance of a character that matches it (per trt), or misses */
1561 /* it entirely if there is none. */
1563 dirlen
= len_byte
* direction
;
1564 infinity
= dirlen
- (lim_byte
+ pos_byte
+ len_byte
+ len_byte
) * direction
;
1566 /* Record position after the end of the pattern. */
1567 pat_end
= base_pat
+ len_byte
;
1568 /* BASE_PAT points to a character that we start scanning from.
1569 It is the first character in a forward search,
1570 the last character in a backward search. */
1572 base_pat
= pat_end
- 1;
1574 BM_tab_base
= BM_tab
;
1576 j
= dirlen
; /* to get it in a register */
1577 /* A character that does not appear in the pattern induces a */
1578 /* stride equal to the pattern length. */
1579 while (BM_tab_base
!= BM_tab
)
1587 /* We use this for translation, instead of TRT itself.
1588 We fill this in to handle the characters that actually
1589 occur in the pattern. Others don't matter anyway! */
1590 bzero (simple_translate
, sizeof simple_translate
);
1591 for (i
= 0; i
< 0400; i
++)
1592 simple_translate
[i
] = i
;
1595 while (i
!= infinity
)
1597 unsigned char *ptr
= base_pat
+ i
;
1605 int this_translated
= 1;
1608 /* Is *PTR the last byte of a character? */
1609 && (pat_end
- ptr
== 1 || CHAR_HEAD_P (ptr
[1])))
1611 unsigned char *charstart
= ptr
;
1612 while (! CHAR_HEAD_P (*charstart
))
1614 untranslated
= STRING_CHAR (charstart
, ptr
- charstart
+ 1);
1615 if (charset_base
== (untranslated
& ~0xff))
1617 TRANSLATE (ch
, trt
, untranslated
);
1618 if (! CHAR_HEAD_P (*ptr
))
1620 translate_prev_byte
= ptr
[-1];
1621 if (! CHAR_HEAD_P (translate_prev_byte
))
1622 translate_anteprev_byte
= ptr
[-2];
1627 this_translated
= 0;
1631 else if (!multibyte
)
1632 TRANSLATE (ch
, trt
, *ptr
);
1636 this_translated
= 0;
1640 j
= ((unsigned char) ch
) | 0200;
1642 j
= (unsigned char) ch
;
1645 stride_for_teases
= BM_tab
[j
];
1647 BM_tab
[j
] = dirlen
- i
;
1648 /* A translation table is accompanied by its inverse -- see */
1649 /* comment following downcase_table for details */
1650 if (this_translated
)
1652 int starting_ch
= ch
;
1656 TRANSLATE (ch
, inverse_trt
, ch
);
1658 j
= ((unsigned char) ch
) | 0200;
1660 j
= (unsigned char) ch
;
1662 /* For all the characters that map into CH,
1663 set up simple_translate to map the last byte
into STARTING_J. */
1665 simple_translate
[j
] = starting_j
;
1666 if (ch
== starting_ch
)
1668 BM_tab
[j
] = dirlen
- i
;
1677 stride_for_teases
= BM_tab
[j
];
1678 BM_tab
[j
] = dirlen
- i
;
1680 /* stride_for_teases tells how much to stride if we get a */
1681 /* match on the far character but are subsequently */
1682 /* disappointed, by recording what the stride would have been */
1683 /* for that character if the last character had been different. */
1686 infinity
= dirlen
- infinity
;
1687 pos_byte
+= dirlen
- ((direction
> 0) ? direction
: 0);
1688 /* loop invariant - POS_BYTE points at where last char (first
1689 char if reverse) of pattern would align in a possible match. */
1693 unsigned char *tail_end_ptr
;
1695 /* It's been reported that some (broken) compiler thinks that
1696 Boolean expressions in an arithmetic context are unsigned.
1697 Using an explicit ?1:0 prevents this. */
1698 if ((lim_byte
- pos_byte
- ((direction
> 0) ? 1 : 0)) * direction
1700 return (n
* (0 - direction
));
1701 /* First we do the part we can by pointers (maybe nothing) */
1704 limit
= pos_byte
- dirlen
+ direction
;
1707 limit
= BUFFER_CEILING_OF (limit
);
1708 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1709 can take on without hitting edge of buffer or the gap. */
1710 limit
= min (limit
, pos_byte
+ 20000);
1711 limit
= min (limit
, lim_byte
- 1);
1715 limit
= BUFFER_FLOOR_OF (limit
);
1716 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1717 can take on without hitting edge of buffer or the gap. */
1718 limit
= max (limit
, pos_byte
- 20000);
1719 limit
= max (limit
, lim_byte
);
1721 tail_end
= BUFFER_CEILING_OF (pos_byte
) + 1;
1722 tail_end_ptr
= BYTE_POS_ADDR (tail_end
);
1724 if ((limit
- pos_byte
) * direction
> 20)
1728 p_limit
= BYTE_POS_ADDR (limit
);
1729 p2
= (cursor
= BYTE_POS_ADDR (pos_byte
));
1730 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1731 while (1) /* use one cursor setting as long as i can */
1733 if (direction
> 0) /* worth duplicating */
1735 /* Use signed comparison if appropriate
1736 to make cursor+infinity sure to be > p_limit.
1737 Assuming that the buffer lies in a range of addresses
1738 that are all "positive" (as ints) or all "negative",
1739 either kind of comparison will work as long
1740 as we don't step by infinity. So pick the kind
1741 that works when we do step by infinity. */
1742 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1743 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1744 cursor
+= BM_tab
[*cursor
];
1746 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1747 cursor
+= BM_tab
[*cursor
];
1751 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1752 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1753 cursor
+= BM_tab
[*cursor
];
1755 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1756 cursor
+= BM_tab
[*cursor
];
1758 /* If you are here, cursor is beyond the end of the searched region. */
1759 /* This can happen if you match on the far character of the pattern, */
1760 /* because the "stride" of that character is infinity, a number able */
1761 /* to throw you well beyond the end of the search. It can also */
1762 /* happen if you fail to match within the permitted region and would */
1763 /* otherwise try a character beyond that region */
1764 if ((cursor
- p_limit
) * direction
<= len_byte
)
1765 break; /* a small overrun is genuine */
1766 cursor
-= infinity
; /* large overrun = hit */
1767 i
= dirlen
- direction
;
1770 while ((i
-= direction
) + direction
!= 0)
1773 cursor
-= direction
;
1774 /* Translate only the last byte of a character. */
1776 || ((cursor
== tail_end_ptr
1777 || CHAR_HEAD_P (cursor
[1]))
1778 && (CHAR_HEAD_P (cursor
[0])
1779 || (translate_prev_byte
== cursor
[-1]
1780 && (CHAR_HEAD_P (translate_prev_byte
)
1781 || translate_anteprev_byte
== cursor
[-2])))))
1782 ch
= simple_translate
[*cursor
];
1791 while ((i
-= direction
) + direction
!= 0)
1793 cursor
-= direction
;
1794 if (pat
[i
] != *cursor
)
1798 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1799 if (i
+ direction
== 0)
1803 cursor
-= direction
;
1805 position
= pos_byte
+ cursor
- p2
+ ((direction
> 0)
1806 ? 1 - len_byte
: 0);
1807 set_search_regs (position
, len_byte
);
1809 if ((n
-= direction
) != 0)
1810 cursor
+= dirlen
; /* to resume search */
1812 return ((direction
> 0)
1813 ? search_regs
.end
[0] : search_regs
.start
[0]);
1816 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1818 pos_byte
+= cursor
- p2
;
1821 /* Now we'll pick up a clump that has to be done the hard */
1822 /* way because it covers a discontinuity */
1824 limit
= ((direction
> 0)
1825 ? BUFFER_CEILING_OF (pos_byte
- dirlen
+ 1)
1826 : BUFFER_FLOOR_OF (pos_byte
- dirlen
- 1));
1827 limit
= ((direction
> 0)
1828 ? min (limit
+ len_byte
, lim_byte
- 1)
1829 : max (limit
- len_byte
, lim_byte
));
1830 /* LIMIT is now the last value POS_BYTE can have
1831 and still be valid for a possible match. */
1834 /* This loop can be coded for space rather than */
1835 /* speed because it will usually run only once. */
1836 /* (the reach is at most len + 21, and typically */
1837 /* does not exceed len) */
1838 while ((limit
- pos_byte
) * direction
>= 0)
1839 pos_byte
+= BM_tab
[FETCH_BYTE (pos_byte
)];
1840 /* now run the same tests to distinguish going off the */
1841 /* end, a match or a phony match. */
1842 if ((pos_byte
- limit
) * direction
<= len_byte
)
1843 break; /* ran off the end */
1844 /* Found what might be a match.
1845 Set POS_BYTE back to last (first if reverse) pos. */
1846 pos_byte
-= infinity
;
1847 i
= dirlen
- direction
;
1848 while ((i
-= direction
) + direction
!= 0)
1852 pos_byte
-= direction
;
1853 ptr
= BYTE_POS_ADDR (pos_byte
);
1854 /* Translate only the last byte of a character. */
1856 || ((ptr
== tail_end_ptr
1857 || CHAR_HEAD_P (ptr
[1]))
1858 && (CHAR_HEAD_P (ptr
[0])
1859 || (translate_prev_byte
== ptr
[-1]
1860 && (CHAR_HEAD_P (translate_prev_byte
)
1861 || translate_anteprev_byte
== ptr
[-2])))))
1862 ch
= simple_translate
[*ptr
];
1868 /* Above loop has moved POS_BYTE part or all the way
1869 back to the first pos (last pos if reverse).
1870 Set it once again at the last (first if reverse) char. */
1871 pos_byte
+= dirlen
- i
- direction
;
1872 if (i
+ direction
== 0)
1875 pos_byte
-= direction
;
1877 position
= pos_byte
+ ((direction
> 0) ? 1 - len_byte
: 0);
1879 set_search_regs (position
, len_byte
);
1881 if ((n
-= direction
) != 0)
1882 pos_byte
+= dirlen
; /* to resume search */
1884 return ((direction
> 0)
1885 ? search_regs
.end
[0] : search_regs
.start
[0]);
1888 pos_byte
+= stride_for_teases
;
1891 /* We have done one clump. Can we continue? */
1892 if ((lim_byte
- pos_byte
) * direction
< 0)
1893 return ((0 - n
) * direction
);
1895 return BYTE_TO_CHAR (pos_byte
);
1898 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1899 for a match just found in the current buffer.

BEG_BYTE and NBYTES are byte quantities; both ends are converted with
BYTE_TO_CHAR before being stored, so search_regs.start[0] and
search_regs.end[0] hold what BYTE_TO_CHAR returns for them.

Only register 0 is written here.  If search_regs has no registers yet
(num_regs == 0), two-element start and end arrays are allocated with
xmalloc first.  last_thing_searched is set to the current buffer.  */
1902 set_search_regs (beg_byte
, nbytes
)
1903 int beg_byte
, nbytes
;
1905 /* Make sure we have registers in which to store
1906 the match position.  Two slots are allocated in each array,
although only slot 0 is filled in below.  */
1907 if (search_regs
.num_regs
== 0)
1909 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1910 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1911 search_regs
.num_regs
= 2;
/* Store the match bounds, converting the byte positions with BYTE_TO_CHAR.  */
1914 search_regs
.start
[0] = BYTE_TO_CHAR (beg_byte
);
1915 search_regs
.end
[0] = BYTE_TO_CHAR (beg_byte
+ nbytes
);
/* Remember that the match data now refers to the current buffer.  */
1916 XSETBUFFER (last_thing_searched
, current_buffer
);
1919 /* Given a string of words separated by word delimiters,
1920 compute a regexp that matches those exact words
1921 separated by arbitrary punctuation. */
1927 register unsigned char *p
, *o
;
1928 register int i
, i_byte
, len
, punct_count
= 0, word_count
= 0;
1933 CHECK_STRING (string
, 0);
1934 p
= XSTRING (string
)->data
;
1935 len
= XSTRING (string
)->size
;
1937 for (i
= 0, i_byte
= 0; i
< len
; )
1941 if (STRING_MULTIBYTE (string
))
1942 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
1944 c
= XSTRING (string
)->data
[i
++];
1946 if (SYNTAX (c
) != Sword
)
1949 if (i
> 0 && SYNTAX (prev_c
) == Sword
)
1956 if (SYNTAX (prev_c
) == Sword
)
1959 return build_string ("");
1961 adjust
= - punct_count
+ 5 * (word_count
- 1) + 4;
1962 val
= make_uninit_multibyte_string (len
+ adjust
,
1963 STRING_BYTES (XSTRING (string
)) + adjust
);
1965 o
= XSTRING (val
)->data
;
1970 for (i
= 0, i_byte
= 0; i
< len
; )
1973 int i_byte_orig
= i_byte
;
1975 if (STRING_MULTIBYTE (string
))
1976 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
1978 c
= XSTRING (string
)->data
[i
++];
1980 if (SYNTAX (c
) == Sword
)
1982 bcopy (&XSTRING (string
)->data
[i_byte_orig
], o
,
1983 i_byte
- i_byte_orig
);
1984 o
+= i_byte
- i_byte_orig
;
1986 else if (i
> 0 && SYNTAX (prev_c
) == Sword
&& --word_count
)
2004 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
2005 "MSearch backward: ",
2006 "Search backward from point for STRING.\n\
2007 Set point to the beginning of the occurrence found, and return point.\n\
2008 An optional second argument bounds the search; it is a buffer position.\n\
2009 The match found must not extend before that position.\n\
2010 Optional third argument, if t, means if fail just return nil (no error).\n\
2011 If not nil and not t, position at limit of search and return nil.\n\
2012 Optional fourth argument is repeat count--search for successive occurrences.\n\
2013 See also the functions `match-beginning', `match-end' and `replace-match'.")
2014 (string
, bound
, noerror
, count
)
2015 Lisp_Object string
, bound
, noerror
, count
;
2017 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
2020 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
2021 "Search forward from point for STRING.\n\
2022 Set point to the end of the occurrence found, and return point.\n\
2023 An optional second argument bounds the search; it is a buffer position.\n\
2024 The match found must not extend after that position. nil is equivalent\n\
2026 Optional third argument, if t, means if fail just return nil (no error).\n\
2027 If not nil and not t, move to limit of search and return nil.\n\
2028 Optional fourth argument is repeat count--search for successive occurrences.\n\
2029 See also the functions `match-beginning', `match-end' and `replace-match'.")
2030 (string
, bound
, noerror
, count
)
2031 Lisp_Object string
, bound
, noerror
, count
;
2033 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
2036 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
2037 "sWord search backward: ",
2038 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2039 Set point to the beginning of the occurrence found, and return point.\n\
2040 An optional second argument bounds the search; it is a buffer position.\n\
2041 The match found must not extend before that position.\n\
2042 Optional third argument, if t, means if fail just return nil (no error).\n\
2043 If not nil and not t, move to limit of search and return nil.\n\
2044 Optional fourth argument is repeat count--search for successive occurrences.")
2045 (string
, bound
, noerror
, count
)
2046 Lisp_Object string
, bound
, noerror
, count
;
2048 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
2051 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
2053 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2054 Set point to the end of the occurrence found, and return point.\n\
2055 An optional second argument bounds the search; it is a buffer position.\n\
2056 The match found must not extend after that position.\n\
2057 Optional third argument, if t, means if fail just return nil (no error).\n\
2058 If not nil and not t, move to limit of search and return nil.\n\
2059 Optional fourth argument is repeat count--search for successive occurrences.")
2060 (string
, bound
, noerror
, count
)
2061 Lisp_Object string
, bound
, noerror
, count
;
2063 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
2066 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
2067 "sRE search backward: ",
2068 "Search backward from point for match for regular expression REGEXP.\n\
2069 Set point to the beginning of the match, and return point.\n\
2070 The match found is the one starting last in the buffer\n\
2071 and yet ending before the origin of the search.\n\
2072 An optional second argument bounds the search; it is a buffer position.\n\
2073 The match found must start at or after that position.\n\
2074 Optional third argument, if t, means if fail just return nil (no error).\n\
2075 If not nil and not t, move to limit of search and return nil.\n\
2076 Optional fourth argument is repeat count--search for successive occurrences.\n\
2077 See also the functions `match-beginning', `match-end' and `replace-match'.")
2078 (regexp
, bound
, noerror
, count
)
2079 Lisp_Object regexp
, bound
, noerror
, count
;
2081 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
2084 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
2086 "Search forward from point for regular expression REGEXP.\n\
2087 Set point to the end of the occurrence found, and return point.\n\
2088 An optional second argument bounds the search; it is a buffer position.\n\
2089 The match found must not extend after that position.\n\
2090 Optional third argument, if t, means if fail just return nil (no error).\n\
2091 If not nil and not t, move to limit of search and return nil.\n\
2092 Optional fourth argument is repeat count--search for successive occurrences.\n\
2093 See also the functions `match-beginning', `match-end' and `replace-match'.")
2094 (regexp
, bound
, noerror
, count
)
2095 Lisp_Object regexp
, bound
, noerror
, count
;
2097 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
2100 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
2101 "sPosix search backward: ",
2102 "Search backward from point for match for regular expression REGEXP.\n\
2103 Find the longest match in accord with Posix regular expression rules.\n\
2104 Set point to the beginning of the match, and return point.\n\
2105 The match found is the one starting last in the buffer\n\
2106 and yet ending before the origin of the search.\n\
2107 An optional second argument bounds the search; it is a buffer position.\n\
2108 The match found must start at or after that position.\n\
2109 Optional third argument, if t, means if fail just return nil (no error).\n\
2110 If not nil and not t, move to limit of search and return nil.\n\
2111 Optional fourth argument is repeat count--search for successive occurrences.\n\
2112 See also the functions `match-beginning', `match-end' and `replace-match'.")
2113 (regexp
, bound
, noerror
, count
)
2114 Lisp_Object regexp
, bound
, noerror
, count
;
2116 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
2119 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
2121 "Search forward from point for regular expression REGEXP.\n\
2122 Find the longest match in accord with Posix regular expression rules.\n\
2123 Set point to the end of the occurrence found, and return point.\n\
2124 An optional second argument bounds the search; it is a buffer position.\n\
2125 The match found must not extend after that position.\n\
2126 Optional third argument, if t, means if fail just return nil (no error).\n\
2127 If not nil and not t, move to limit of search and return nil.\n\
2128 Optional fourth argument is repeat count--search for successive occurrences.\n\
2129 See also the functions `match-beginning', `match-end' and `replace-match'.")
2130 (regexp
, bound
, noerror
, count
)
2131 Lisp_Object regexp
, bound
, noerror
, count
;
2133 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
2136 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
2137 "Replace text matched by last search with NEWTEXT.\n\
2138 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2139 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2140 based on the replaced text.\n\
2141 If the replaced text has only capital letters\n\
2142 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2143 If the replaced text has at least one word starting with a capital letter,\n\
2144 then capitalize each word in NEWTEXT.\n\n\
2145 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2146 Otherwise treat `\\' as special:\n\
2147 `\\&' in NEWTEXT means substitute original matched text.\n\
2148 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2149 If Nth parens didn't match, substitute nothing.\n\
2150 `\\\\' means insert one `\\'.\n\
2151 FIXEDCASE and LITERAL are optional arguments.\n\
2152 Leaves point at end of replacement text.\n\
2154 The optional fourth argument STRING can be a string to modify.\n\
2155 In that case, this function creates and returns a new string\n\
2156 which is made by replacing the part of STRING that was matched.\n\
2158 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2159 It says to replace just that subexpression instead of the whole match.\n\
2160 This is useful only after a regular expression search or match\n\
2161 since only regular expressions have distinguished subexpressions.")
2162 (newtext
, fixedcase
, literal
, string
, subexp
)
2163 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
2165 enum { nochange
, all_caps
, cap_initial
} case_action
;
2166 register int pos
, last
;
2167 int some_multiletter_word
;
2170 int some_nonuppercase_initial
;
2171 register int c
, prevc
;
2174 int opoint
, newpoint
;
2176 CHECK_STRING (newtext
, 0);
2178 if (! NILP (string
))
2179 CHECK_STRING (string
, 4);
2181 case_action
= nochange
; /* We tried an initialization */
2182 /* but some C compilers blew it */
2184 if (search_regs
.num_regs
<= 0)
2185 error ("replace-match called before any match found");
2191 CHECK_NUMBER (subexp
, 3);
2192 sub
= XINT (subexp
);
2193 if (sub
< 0 || sub
>= search_regs
.num_regs
)
2194 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
2199 if (search_regs
.start
[sub
] < BEGV
2200 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2201 || search_regs
.end
[sub
] > ZV
)
2202 args_out_of_range (make_number (search_regs
.start
[sub
]),
2203 make_number (search_regs
.end
[sub
]));
2207 if (search_regs
.start
[sub
] < 0
2208 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2209 || search_regs
.end
[sub
] > XSTRING (string
)->size
)
2210 args_out_of_range (make_number (search_regs
.start
[sub
]),
2211 make_number (search_regs
.end
[sub
]));
2214 if (NILP (fixedcase
))
2217 /* Decide how to casify by examining the matched text. */
2220 last
= CHAR_TO_BYTE (search_regs
.end
[sub
]);
2222 last
= search_regs
.end
[sub
];
2225 beg
= CHAR_TO_BYTE (search_regs
.start
[sub
]);
2227 beg
= search_regs
.start
[sub
];
2230 case_action
= all_caps
;
2232 /* some_multiletter_word is set nonzero if any original word
2233 is more than one letter long. */
2234 some_multiletter_word
= 0;
2236 some_nonuppercase_initial
= 0;
2239 for (pos
= beg
; pos
< last
; pos
++)
2242 c
= FETCH_BYTE (pos
);
2244 c
= XSTRING (string
)->data
[pos
];
2248 /* Cannot be all caps if any original char is lower case */
2251 if (SYNTAX (prevc
) != Sword
)
2252 some_nonuppercase_initial
= 1;
2254 some_multiletter_word
= 1;
2256 else if (!NOCASEP (c
))
2259 if (SYNTAX (prevc
) != Sword
)
2262 some_multiletter_word
= 1;
2266 /* If the initial is a caseless word constituent,
2267 treat that like a lowercase initial. */
2268 if (SYNTAX (prevc
) != Sword
)
2269 some_nonuppercase_initial
= 1;
2275 /* Convert to all caps if the old text is all caps
2276 and has at least one multiletter word. */
2277 if (! some_lowercase
&& some_multiletter_word
)
2278 case_action
= all_caps
;
2279 /* Capitalize each word, if the old text has all capitalized words. */
2280 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
2281 case_action
= cap_initial
;
2282 else if (!some_nonuppercase_initial
&& some_uppercase
)
2283 /* Should x -> yz, operating on X, give Yz or YZ?
2284 We'll assume the latter. */
2285 case_action
= all_caps
;
2287 case_action
= nochange
;
2290 /* Do replacement in a string. */
2293 Lisp_Object before
, after
;
2295 before
= Fsubstring (string
, make_number (0),
2296 make_number (search_regs
.start
[sub
]));
2297 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
2299 /* Substitute parts of the match into NEWTEXT if desired. */
2304 int lastpos_byte
= 0;
2305 /* We build up the substituted string in ACCUM. */
2312 for (pos_byte
= 0, pos
= 0; pos_byte
< STRING_BYTES (XSTRING (newtext
));)
2316 int delbackslash
= 0;
2318 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2322 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2325 substart
= search_regs
.start
[sub
];
2326 subend
= search_regs
.end
[sub
];
2328 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2330 if (search_regs
.start
[c
- '0'] >= 0)
2332 substart
= search_regs
.start
[c
- '0'];
2333 subend
= search_regs
.end
[c
- '0'];
2339 error ("Invalid use of `\\' in replacement text");
2343 if (pos
- 2 != lastpos
)
2344 middle
= substring_both (newtext
, lastpos
,
2346 pos
- 2, pos_byte
- 2);
2349 accum
= concat3 (accum
, middle
,
2351 make_number (substart
),
2352 make_number (subend
)));
2354 lastpos_byte
= pos_byte
;
2356 else if (delbackslash
)
2358 middle
= substring_both (newtext
, lastpos
,
2360 pos
- 1, pos_byte
- 1);
2362 accum
= concat2 (accum
, middle
);
2364 lastpos_byte
= pos_byte
;
2369 middle
= substring_both (newtext
, lastpos
,
2375 newtext
= concat2 (accum
, middle
);
2378 /* Do case substitution in NEWTEXT if desired. */
2379 if (case_action
== all_caps
)
2380 newtext
= Fupcase (newtext
);
2381 else if (case_action
== cap_initial
)
2382 newtext
= Fupcase_initials (newtext
);
2384 return concat3 (before
, newtext
, after
);
2387 /* Record point, then move (quietly) to the start of the match. */
2388 if (PT
> search_regs
.start
[sub
])
2393 TEMP_SET_PT (search_regs
.start
[sub
]);
2395 /* We insert the replacement text before the old text, and then
2396 delete the original text. This means that markers at the
2397 beginning or end of the original will float to the corresponding
2398 position in the replacement. */
2399 if (!NILP (literal
))
2400 Finsert_and_inherit (1, &newtext
);
2403 struct gcpro gcpro1
;
2406 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
2408 int offset
= PT
- search_regs
.start
[sub
];
2410 c
= XSTRING (newtext
)->data
[pos
];
2413 c
= XSTRING (newtext
)->data
[++pos
];
2415 Finsert_buffer_substring
2416 (Fcurrent_buffer (),
2417 make_number (search_regs
.start
[sub
] + offset
),
2418 make_number (search_regs
.end
[sub
] + offset
));
2419 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2421 if (search_regs
.start
[c
- '0'] >= 1)
2422 Finsert_buffer_substring
2423 (Fcurrent_buffer (),
2424 make_number (search_regs
.start
[c
- '0'] + offset
),
2425 make_number (search_regs
.end
[c
- '0'] + offset
));
2430 error ("Invalid use of `\\' in replacement text");
2438 inslen
= PT
- (search_regs
.start
[sub
]);
2439 del_range (search_regs
.start
[sub
] + inslen
, search_regs
.end
[sub
] + inslen
);
2441 if (case_action
== all_caps
)
2442 Fupcase_region (make_number (PT
- inslen
), make_number (PT
));
2443 else if (case_action
== cap_initial
)
2444 Fupcase_initials_region (make_number (PT
- inslen
), make_number (PT
));
2448 /* Put point back where it was in the text. */
2450 TEMP_SET_PT (opoint
+ ZV
);
2452 TEMP_SET_PT (opoint
);
2454 /* Now move point "officially" to the start of the inserted replacement. */
2455 move_if_not_intangible (newpoint
);
/* Common worker for `match-beginning' and `match-end'.

   NUM is checked with CHECK_NUMBER.  BEGINNINGP nonzero selects
   search_regs.start[N], zero selects search_regs.end[N]; the selected
   value is returned through make_number.

   N is presumably the integer value of NUM; its assignment is not
   visible in this excerpt -- TODO confirm.

   Calls args_out_of_range if N is negative or not less than
   search_regs.num_regs.  */
2461 match_limit (num
, beginningp
)
2467 CHECK_NUMBER (num
, 0);
/* Reject a register index outside 0 .. search_regs.num_regs - 1.  */
2469 if (n
< 0 || n
>= search_regs
.num_regs
)
2470 args_out_of_range (num
, make_number (search_regs
.num_regs
));
/* No registers exist, or register N has a negative start.
   NOTE(review): the statement guarded by this test is not visible in
   this excerpt; the doc strings of `match-beginning' and `match-end'
   say the value is nil when the pair did not match -- confirm.  */
2471 if (search_regs
.num_regs
<= 0
2472 || search_regs
.start
[n
] < 0)
/* BEGINNINGP selects the start of register N; otherwise its end.  */
2474 return (make_number ((beginningp
) ? search_regs
.start
[n
]
2475 : search_regs
.end
[n
]));
2478 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
2479 "Return position of start of text matched by last search.\n\
2480 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2482 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2484 Zero means the entire text matched by the whole regexp or whole string.")
2488 return match_limit (subexp
, 1);
2491 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
2492 "Return position of end of text matched by last search.\n\
2493 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2495 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2497 Zero means the entire text matched by the whole regexp or whole string.")
2501 return match_limit (subexp
, 0);
/* Lisp primitive `match-data' (zero to two arguments: INTEGERS, REUSE).
   It converts the registers in search_regs into an array of Lisp
   objects, two per register, and returns them either as a fresh list
   or stored into the caller-supplied list REUSE.
   NOTE(review): this listing omits several original lines (the
   declarations of `data', `i' and `len', the assignments to `len' and
   `prev', the `else' arms and the final return); confirm against the
   complete file before relying on the notes below.  */
2504 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 2, 0,
2505 "Return a list containing all info on what the last search matched.\n\
2506 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2507 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2508 if the last match was on a buffer; integers or nil if a string was matched.\n\
2509 Use `store-match-data' to reinstate the data in this list.\n\
2511 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2512 \(rather than markers) to represent buffer positions.\n\
2513 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2514 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2516 Lisp_Object integers
, reuse
;
2518 Lisp_Object tail
, prev
;
/* last_thing_searched is nil: presumably no search has been recorded
   yet (the guarded statement is not visible here).  */
2522 if (NILP (last_thing_searched
))
/* Scratch array with two slots (start, end) per register.  It is
   obtained with alloca, so it must not be referenced after return.  */
2525 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
)
2526 * sizeof (Lisp_Object
));
/* Fill data[2*i] and data[2*i + 1] for every register.  */
2529 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2531 int start
= search_regs
.start
[i
];
/* Match against a string (last_thing_searched is t), or the caller
   asked for INTEGERS: store the raw offsets as Lisp integers.  */
2534 if (EQ (last_thing_searched
, Qt
)
2535 || ! NILP (integers
))
2537 XSETFASTINT (data
[2 * i
], start
);
2538 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
/* Match against a buffer: create a fresh marker for each offset and
   point it into that buffer.  */
2540 else if (BUFFERP (last_thing_searched
))
2542 data
[2 * i
] = Fmake_marker ();
2543 Fset_marker (data
[2 * i
],
2544 make_number (start
),
2545 last_thing_searched
);
2546 data
[2 * i
+ 1] = Fmake_marker ();
2547 Fset_marker (data
[2 * i
+ 1],
2548 make_number (search_regs
.end
[i
]),
2549 last_thing_searched
);
2552 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
/* Both slots become nil; presumably this is the arm for a register
   that did not match (see the docstring) -- TODO confirm, the
   controlling `else' is not visible here.  */
2558 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
/* From here on, 2 * len + 2 is used as the number of meaningful
   elements in `data'; `len' itself is assigned on lines not shown.  */
2561 /* If REUSE is not usable, cons up the values and return them. */
2562 if (! CONSP (reuse
))
2563 return Flist (2 * len
+ 2, data
);
2565 /* If REUSE is a list, store as many value elements as will fit
2566 into the elements of REUSE. */
2567 for (i
= 0, tail
= reuse
; CONSP (tail
);
2568 i
++, tail
= XCONS (tail
)->cdr
)
/* Cells within the value range receive data[i]; any remaining cells of
   REUSE are overwritten with nil.  */
2570 if (i
< 2 * len
+ 2)
2571 XCONS (tail
)->car
= data
[i
];
2573 XCONS (tail
)->car
= Qnil
;
2577 /* If we couldn't fit all value elements into REUSE,
2578 cons up the rest of them and add them to the end of REUSE. */
/* `prev' is presumably the last cons visited by the loop above; its
   assignment is not visible in this listing.  */
2579 if (i
< 2 * len
+ 2)
2580 XCONS (prev
)->cdr
= Flist (2 * len
+ 2 - i
, data
+ i
);
/* Lisp primitive `set-match-data' (exactly one argument, LIST).
   It overwrites search_regs and last_thing_searched from LIST, taking
   the elements pairwise as the start and end of successive registers.
   NOTE(review): this listing omits several original lines (assignment
   targets of the xmalloc/xrealloc calls, `else' arms, the statements
   advancing `list', and the return); confirm against the complete
   file before relying on the notes below.  */
2586 DEFUN ("set-match-data", Fset_match_data
, Sset_match_data
, 1, 1, 0,
2587 "Set internal data on last search match from elements of LIST.\n\
2588 LIST should have been created by calling `match-data' previously.")
2590 register Lisp_Object list
;
2593 register Lisp_Object marker
;
/* While asynchronous code (a filter or sentinel) is running, set the
   current registers aside first so that restore_match_data can put
   them back afterwards.  */
2595 if (running_asynch_code
)
2596 save_search_regs ();
/* LIST must be a cons or nil; anything else goes through
   wrong_type_argument, whose return value replaces LIST.  */
2598 if (!CONSP (list
) && !NILP (list
))
2599 list
= wrong_type_argument (Qconsp
, list
);
2601 /* Unless we find a marker with a buffer in LIST, assume that this
2602 match data came from a string. */
2603 last_thing_searched
= Qt
;
2605 /* Allocate registers if they don't already exist. */
/* Each register consumes two list elements, hence the division by 2.  */
2607 int length
= XFASTINT (Flength (list
)) / 2;
/* The arrays only ever grow: nothing is reallocated when LIST needs no
   more registers than are already available.  */
2609 if (length
> search_regs
.num_regs
)
/* First allocation uses xmalloc; later growth uses xrealloc on the
   existing start/end arrays.  */
2611 if (search_regs
.num_regs
== 0)
2614 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2616 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2621 = (regoff_t
*) xrealloc (search_regs
.start
,
2622 length
* sizeof (regoff_t
));
2624 = (regoff_t
*) xrealloc (search_regs
.end
,
2625 length
* sizeof (regoff_t
));
2628 search_regs
.num_regs
= length
;
/* Walk every register slot, including slots beyond the end of LIST.  */
2632 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2634 marker
= Fcar (list
);
/* A start of -1 marks the register as unset; presumably this is the
   arm taken when the list element is nil -- TODO confirm, the
   controlling condition is not visible here.  */
2637 search_regs
.start
[i
] = -1;
/* A marker with no buffer is replaced by the integer 0; a marker's
   buffer is recorded as the thing last searched.  (The `else' that
   presumably separates these two statements is not visible.)  */
2642 if (MARKERP (marker
))
2644 if (XMARKER (marker
)->buffer
== 0)
2645 XSETFASTINT (marker
, 0);
2647 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
/* Accept a number or a marker, then store it as the register start.  */
2650 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
2651 search_regs
.start
[i
] = XINT (marker
);
/* Same treatment for the element that supplies the register end.  */
2654 marker
= Fcar (list
);
2655 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
2656 XSETFASTINT (marker
, 0);
2658 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
2659 search_regs
.end
[i
] = XINT (marker
);
2667 /* If non-zero the match data have been saved in saved_search_regs
2668 during the execution of a sentinel or filter. */
2669 static int search_regs_saved
;
/* Holds the register count and the start/end array pointers taken from
   search_regs while search_regs_saved is set.  */
2670 static struct re_registers saved_search_regs
;
2672 /* Called from Flooking_at, Fstring_match, search_buffer, Fset_match_data
2673 if asynchronous code (filter or sentinel) is running. */
/* NOTE(review): the function header is not visible in this listing;
   the body below is presumably that of save_search_regs, which
   Fset_match_data calls -- confirm against the complete file.  */
/* Save only once: if a save is already pending, the outer saved state
   is left untouched.  */
2677 if (!search_regs_saved
)
/* Hand the existing arrays over to saved_search_regs (pointers are
   copied, not the contents) ...  */
2679 saved_search_regs
.num_regs
= search_regs
.num_regs
;
2680 saved_search_regs
.start
= search_regs
.start
;
2681 saved_search_regs
.end
= search_regs
.end
;
/* ... and leave search_regs empty, so later code sees num_regs == 0
   and no arrays.  */
2682 search_regs
.num_regs
= 0;
2683 search_regs
.start
= 0;
2684 search_regs
.end
= 0;
2686 search_regs_saved
= 1;
2690 /* Called upon exit from filters and sentinels. */
/* Undoes the hand-over done when the registers were saved: if a save
   is pending, discard whatever registers exist now and reinstate the
   saved ones.  Does nothing when no save is pending.  */
2692 restore_match_data ()
2694 if (search_regs_saved
)
/* Arrays that exist now are released with xfree before the pointers
   are overwritten.  */
2696 if (search_regs
.num_regs
> 0)
2698 xfree (search_regs
.start
);
2699 xfree (search_regs
.end
);
/* Take the saved count and array pointers back into search_regs.  */
2701 search_regs
.num_regs
= saved_search_regs
.num_regs
;
2702 search_regs
.start
= saved_search_regs
.start
;
2703 search_regs
.end
= saved_search_regs
.end
;
/* Clearing the flag lets the next save store the registers again.  */
2705 search_regs_saved
= 0;
2709 /* Quote a string so that regexp special characters match literally. */
/* Lisp primitive `regexp-quote' (exactly one argument, STRING).
   It builds a copy of STRING in which each of the bytes tested in the
   loop below is preceded by a backslash.
   NOTE(review): this listing omits some original lines (the
   initialisation of `out', the per-byte copy, and one argument of the
   make_specified_string call); confirm against the complete file.  */
2711 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
2712 "Return a regexp string which matches exactly STRING and nothing else.")
2716 register unsigned char *in
, *out
, *end
;
2717 register unsigned char *temp
;
/* Counts inserted backslashes so the result's character count can be
   computed from the original size.  */
2718 int backslashes_added
= 0;
2720 CHECK_STRING (string
, 0);
/* Worst case every byte needs a backslash, so twice the byte length is
   reserved.  The buffer comes from alloca, i.e. it is stack space
   proportional to the input length.  */
2722 temp
= (unsigned char *) alloca (STRING_BYTES (XSTRING (string
)) * 2);
2724 /* Now copy the data into the new string, inserting escapes. */
2726 in
= XSTRING (string
)->data
;
2727 end
= in
+ STRING_BYTES (XSTRING (string
));
/* Scan the input byte by byte.  */
2730 for (; in
!= end
; in
++)
/* Bytes that get a backslash prefix: [ ] * . \ ? + ^ $  */
2732 if (*in
== '[' || *in
== ']'
2733 || *in
== '*' || *in
== '.' || *in
== '\\'
2734 || *in
== '?' || *in
== '+'
2735 || *in
== '^' || *in
== '$')
2736 *out
++ = '\\', backslashes_added
++;
/* The result has the original character count plus one per inserted
   backslash and keeps STRING's multibyte status.  */
2740 return make_specified_string (temp
,
2741 XSTRING (string
)->size
+ backslashes_added
,
2743 STRING_MULTIBYTE (string
));
/* Body of the initialisation routine for this file: it sets up the
   compiled-regexp cache, creates the error symbols, and registers the
   Lisp primitives.
   NOTE(review): the function header and braces are not visible in this
   listing; confirm the enclosing definition against the complete file.  */
/* Prepare each of the REGEXP_CACHE_SIZE cache entries.  */
2751 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
/* Every pattern buffer starts with 100 bytes, and `allocated' records
   that size.  NOTE(review): the malloc result is not checked here.  */
2753 searchbufs
[i
].buf
.allocated
= 100;
2754 searchbufs
[i
].buf
.buffer
= (unsigned char *) malloc (100);
/* The pattern buffer's fastmap points at the entry's own fastmap
   member.  */
2755 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
/* A nil regexp marks the entry as holding no compiled pattern; the
   slot is passed to staticpro, presumably to keep it visible to the
   garbage collector.  */
2756 searchbufs
[i
].regexp
= Qnil
;
2757 staticpro (&searchbufs
[i
].regexp
);
/* Chain the entries in array order; the last entry's `next' is 0.  */
2758 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
/* The list initially starts at the first array element.  */
2760 searchbuf_head
= &searchbufs
[0];
/* Create the two error symbols used by this file.  */
2762 Qsearch_failed
= intern ("search-failed");
2763 staticpro (&Qsearch_failed
);
2764 Qinvalid_regexp
= intern ("invalid-regexp");
2765 staticpro (&Qinvalid_regexp
);
/* Give each symbol its condition list (itself, then `error') and its
   message string.  */
2767 Fput (Qsearch_failed
, Qerror_conditions
,
2768 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
2769 Fput (Qsearch_failed
, Qerror_message
,
2770 build_string ("Search failed"));
2772 Fput (Qinvalid_regexp
, Qerror_conditions
,
2773 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
2774 Fput (Qinvalid_regexp
, Qerror_message
,
2775 build_string ("Invalid regexp"));
/* Start with last_thing_searched set to nil, the value match-data
   tests for before building its result.  */
2777 last_thing_searched
= Qnil
;
2778 staticpro (&last_thing_searched
);
/* Register the primitives defined in this file (presumably defsubr is
   what makes each DEFUN callable from Lisp).  */
2780 defsubr (&Slooking_at
);
2781 defsubr (&Sposix_looking_at
);
2782 defsubr (&Sstring_match
);
2783 defsubr (&Sposix_string_match
);
2784 defsubr (&Ssearch_forward
);
2785 defsubr (&Ssearch_backward
);
2786 defsubr (&Sword_search_forward
);
2787 defsubr (&Sword_search_backward
);
2788 defsubr (&Sre_search_forward
);
2789 defsubr (&Sre_search_backward
);
2790 defsubr (&Sposix_search_forward
);
2791 defsubr (&Sposix_search_backward
);
2792 defsubr (&Sreplace_match
);
2793 defsubr (&Smatch_beginning
);
2794 defsubr (&Smatch_end
);
2795 defsubr (&Smatch_data
);
2796 defsubr (&Sset_match_data
);
2797 defsubr (&Sregexp_quote
);