1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
4 This file is part of GNU Emacs.
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
31 #include "region-cache.h"
33 #include "blockinput.h"
34 #include "intervals.h"
36 #include <sys/types.h>
39 #define min(a, b) ((a) < (b) ? (a) : (b))
40 #define max(a, b) ((a) > (b) ? (a) : (b))
42 #define REGEXP_CACHE_SIZE 20
44 /* If the regexp is non-nil, then the buffer contains the compiled form
45 of that regexp, suitable for searching. */
48 struct regexp_cache
*next
;
50 struct re_pattern_buffer buf
;
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache
*searchbuf_head
;
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match can be called).
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs
;
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched
;
87 /* error condition signaled when regexp compile_pattern fails */
89 Lisp_Object Qinvalid_regexp
;
91 static void set_search_regs ();
92 static void save_search_regs ();
93 static int simple_search ();
94 static int boyer_moore ();
95 static int search_buffer ();
100 error ("Stack overflow in regexp matcher");
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119 MULTIBYTE is nonzero if we want to handle multibyte characters in
120 PATTERN. 0 means all multibyte characters are recognized just as
121 sequences of binary data. */
124 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
)
125 struct regexp_cache
*cp
;
127 Lisp_Object translate
;
128 struct re_registers
*regp
;
132 unsigned char *raw_pattern
;
133 int raw_pattern_size
;
137 /* MULTIBYTE says whether the text to be searched is multibyte.
138 We must convert PATTERN to match that, or we will not really
139 find things right. */
141 if (multibyte
== STRING_MULTIBYTE (pattern
))
143 raw_pattern
= (unsigned char *) XSTRING (pattern
)->data
;
144 raw_pattern_size
= STRING_BYTES (XSTRING (pattern
));
148 raw_pattern_size
= count_size_as_multibyte (XSTRING (pattern
)->data
,
149 XSTRING (pattern
)->size
);
150 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
151 copy_text (XSTRING (pattern
)->data
, raw_pattern
,
152 XSTRING (pattern
)->size
, 0, 1);
156 /* Converting multibyte to single-byte.
158 ??? Perhaps this conversion should be done in a special way
159 by subtracting nonascii-insert-offset from each non-ASCII char,
160 so that only the multibyte chars which really correspond to
161 the chosen single-byte character set can possibly match. */
162 raw_pattern_size
= XSTRING (pattern
)->size
;
163 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
164 copy_text (XSTRING (pattern
)->data
, raw_pattern
,
165 STRING_BYTES (XSTRING (pattern
)), 1, 0);
169 cp
->buf
.translate
= (! NILP (translate
) ? translate
: make_number (0));
171 cp
->buf
.multibyte
= multibyte
;
173 old
= re_set_syntax (RE_SYNTAX_EMACS
174 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
175 val
= (char *) re_compile_pattern ((char *)raw_pattern
,
176 raw_pattern_size
, &cp
->buf
);
180 Fsignal (Qinvalid_regexp
, Fcons (build_string (val
), Qnil
));
182 cp
->regexp
= Fcopy_sequence (pattern
);
185 /* Compile a regexp if necessary, but first check to see if there's one in the cache.
187 PATTERN is the pattern to compile.
188 TRANSLATE is a translation table for ignoring case, or nil for none.
189 REGP is the structure that says where to store the "register"
190 values that will result from matching this pattern.
191 If it is 0, we should compile the pattern not to record any
192 subexpression bounds.
193 POSIX is nonzero if we want full backtracking (POSIX style)
194 for this pattern. 0 means backtrack only enough to get a valid match. */
196 struct re_pattern_buffer
*
197 compile_pattern (pattern
, regp
, translate
, posix
, multibyte
)
199 struct re_registers
*regp
;
200 Lisp_Object translate
;
201 int posix
, multibyte
;
203 struct regexp_cache
*cp
, **cpp
;
205 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
208 if (XSTRING (cp
->regexp
)->size
== XSTRING (pattern
)->size
209 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
210 && EQ (cp
->buf
.translate
, (! NILP (translate
) ? translate
: make_number (0)))
211 && cp
->posix
== posix
212 && cp
->buf
.multibyte
== multibyte
)
215 /* If we're at the end of the cache, compile into the last cell. */
218 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
, multibyte
);
223 /* When we get here, cp (aka *cpp) contains the compiled pattern,
224 either because we found it in the cache or because we just compiled it.
225 Move it to the front of the queue to mark it as most recently used. */
227 cp
->next
= searchbuf_head
;
230 /* Advise the searching functions about the space we have allocated
231 for register data. */
233 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
238 /* Error condition used for failing searches */
239 Lisp_Object Qsearch_failed
;
245 Fsignal (Qsearch_failed
, Fcons (arg
, Qnil
));
250 looking_at_1 (string
, posix
)
255 unsigned char *p1
, *p2
;
258 struct re_pattern_buffer
*bufp
;
260 if (running_asynch_code
)
263 CHECK_STRING (string
, 0);
264 bufp
= compile_pattern (string
, &search_regs
,
265 (!NILP (current_buffer
->case_fold_search
)
266 ? DOWNCASE_TABLE
: Qnil
),
268 !NILP (current_buffer
->enable_multibyte_characters
));
271 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
273 /* Get pointers and sizes of the two strings
274 that make up the visible portion of the buffer. */
277 s1
= GPT_BYTE
- BEGV_BYTE
;
279 s2
= ZV_BYTE
- GPT_BYTE
;
283 s2
= ZV_BYTE
- BEGV_BYTE
;
288 s1
= ZV_BYTE
- BEGV_BYTE
;
292 re_match_object
= Qnil
;
294 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
295 PT_BYTE
- BEGV_BYTE
, &search_regs
,
296 ZV_BYTE
- BEGV_BYTE
);
300 val
= (0 <= i
? Qt
: Qnil
);
302 for (i
= 0; i
< search_regs
.num_regs
; i
++)
303 if (search_regs
.start
[i
] >= 0)
306 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
308 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
310 XSETBUFFER (last_thing_searched
, current_buffer
);
315 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
316 "Return t if text after point matches regular expression REGEXP.\n\
317 This function modifies the match data that `match-beginning',\n\
318 `match-end' and `match-data' access; save and restore the match\n\
319 data if you want to preserve them.")
323 return looking_at_1 (regexp
, 0);
326 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
327 "Return t if text after point matches regular expression REGEXP.\n\
328 Find the longest match, in accord with Posix regular expression rules.\n\
329 This function modifies the match data that `match-beginning',\n\
330 `match-end' and `match-data' access; save and restore the match\n\
331 data if you want to preserve them.")
335 return looking_at_1 (regexp
, 1);
339 string_match_1 (regexp
, string
, start
, posix
)
340 Lisp_Object regexp
, string
, start
;
344 struct re_pattern_buffer
*bufp
;
348 if (running_asynch_code
)
351 CHECK_STRING (regexp
, 0);
352 CHECK_STRING (string
, 1);
355 pos
= 0, pos_byte
= 0;
358 int len
= XSTRING (string
)->size
;
360 CHECK_NUMBER (start
, 2);
362 if (pos
< 0 && -pos
<= len
)
364 else if (0 > pos
|| pos
> len
)
365 args_out_of_range (string
, start
);
366 pos_byte
= string_char_to_byte (string
, pos
);
369 bufp
= compile_pattern (regexp
, &search_regs
,
370 (!NILP (current_buffer
->case_fold_search
)
371 ? DOWNCASE_TABLE
: Qnil
),
373 STRING_MULTIBYTE (string
));
375 re_match_object
= string
;
377 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
378 STRING_BYTES (XSTRING (string
)), pos_byte
,
379 STRING_BYTES (XSTRING (string
)) - pos_byte
,
382 last_thing_searched
= Qt
;
385 if (val
< 0) return Qnil
;
387 for (i
= 0; i
< search_regs
.num_regs
; i
++)
388 if (search_regs
.start
[i
] >= 0)
391 = string_byte_to_char (string
, search_regs
.start
[i
]);
393 = string_byte_to_char (string
, search_regs
.end
[i
]);
396 return make_number (string_byte_to_char (string
, val
));
399 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
400 "Return index of start of first match for REGEXP in STRING, or nil.\n\
401 If third arg START is non-nil, start search at that index in STRING.\n\
402 For index of first char beyond the match, do (match-end 0).\n\
403 `match-end' and `match-beginning' also give indices of substrings\n\
404 matched by parenthesis constructs in the pattern.")
405 (regexp
, string
, start
)
406 Lisp_Object regexp
, string
, start
;
408 return string_match_1 (regexp
, string
, start
, 0);
411 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
412 "Return index of start of first match for REGEXP in STRING, or nil.\n\
413 Find the longest match, in accord with Posix regular expression rules.\n\
414 If third arg START is non-nil, start search at that index in STRING.\n\
415 For index of first char beyond the match, do (match-end 0).\n\
416 `match-end' and `match-beginning' also give indices of substrings\n\
417 matched by parenthesis constructs in the pattern.")
418 (regexp
, string
, start
)
419 Lisp_Object regexp
, string
, start
;
421 return string_match_1 (regexp
, string
, start
, 1);
424 /* Match REGEXP against STRING, searching all of STRING,
425 and return the index of the match, or negative on failure.
426 This does not clobber the match data. */
429 fast_string_match (regexp
, string
)
430 Lisp_Object regexp
, string
;
433 struct re_pattern_buffer
*bufp
;
435 bufp
= compile_pattern (regexp
, 0, Qnil
,
436 0, STRING_MULTIBYTE (string
));
438 re_match_object
= string
;
440 val
= re_search (bufp
, (char *) XSTRING (string
)->data
,
441 STRING_BYTES (XSTRING (string
)), 0,
442 STRING_BYTES (XSTRING (string
)), 0);
447 /* Match REGEXP against STRING, searching all of STRING ignoring case,
448 and return the index of the match, or negative on failure.
449 This does not clobber the match data.
450 We assume that STRING contains single-byte characters. */
452 extern Lisp_Object Vascii_downcase_table
;
455 fast_c_string_match_ignore_case (regexp
, string
)
460 struct re_pattern_buffer
*bufp
;
461 int len
= strlen (string
);
463 regexp
= string_make_unibyte (regexp
);
464 re_match_object
= Qt
;
465 bufp
= compile_pattern (regexp
, 0,
466 Vascii_downcase_table
, 0,
469 val
= re_search (bufp
, string
, len
, 0, len
, 0);
474 /* The newline cache: remembering which sections of text have no newlines. */
476 /* If the user has requested newline caching, make sure it's on.
477 Otherwise, make sure it's off.
478 This is our cheesy way of associating an action with the change of
479 state of a buffer-local variable. */
481 newline_cache_on_off (buf
)
484 if (NILP (buf
->cache_long_line_scans
))
486 /* It should be off. */
487 if (buf
->newline_cache
)
489 free_region_cache (buf
->newline_cache
);
490 buf
->newline_cache
= 0;
495 /* It should be on. */
496 if (buf
->newline_cache
== 0)
497 buf
->newline_cache
= new_region_cache ();
502 /* Search for COUNT instances of the character TARGET between START and END.
504 If COUNT is positive, search forwards; END must be >= START.
505 If COUNT is negative, search backwards for the -COUNTth instance;
506 END must be <= START.
507 If COUNT is zero, do anything you please; run rogue, for all I care.
509 If END is zero, use BEGV or ZV instead, as appropriate for the
510 direction indicated by COUNT.
512 If we find COUNT instances, set *SHORTAGE to zero, and return the
513 position after the COUNTth match. Note that for reverse motion
514 this is not the same as the usual convention for Emacs motion commands.
516 If we don't find COUNT instances before reaching END, set *SHORTAGE
517 to the number of TARGETs left unfound, and return END.
519 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
520 except when inside redisplay. */
523 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
530 struct region_cache
*newline_cache
;
541 if (! end
) end
= BEGV
;
544 newline_cache_on_off (current_buffer
);
545 newline_cache
= current_buffer
->newline_cache
;
550 immediate_quit
= allow_quit
;
555 /* Our innermost scanning loop is very simple; it doesn't know
556 about gaps, buffer ends, or the newline cache. ceiling is
557 the position of the last character before the next such
558 obstacle --- the last character the dumb search loop should examine. */
560 int ceiling_byte
= CHAR_TO_BYTE (end
) - 1;
561 int start_byte
= CHAR_TO_BYTE (start
);
564 /* If we're looking for a newline, consult the newline cache
565 to see where we can avoid some scanning. */
566 if (target
== '\n' && newline_cache
)
570 while (region_cache_forward
571 (current_buffer
, newline_cache
, start_byte
, &next_change
))
572 start_byte
= next_change
;
573 immediate_quit
= allow_quit
;
575 /* START should never be after END. */
576 if (start_byte
> ceiling_byte
)
577 start_byte
= ceiling_byte
;
579 /* Now the text after start is an unknown region, and
580 next_change is the position of the next known region. */
581 ceiling_byte
= min (next_change
- 1, ceiling_byte
);
584 /* The dumb loop can only scan text stored in contiguous
585 bytes. BUFFER_CEILING_OF returns the last character
586 position that is contiguous, so the ceiling can be no later
587 than that position. */
588 tem
= BUFFER_CEILING_OF (start_byte
);
589 ceiling_byte
= min (tem
, ceiling_byte
);
592 /* The termination address of the dumb loop. */
593 register unsigned char *ceiling_addr
594 = BYTE_POS_ADDR (ceiling_byte
) + 1;
595 register unsigned char *cursor
596 = BYTE_POS_ADDR (start_byte
);
597 unsigned char *base
= cursor
;
599 while (cursor
< ceiling_addr
)
601 unsigned char *scan_start
= cursor
;
604 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
607 /* If we're looking for newlines, cache the fact that
608 the region from start to cursor is free of them. */
609 if (target
== '\n' && newline_cache
)
610 know_region_cache (current_buffer
, newline_cache
,
611 start_byte
+ scan_start
- base
,
612 start_byte
+ cursor
- base
);
614 /* Did we find the target character? */
615 if (cursor
< ceiling_addr
)
620 return BYTE_TO_CHAR (start_byte
+ cursor
- base
+ 1);
626 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
632 /* The last character to check before the next obstacle. */
633 int ceiling_byte
= CHAR_TO_BYTE (end
);
634 int start_byte
= CHAR_TO_BYTE (start
);
637 /* Consult the newline cache, if appropriate. */
638 if (target
== '\n' && newline_cache
)
642 while (region_cache_backward
643 (current_buffer
, newline_cache
, start_byte
, &next_change
))
644 start_byte
= next_change
;
645 immediate_quit
= allow_quit
;
647 /* Start should never be at or before end. */
648 if (start_byte
<= ceiling_byte
)
649 start_byte
= ceiling_byte
+ 1;
651 /* Now the text before start is an unknown region, and
652 next_change is the position of the next known region. */
653 ceiling_byte
= max (next_change
, ceiling_byte
);
656 /* Stop scanning before the gap. */
657 tem
= BUFFER_FLOOR_OF (start_byte
- 1);
658 ceiling_byte
= max (tem
, ceiling_byte
);
661 /* The termination address of the dumb loop. */
662 register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte
);
663 register unsigned char *cursor
= BYTE_POS_ADDR (start_byte
- 1);
664 unsigned char *base
= cursor
;
666 while (cursor
>= ceiling_addr
)
668 unsigned char *scan_start
= cursor
;
670 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
673 /* If we're looking for newlines, cache the fact that
674 the region from after the cursor to start is free of them. */
675 if (target
== '\n' && newline_cache
)
676 know_region_cache (current_buffer
, newline_cache
,
677 start_byte
+ cursor
- base
,
678 start_byte
+ scan_start
- base
);
680 /* Did we find the target character? */
681 if (cursor
>= ceiling_addr
)
686 return BYTE_TO_CHAR (start_byte
+ cursor
- base
);
692 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
698 *shortage
= count
* direction
;
702 /* Search for COUNT instances of a line boundary, which means either a
703 newline or (if selective display enabled) a carriage return.
704 Start at START. If COUNT is negative, search backwards.
706 We report the resulting position by calling TEMP_SET_PT_BOTH.
708 If we find COUNT instances, we position after (always after,
709 even if scanning backwards) the COUNTth match, and return 0.
711 If we don't find COUNT instances before reaching the end of the
712 buffer (or the beginning, if scanning backwards), we return
713 the number of line boundaries left unfound, and position at
714 the limit we bumped up against.
716 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
717 except in special cases. */
720 scan_newline (start
, start_byte
, limit
, limit_byte
, count
, allow_quit
)
721 int start
, start_byte
;
722 int limit
, limit_byte
;
726 int direction
= ((count
> 0) ? 1 : -1);
728 register unsigned char *cursor
;
731 register int ceiling
;
732 register unsigned char *ceiling_addr
;
734 int old_immediate_quit
= immediate_quit
;
736 /* If we are not in selective display mode,
737 check only for newlines. */
738 int selective_display
= (!NILP (current_buffer
->selective_display
)
739 && !INTEGERP (current_buffer
->selective_display
));
741 /* The code that follows is like scan_buffer
742 but checks for either newline or carriage return. */
747 start_byte
= CHAR_TO_BYTE (start
);
751 while (start_byte
< limit_byte
)
753 ceiling
= BUFFER_CEILING_OF (start_byte
);
754 ceiling
= min (limit_byte
- 1, ceiling
);
755 ceiling_addr
= BYTE_POS_ADDR (ceiling
) + 1;
756 base
= (cursor
= BYTE_POS_ADDR (start_byte
));
759 while (*cursor
!= '\n' && ++cursor
!= ceiling_addr
)
762 if (cursor
!= ceiling_addr
)
766 immediate_quit
= old_immediate_quit
;
767 start_byte
= start_byte
+ cursor
- base
+ 1;
768 start
= BYTE_TO_CHAR (start_byte
);
769 TEMP_SET_PT_BOTH (start
, start_byte
);
773 if (++cursor
== ceiling_addr
)
779 start_byte
+= cursor
- base
;
784 while (start_byte
> limit_byte
)
786 ceiling
= BUFFER_FLOOR_OF (start_byte
- 1);
787 ceiling
= max (limit_byte
, ceiling
);
788 ceiling_addr
= BYTE_POS_ADDR (ceiling
) - 1;
789 base
= (cursor
= BYTE_POS_ADDR (start_byte
- 1) + 1);
792 while (--cursor
!= ceiling_addr
&& *cursor
!= '\n')
795 if (cursor
!= ceiling_addr
)
799 immediate_quit
= old_immediate_quit
;
800 /* Return the position AFTER the match we found. */
801 start_byte
= start_byte
+ cursor
- base
+ 1;
802 start
= BYTE_TO_CHAR (start_byte
);
803 TEMP_SET_PT_BOTH (start
, start_byte
);
810 /* Here we add 1 to compensate for the last decrement
811 of CURSOR, which took it past the valid range. */
812 start_byte
+= cursor
- base
+ 1;
816 TEMP_SET_PT_BOTH (limit
, limit_byte
);
817 immediate_quit
= old_immediate_quit
;
819 return count
* direction
;
823 find_next_newline_no_quit (from
, cnt
)
824 register int from
, cnt
;
826 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
829 /* Like find_next_newline, but returns position before the newline,
830 not after, and only searches up to TO. This isn't just
831 find_next_newline (...)-1, because you might hit TO. */
834 find_before_next_newline (from
, to
, cnt
)
838 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
846 /* Subroutines of Lisp buffer search functions. */
849 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
850 Lisp_Object string
, bound
, noerror
, count
;
861 CHECK_NUMBER (count
, 3);
865 CHECK_STRING (string
, 0);
869 lim
= ZV
, lim_byte
= ZV_BYTE
;
871 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
875 CHECK_NUMBER_COERCE_MARKER (bound
, 1);
877 if (n
> 0 ? lim
< PT
: lim
> PT
)
878 error ("Invalid search bound (wrong side of point)");
880 lim
= ZV
, lim_byte
= ZV_BYTE
;
882 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
884 lim_byte
= CHAR_TO_BYTE (lim
);
887 np
= search_buffer (string
, PT
, PT_BYTE
, lim
, lim_byte
, n
, RE
,
888 (!NILP (current_buffer
->case_fold_search
)
889 ? current_buffer
->case_canon_table
891 (!NILP (current_buffer
->case_fold_search
)
892 ? current_buffer
->case_eqv_table
898 return signal_failure (string
);
899 if (!EQ (noerror
, Qt
))
901 if (lim
< BEGV
|| lim
> ZV
)
903 SET_PT_BOTH (lim
, lim_byte
);
905 #if 0 /* This would be clean, but maybe programs depend on
906 a value of nil here. */
914 if (np
< BEGV
|| np
> ZV
)
919 return make_number (np
);
922 /* Return 1 if REGEXP matches just one constant string. */
925 trivial_regexp_p (regexp
)
928 int len
= STRING_BYTES (XSTRING (regexp
));
929 unsigned char *s
= XSTRING (regexp
)->data
;
935 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
942 case '|': case '(': case ')': case '`': case '\'': case 'b':
943 case 'B': case '<': case '>': case 'w': case 'W': case 's':
945 case 'c': case 'C': /* for categoryspec and notcategoryspec */
946 case '1': case '2': case '3': case '4': case '5':
947 case '6': case '7': case '8': case '9':
955 /* Search for the n'th occurrence of STRING in the current buffer,
956 starting at position POS and stopping at position LIM,
957 treating STRING as a literal string if RE is false or as
958 a regular expression if RE is true.
960 If N is positive, searching is forward and LIM must be greater than POS.
961 If N is negative, searching is backward and LIM must be less than POS.
963 Returns -x if x occurrences remain to be found (x > 0),
964 or else the position at the beginning of the Nth occurrence
965 (if searching backward) or the end (if searching forward).
967 POSIX is nonzero if we want full backtracking (POSIX style)
968 for this pattern. 0 means backtrack only enough to get a valid match. */
970 #define TRANSLATE(out, trt, d) \
976 temp = Faref (trt, make_number (d)); \
977 if (INTEGERP (temp)) \
988 search_buffer (string
, pos
, pos_byte
, lim
, lim_byte
, n
,
989 RE
, trt
, inverse_trt
, posix
)
998 Lisp_Object inverse_trt
;
1001 int len
= XSTRING (string
)->size
;
1002 int len_byte
= STRING_BYTES (XSTRING (string
));
1005 if (running_asynch_code
)
1006 save_search_regs ();
1008 /* Searching 0 times means don't move. */
1009 /* Null string is found at starting position. */
1010 if (len
== 0 || n
== 0)
1012 set_search_regs (pos
, 0);
1016 if (RE
&& !trivial_regexp_p (string
))
1018 unsigned char *p1
, *p2
;
1020 struct re_pattern_buffer
*bufp
;
1022 bufp
= compile_pattern (string
, &search_regs
, trt
, posix
,
1023 !NILP (current_buffer
->enable_multibyte_characters
));
1025 immediate_quit
= 1; /* Quit immediately if user types ^G,
1026 because letting this function finish
1027 can take too long. */
1028 QUIT
; /* Do a pending quit right away,
1029 to avoid paradoxical behavior */
1030 /* Get pointers and sizes of the two strings
1031 that make up the visible portion of the buffer. */
1034 s1
= GPT_BYTE
- BEGV_BYTE
;
1036 s2
= ZV_BYTE
- GPT_BYTE
;
1040 s2
= ZV_BYTE
- BEGV_BYTE
;
1045 s1
= ZV_BYTE
- BEGV_BYTE
;
1048 re_match_object
= Qnil
;
1053 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1054 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1056 /* Don't allow match past current point */
1057 pos_byte
- BEGV_BYTE
);
1060 matcher_overflow ();
1064 pos_byte
= search_regs
.start
[0] + BEGV_BYTE
;
1065 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1066 if (search_regs
.start
[i
] >= 0)
1068 search_regs
.start
[i
]
1069 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1071 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1073 XSETBUFFER (last_thing_searched
, current_buffer
);
1074 /* Set pos to the new position. */
1075 pos
= search_regs
.start
[0];
1087 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1088 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1090 lim_byte
- BEGV_BYTE
);
1093 matcher_overflow ();
1097 pos_byte
= search_regs
.end
[0] + BEGV_BYTE
;
1098 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1099 if (search_regs
.start
[i
] >= 0)
1101 search_regs
.start
[i
]
1102 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1104 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1106 XSETBUFFER (last_thing_searched
, current_buffer
);
1107 pos
= search_regs
.end
[0];
1119 else /* non-RE case */
1121 unsigned char *raw_pattern
, *pat
;
1122 int raw_pattern_size
;
1123 int raw_pattern_size_byte
;
1124 unsigned char *patbuf
;
1125 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1126 unsigned char *base_pat
= XSTRING (string
)->data
;
1127 int charset_base
= -1;
1130 /* MULTIBYTE says whether the text to be searched is multibyte.
1131 We must convert PATTERN to match that, or we will not really
1132 find things right. */
1134 if (multibyte
== STRING_MULTIBYTE (string
))
1136 raw_pattern
= (unsigned char *) XSTRING (string
)->data
;
1137 raw_pattern_size
= XSTRING (string
)->size
;
1138 raw_pattern_size_byte
= STRING_BYTES (XSTRING (string
));
1142 raw_pattern_size
= XSTRING (string
)->size
;
1143 raw_pattern_size_byte
1144 = count_size_as_multibyte (XSTRING (string
)->data
,
1146 raw_pattern
= (unsigned char *) alloca (raw_pattern_size_byte
+ 1);
1147 copy_text (XSTRING (string
)->data
, raw_pattern
,
1148 XSTRING (string
)->size
, 0, 1);
1152 /* Converting multibyte to single-byte.
1154 ??? Perhaps this conversion should be done in a special way
1155 by subtracting nonascii-insert-offset from each non-ASCII char,
1156 so that only the multibyte chars which really correspond to
1157 the chosen single-byte character set can possibly match. */
1158 raw_pattern_size
= XSTRING (string
)->size
;
1159 raw_pattern_size_byte
= XSTRING (string
)->size
;
1160 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
1161 copy_text (XSTRING (string
)->data
, raw_pattern
,
1162 STRING_BYTES (XSTRING (string
)), 1, 0);
1165 /* Copy and optionally translate the pattern. */
1166 len
= raw_pattern_size
;
1167 len_byte
= raw_pattern_size_byte
;
1168 patbuf
= (unsigned char *) alloca (len_byte
);
1170 base_pat
= raw_pattern
;
1175 unsigned char workbuf
[4], *str
;
1176 int c
, translated
, inverse
;
1177 int in_charlen
, charlen
;
1179 /* If we got here and the RE flag is set, it's because we're
1180 dealing with a regexp known to be trivial, so the backslash
1181 just quotes the next character. */
1182 if (RE
&& *base_pat
== '\\')
1189 c
= STRING_CHAR_AND_LENGTH (base_pat
, len_byte
, in_charlen
);
1190 /* Translate the character, if requested. */
1191 TRANSLATE (translated
, trt
, c
);
1192 /* If translation changed the byte-length, go back
1193 to the original character. */
1194 charlen
= CHAR_STRING (translated
, workbuf
, str
);
1195 if (in_charlen
!= charlen
)
1198 charlen
= CHAR_STRING (c
, workbuf
, str
);
1201 TRANSLATE (inverse
, inverse_trt
, c
);
1203 /* Did this char actually get translated?
1204 Would any other char get translated into it? */
1205 if (translated
!= c
|| inverse
!= c
)
1207 /* Keep track of which character set row
1208 contains the characters that need translation. */
1209 int charset_base_code
= c
& ~0xff;
1210 if (charset_base
== -1)
1211 charset_base
= charset_base_code
;
1212 else if (charset_base
!= charset_base_code
)
1213 /* If two different rows appear, needing translation,
1214 then we cannot use boyer_moore search. */
1216 /* ??? Handa: this must do simple = 0
1217 if c is a composite character. */
1220 /* Store this character into the translated pattern. */
1221 bcopy (str
, pat
, charlen
);
1223 base_pat
+= in_charlen
;
1224 len_byte
-= in_charlen
;
1231 int c
, translated
, inverse
;
1233 /* If we got here and the RE flag is set, it's because we're
1234 dealing with a regexp known to be trivial, so the backslash
1235 just quotes the next character. */
1236 if (RE
&& *base_pat
== '\\')
1242 TRANSLATE (translated
, trt
, c
);
1243 TRANSLATE (inverse
, inverse_trt
, c
);
1245 /* Did this char actually get translated?
1246 Would any other char get translated into it? */
1247 if (translated
!= c
|| inverse
!= c
)
1249 /* Keep track of which character set row
1250 contains the characters that need translation. */
1251 int charset_base_code
= c
& ~0xff;
1252 if (charset_base
== -1)
1253 charset_base
= charset_base_code
;
1254 else if (charset_base
!= charset_base_code
)
1255 /* If two different rows appear, needing translation,
1256 then we cannot use boyer_moore search. */
1259 *pat
++ = translated
;
1263 len_byte
= pat
- patbuf
;
1264 len
= raw_pattern_size
;
1265 pat
= base_pat
= patbuf
;
1268 return boyer_moore (n
, pat
, len
, len_byte
, trt
, inverse_trt
,
1269 pos
, pos_byte
, lim
, lim_byte
,
1272 return simple_search (n
, pat
, len
, len_byte
, trt
,
1273 pos
, pos_byte
, lim
, lim_byte
);
/* Overview: the search runs forward when N > 0 and backward otherwise
   (see `forward' below).  In a multibyte buffer the pattern and the
   buffer text are compared one character at a time with
   STRING_CHAR_AND_LENGTH; otherwise single bytes are read with
   FETCH_BYTE.  Each buffer character goes through TRANSLATE with TRT
   before it is compared with the pattern character.
   NOTE(review): the return type, the parameter declarations, the loop
   headers and the declarations of this_len and pat_ch are not visible
   in this chunk -- confirm against the full source before editing.  */
1277 /* Do a simple string search N times for the string PAT,
1278 whose length is LEN/LEN_BYTE,
1279 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1280 TRT is the translation table.
1282 Return the character position where the match is found.
1283 Otherwise, if M matches remained to be found, return -M.
1285 This kind of search works regardless of what is in PAT and
1286 regardless of what is in TRT. It is used in cases where
1287 boyer_moore cannot work. */
1290 simple_search (n
, pat
, len
, len_byte
, trt
, pos
, pos_byte
, lim
, lim_byte
)
1298 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1299 int forward
= n
> 0;
/* Forward search in a multibyte buffer.  */
1301 if (lim
> pos
&& multibyte
)
1306 /* Try matching at position POS. */
1308 int this_pos_byte
= pos_byte
;
1310 int this_len_byte
= len_byte
;
1311 unsigned char *p
= pat
;
1312 if (pos
+ len
> lim
)
1315 while (this_len
> 0)
1317 int charlen
, buf_charlen
;
1320 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1321 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1322 ZV_BYTE
- this_pos_byte
,
1324 TRANSLATE (buf_ch
, trt
, buf_ch
);
1326 if (buf_ch
!= pat_ch
)
1329 this_len_byte
-= charlen
;
1333 this_pos_byte
+= buf_charlen
;
1340 pos_byte
+= len_byte
;
1344 INC_BOTH (pos
, pos_byte
);
/* Presumably the forward search for a unibyte buffer (its `else if'
   header is not visible here): bytes are read with FETCH_BYTE.  */
1354 /* Try matching at position POS. */
1357 unsigned char *p
= pat
;
1359 if (pos
+ len
> lim
)
1362 while (this_len
> 0)
1365 int buf_ch
= FETCH_BYTE (this_pos
);
1366 TRANSLATE (buf_ch
, trt
, buf_ch
);
1368 if (buf_ch
!= pat_ch
)
1386 /* Backwards search. */
1387 else if (lim
< pos
&& multibyte
)
1392 /* Try matching at position POS. */
1393 int this_pos
= pos
- len
;
1394 int this_pos_byte
= pos_byte
- len_byte
;
1396 int this_len_byte
= len_byte
;
1397 unsigned char *p
= pat
;
1399 if (pos
- len
< lim
)
1402 while (this_len
> 0)
1404 int charlen
, buf_charlen
;
1407 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1408 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1409 ZV_BYTE
- this_pos_byte
,
1411 TRANSLATE (buf_ch
, trt
, buf_ch
);
1413 if (buf_ch
!= pat_ch
)
1416 this_len_byte
-= charlen
;
1419 this_pos_byte
+= buf_charlen
;
1426 pos_byte
-= len_byte
;
1430 DEC_BOTH (pos
, pos_byte
);
/* Presumably the backward search for a unibyte buffer (its header is
   not visible here): the candidate match starts at POS - LEN.  */
1440 /* Try matching at position POS. */
1441 int this_pos
= pos
- len
;
1443 unsigned char *p
= pat
;
1445 if (pos
- len
< lim
)
1448 while (this_len
> 0)
1451 int buf_ch
= FETCH_BYTE (this_pos
);
1452 TRANSLATE (buf_ch
, trt
, buf_ch
);
1454 if (buf_ch
!= pat_ch
)
/* Record the match in search_regs.  The first call backs up by
   LEN_BYTE from the current position, the second uses the position
   as the match start; presumably these are the forward and backward
   cases respectively -- the selecting condition is not visible.  */
1476 set_search_regs ((multibyte
? pos_byte
: pos
) - len_byte
, len_byte
);
1478 set_search_regs (multibyte
? pos_byte
: pos
, len_byte
);
/* NOTE(review): the header below mentions DIRECTION, but the parameter
   list has no such parameter; `direction' is a local set to 1 when
   N > 0 and to -1 otherwise.  The header also does not describe
   CHARSET_BASE: while the stride table is built, a multibyte pattern
   character is passed through TRT only when (untranslated & ~0xff)
   equals CHARSET_BASE.
   Visible return values: N * (0 - direction) when the remaining region
   is too short, (0 - N) * direction when the limit is passed after a
   clump, search_regs.end[0] (forward) or search_regs.start[0]
   (backward) once the count N is exhausted, and BYTE_TO_CHAR (pos_byte)
   at the very end.
   Several declarations and braces are not visible in this chunk --
   confirm against the full source before editing.  */
1488 /* Do Boyer-Moore search N times for the string PAT,
1489 whose length is LEN/LEN_BYTE,
1490 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1491 DIRECTION says which direction we search in.
1492 TRT and INVERSE_TRT are translation tables.
1494 This kind of search works if all the characters in PAT that have
1495 nontrivial translation are the same aside from the last byte. This
1496 makes it possible to translate just the last byte of a character,
1497 and do so after just a simple test of the context.
1499 If that criterion is not satisfied, do not call this function. */
1502 boyer_moore (n
, base_pat
, len
, len_byte
, trt
, inverse_trt
,
1503 pos
, pos_byte
, lim
, lim_byte
, charset_base
)
1505 unsigned char *base_pat
;
1508 Lisp_Object inverse_trt
;
1513 int direction
= ((n
> 0) ? 1 : -1);
1514 register int dirlen
;
1515 int infinity
, limit
, k
, stride_for_teases
;
1516 register int *BM_tab
;
1518 register unsigned char *cursor
, *p_limit
;
1520 unsigned char *pat
, *pat_end
;
1521 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1523 unsigned char simple_translate
[0400];
1524 int translate_prev_byte
;
1525 int translate_anteprev_byte
;
/* Two alternative ways of obtaining the 0400-entry stride table follow:
   a local array or alloca.  Presumably a preprocessor conditional that
   is not visible here selects one of them -- confirm.  */
1528 int BM_tab_space
[0400];
1529 BM_tab
= &BM_tab_space
[0];
1531 BM_tab
= (int *) alloca (0400 * sizeof (int));
1533 /* The general approach is that we are going to maintain that we know */
1534 /* the first (closest to the present position, in whatever direction */
1535 /* we're searching) character that could possibly be the last */
1536 /* (furthest from present position) character of a valid match. We */
1537 /* advance the state of our knowledge by looking at that character */
1538 /* and seeing whether it indeed matches the last character of the */
1539 /* pattern. If it does, we take a closer look. If it does not, we */
1540 /* move our pointer (to putative last characters) as far as is */
1541 /* logically possible. This amount of movement, which I call a */
1542 /* stride, will be the length of the pattern if the actual character */
1543 /* appears nowhere in the pattern, otherwise it will be the distance */
1544 /* from the last occurrence of that character to the end of the */
1546 /* As a coding trick, an enormous stride is coded into the table for */
1547 /* characters that match the last character. This allows use of only */
1548 /* a single test, a test for having gone past the end of the */
1549 /* permissible match region, to test for both possible matches (when */
1550 /* the stride goes past the end immediately) and failure to */
1551 /* match (where you get nudged past the end one stride at a time). */
1553 /* Here we make a "mickey mouse" BM table. The stride of the search */
1554 /* is determined only by the last character of the putative match. */
1555 /* If that character does not match, we will stride the proper */
1556 /* distance to propose a match that superimposes it on the last */
1557 /* instance of a character that matches it (per trt), or misses */
1558 /* it entirely if there is none. */
1560 dirlen
= len_byte
* direction
;
1561 infinity
= dirlen
- (lim_byte
+ pos_byte
+ len_byte
+ len_byte
) * direction
;
1563 /* Record position after the end of the pattern. */
1564 pat_end
= base_pat
+ len_byte
;
1565 /* BASE_PAT points to a character that we start scanning from.
1566 It is the first character in a forward search,
1567 the last character in a backward search. */
1569 base_pat
= pat_end
- 1;
1571 BM_tab_base
= BM_tab
;
1573 j
= dirlen
; /* to get it in a register */
1574 /* A character that does not appear in the pattern induces a */
1575 /* stride equal to the pattern length. */
1576 while (BM_tab_base
!= BM_tab
)
1584 /* We use this for translation, instead of TRT itself.
1585 We fill this in to handle the characters that actually
1586 occur in the pattern. Others don't matter anyway! */
/* NOTE(review): this bzero looks redundant -- the loop right after it
   assigns every one of the 0400 entries.  */
1587 bzero (simple_translate
, sizeof simple_translate
);
1588 for (i
= 0; i
< 0400; i
++)
1589 simple_translate
[i
] = i
;
1592 while (i
!= infinity
)
1594 unsigned char *ptr
= base_pat
+ i
;
1602 int this_translated
= 1;
1605 /* Is *PTR the last byte of a character? */
1606 && (pat_end
- ptr
== 1 || CHAR_HEAD_P (ptr
[1])))
1608 unsigned char *charstart
= ptr
;
1609 while (! CHAR_HEAD_P (*charstart
))
1611 untranslated
= STRING_CHAR (charstart
, ptr
- charstart
+ 1);
1612 if (charset_base
== (untranslated
& ~0xff))
1614 TRANSLATE (ch
, trt
, untranslated
);
1615 if (! CHAR_HEAD_P (*ptr
))
1617 translate_prev_byte
= ptr
[-1];
1618 if (! CHAR_HEAD_P (translate_prev_byte
))
1619 translate_anteprev_byte
= ptr
[-2];
1624 this_translated
= 0;
1628 else if (!multibyte
)
1629 TRANSLATE (ch
, trt
, *ptr
);
1633 this_translated
= 0;
1637 j
= ((unsigned char) ch
) | 0200;
1639 j
= (unsigned char) ch
;
1642 stride_for_teases
= BM_tab
[j
];
1644 BM_tab
[j
] = dirlen
- i
;
1645 /* A translation table is accompanied by its inverse -- see */
1646 /* comment following downcase_table for details */
1647 if (this_translated
)
1649 int starting_ch
= ch
;
1653 TRANSLATE (ch
, inverse_trt
, ch
);
1655 j
= ((unsigned char) ch
) | 0200;
1657 j
= (unsigned char) ch
;
1659 /* For all the characters that map into CH,
1660 set up simple_translate to map the last byte
1662 simple_translate
[j
] = starting_j
;
1663 if (ch
== starting_ch
)
1665 BM_tab
[j
] = dirlen
- i
;
1674 stride_for_teases
= BM_tab
[j
];
1675 BM_tab
[j
] = dirlen
- i
;
1677 /* stride_for_teases tells how much to stride if we get a */
1678 /* match on the far character but are subsequently */
1679 /* disappointed, by recording what the stride would have been */
1680 /* for that character if the last character had been */
1683 infinity
= dirlen
- infinity
;
1684 pos_byte
+= dirlen
- ((direction
> 0) ? direction
: 0);
1685 /* loop invariant - POS_BYTE points at where last char (first
1686 char if reverse) of pattern would align in a possible match. */
1690 unsigned char *tail_end_ptr
;
1692 /* It's been reported that some (broken) compiler thinks that
1693 Boolean expressions in an arithmetic context are unsigned.
1694 Using an explicit ?1:0 prevents this. */
1695 if ((lim_byte
- pos_byte
- ((direction
> 0) ? 1 : 0)) * direction
1697 return (n
* (0 - direction
));
1698 /* First we do the part we can by pointers (maybe nothing) */
1701 limit
= pos_byte
- dirlen
+ direction
;
1704 limit
= BUFFER_CEILING_OF (limit
);
1705 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1706 can take on without hitting edge of buffer or the gap. */
1707 limit
= min (limit
, pos_byte
+ 20000);
1708 limit
= min (limit
, lim_byte
- 1);
1712 limit
= BUFFER_FLOOR_OF (limit
);
1713 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1714 can take on without hitting edge of buffer or the gap. */
1715 limit
= max (limit
, pos_byte
- 20000);
1716 limit
= max (limit
, lim_byte
);
1718 tail_end
= BUFFER_CEILING_OF (pos_byte
) + 1;
1719 tail_end_ptr
= BYTE_POS_ADDR (tail_end
);
/* The pointer-based loop below is used only when more than 20 bytes
   separate POS_BYTE from LIMIT in the search direction.  */
1721 if ((limit
- pos_byte
) * direction
> 20)
1725 p_limit
= BYTE_POS_ADDR (limit
);
1726 p2
= (cursor
= BYTE_POS_ADDR (pos_byte
));
1727 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1728 while (1) /* use one cursor setting as long as i can */
1730 if (direction
> 0) /* worth duplicating */
1732 /* Use signed comparison if appropriate
1733 to make cursor+infinity sure to be > p_limit.
1734 Assuming that the buffer lies in a range of addresses
1735 that are all "positive" (as ints) or all "negative",
1736 either kind of comparison will work as long
1737 as we don't step by infinity. So pick the kind
1738 that works when we do step by infinity. */
1739 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1740 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1741 cursor
+= BM_tab
[*cursor
];
1743 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1744 cursor
+= BM_tab
[*cursor
];
1748 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1749 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1750 cursor
+= BM_tab
[*cursor
];
1752 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1753 cursor
+= BM_tab
[*cursor
];
1755 /* If you are here, cursor is beyond the end of the searched region. */
1756 /* This can happen if you match on the far character of the pattern, */
1757 /* because the "stride" of that character is infinity, a number able */
1758 /* to throw you well beyond the end of the search. It can also */
1759 /* happen if you fail to match within the permitted region and would */
1760 /* otherwise try a character beyond that region */
1761 if ((cursor
- p_limit
) * direction
<= len_byte
)
1762 break; /* a small overrun is genuine */
1763 cursor
-= infinity
; /* large overrun = hit */
1764 i
= dirlen
- direction
;
1767 while ((i
-= direction
) + direction
!= 0)
1770 cursor
-= direction
;
1771 /* Translate only the last byte of a character. */
1773 || ((cursor
== tail_end_ptr
1774 || CHAR_HEAD_P (cursor
[1]))
1775 && (CHAR_HEAD_P (cursor
[0])
1776 || (translate_prev_byte
== cursor
[-1]
1777 && (CHAR_HEAD_P (translate_prev_byte
)
1778 || translate_anteprev_byte
== cursor
[-2])))))
1779 ch
= simple_translate
[*cursor
];
1788 while ((i
-= direction
) + direction
!= 0)
1790 cursor
-= direction
;
1791 if (pat
[i
] != *cursor
)
1795 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1796 if (i
+ direction
== 0)
1800 cursor
-= direction
;
1802 position
= pos_byte
+ cursor
- p2
+ ((direction
> 0)
1803 ? 1 - len_byte
: 0);
1804 set_search_regs (position
, len_byte
);
1806 if ((n
-= direction
) != 0)
1807 cursor
+= dirlen
; /* to resume search */
1809 return ((direction
> 0)
1810 ? search_regs
.end
[0] : search_regs
.start
[0]);
1813 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1815 pos_byte
+= cursor
- p2
;
1818 /* Now we'll pick up a clump that has to be done the hard */
1819 /* way because it covers a discontinuity */
1821 limit
= ((direction
> 0)
1822 ? BUFFER_CEILING_OF (pos_byte
- dirlen
+ 1)
1823 : BUFFER_FLOOR_OF (pos_byte
- dirlen
- 1));
1824 limit
= ((direction
> 0)
1825 ? min (limit
+ len_byte
, lim_byte
- 1)
1826 : max (limit
- len_byte
, lim_byte
));
1827 /* LIMIT is now the last value POS_BYTE can have
1828 and still be valid for a possible match. */
1831 /* This loop can be coded for space rather than */
1832 /* speed because it will usually run only once. */
1833 /* (the reach is at most len + 21, and typically */
1834 /* does not exceed len) */
1835 while ((limit
- pos_byte
) * direction
>= 0)
1836 pos_byte
+= BM_tab
[FETCH_BYTE (pos_byte
)];
1837 /* now run the same tests to distinguish going off the */
1838 /* end, a match or a phony match. */
1839 if ((pos_byte
- limit
) * direction
<= len_byte
)
1840 break; /* ran off the end */
1841 /* Found what might be a match.
1842 Set POS_BYTE back to last (first if reverse) pos. */
1843 pos_byte
-= infinity
;
1844 i
= dirlen
- direction
;
1845 while ((i
-= direction
) + direction
!= 0)
1849 pos_byte
-= direction
;
1850 ptr
= BYTE_POS_ADDR (pos_byte
);
1851 /* Translate only the last byte of a character. */
1853 || ((ptr
== tail_end_ptr
1854 || CHAR_HEAD_P (ptr
[1]))
1855 && (CHAR_HEAD_P (ptr
[0])
1856 || (translate_prev_byte
== ptr
[-1]
1857 && (CHAR_HEAD_P (translate_prev_byte
)
1858 || translate_anteprev_byte
== ptr
[-2])))))
1859 ch
= simple_translate
[*ptr
];
1865 /* Above loop has moved POS_BYTE part or all the way
1866 back to the first pos (last pos if reverse).
1867 Set it once again at the last (first if reverse) char. */
1868 pos_byte
+= dirlen
- i
- direction
;
1869 if (i
+ direction
== 0)
1872 pos_byte
-= direction
;
1874 position
= pos_byte
+ ((direction
> 0) ? 1 - len_byte
: 0);
1876 set_search_regs (position
, len_byte
);
1878 if ((n
-= direction
) != 0)
1879 pos_byte
+= dirlen
; /* to resume search */
1881 return ((direction
> 0)
1882 ? search_regs
.end
[0] : search_regs
.start
[0]);
1885 pos_byte
+= stride_for_teases
;
1888 /* We have done one clump. Can we continue? */
1889 if ((lim_byte
- pos_byte
) * direction
< 0)
1890 return ((0 - n
) * direction
);
1892 return BYTE_TO_CHAR (pos_byte
);
/* Details: BEG_BYTE and NBYTES are byte quantities; both bounds are
   converted with BYTE_TO_CHAR before being stored in register 0.  If no
   registers exist yet (search_regs.num_regs == 0), two-element start
   and end arrays are obtained with xmalloc.  The current buffer is
   recorded in last_thing_searched.
   NOTE(review): the return type and the declaration of `i' are not
   visible in this chunk.  */
1895 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1896 for the overall match just found in the current buffer.
1897 Also clear out the match data for registers 1 and up. */
1900 set_search_regs (beg_byte
, nbytes
)
1901 int beg_byte
, nbytes
;
1905 /* Make sure we have registers in which to store
1906 the match position. */
1907 if (search_regs
.num_regs
== 0)
1909 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1910 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
1911 search_regs
.num_regs
= 2;
1914 /* Clear out the other registers. */
1915 for (i
= 1; i
< search_regs
.num_regs
; i
++)
1917 search_regs
.start
[i
] = -1;
1918 search_regs
.end
[i
] = -1;
/* Register 0 holds character positions, not byte positions.  */
1921 search_regs
.start
[0] = BYTE_TO_CHAR (beg_byte
);
1922 search_regs
.end
[0] = BYTE_TO_CHAR (beg_byte
+ nbytes
);
1923 XSETBUFFER (last_thing_searched
, current_buffer
);
/* NOTE(review): the function header is not visible in this chunk;
   this is presumably `wordify', which the word-search primitives call
   as wordify (string) -- confirm.
   The body makes two passes over STRING.  The first classifies each
   character with SYNTAX (c) against Sword; the statements updating
   punct_count and word_count are not visible, but their names suggest
   it counts non-word characters and words.  The second pass copies
   every word-constituent character into the result with bcopy.  */
1926 /* Given a string of words separated by word delimiters,
1927 compute a regexp that matches those exact words
1928 separated by arbitrary punctuation. */
1934 register unsigned char *p
, *o
;
1935 register int i
, i_byte
, len
, punct_count
= 0, word_count
= 0;
1940 CHECK_STRING (string
, 0);
1941 p
= XSTRING (string
)->data
;
1942 len
= XSTRING (string
)->size
;
/* First pass: fetch each character (multibyte-aware) and classify it.  */
1944 for (i
= 0, i_byte
= 0; i
< len
; )
1948 if (STRING_MULTIBYTE (string
))
1949 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
1951 c
= XSTRING (string
)->data
[i
++];
1953 if (SYNTAX (c
) != Sword
)
1956 if (i
> 0 && SYNTAX (prev_c
) == Sword
)
1963 if (SYNTAX (prev_c
) == Sword
)
1966 return build_string ("");
/* Size change relative to STRING: punct_count is subtracted, 5 is added
   for each gap between consecutive words and 4 more overall.
   Presumably the 5 and 4 are the lengths of the regexp fragments
   written in the second pass -- those literals are not visible here.  */
1968 adjust
= - punct_count
+ 5 * (word_count
- 1) + 4;
1969 val
= make_uninit_multibyte_string (len
+ adjust
,
1970 STRING_BYTES (XSTRING (string
)) + adjust
);
1972 o
= XSTRING (val
)->data
;
/* Second pass: emit the result through the output pointer O.  */
1977 for (i
= 0, i_byte
= 0; i
< len
; )
1980 int i_byte_orig
= i_byte
;
1982 if (STRING_MULTIBYTE (string
))
1983 FETCH_STRING_CHAR_ADVANCE (c
, string
, i
, i_byte
);
1985 c
= XSTRING (string
)->data
[i
++];
1987 if (SYNTAX (c
) == Sword
)
1989 bcopy (&XSTRING (string
)->data
[i_byte_orig
], o
,
1990 i_byte
- i_byte_orig
);
1991 o
+= i_byte
- i_byte_orig
;
1993 else if (i
> 0 && SYNTAX (prev_c
) == Sword
&& --word_count
)
/* Lisp primitive `search-backward'.  Forwards its four arguments to
   search_command with trailing arguments -1, 0, 0.  Judging from the
   sibling primitives these are presumably the direction, a regexp flag
   and a POSIX flag -- confirm against search_command.  */
2011 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
2012 "MSearch backward: ",
2013 "Search backward from point for STRING.\n\
2014 Set point to the beginning of the occurrence found, and return point.\n\
2015 An optional second argument bounds the search; it is a buffer position.\n\
2016 The match found must not extend before that position.\n\
2017 Optional third argument, if t, means if fail just return nil (no error).\n\
2018 If not nil and not t, position at limit of search and return nil.\n\
2019 Optional fourth argument is repeat count--search for successive occurrences.\n\
2020 See also the functions `match-beginning', `match-end' and `replace-match'.")
2021 (string
, bound
, noerror
, count
)
2022 Lisp_Object string
, bound
, noerror
, count
;
2024 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
/* Lisp primitive `search-forward'.  Forwards its four arguments to
   search_command with trailing arguments 1, 0, 0.  Judging from the
   sibling primitives these are presumably the direction, a regexp flag
   and a POSIX flag -- confirm against search_command.  */
2027 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
2028 "Search forward from point for STRING.\n\
2029 Set point to the end of the occurrence found, and return point.\n\
2030 An optional second argument bounds the search; it is a buffer position.\n\
2031 The match found must not extend after that position. nil is equivalent\n\
2033 Optional third argument, if t, means if fail just return nil (no error).\n\
2034 If not nil and not t, move to limit of search and return nil.\n\
2035 Optional fourth argument is repeat count--search for successive occurrences.\n\
2036 See also the functions `match-beginning', `match-end' and `replace-match'.")
2037 (string
, bound
, noerror
, count
)
2038 Lisp_Object string
, bound
, noerror
, count
;
2040 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
/* Lisp primitive `word-search-backward'.  Converts STRING with wordify
   and passes the result to search_command with trailing arguments
   -1, 1, 0 (presumably direction, regexp flag, POSIX flag -- confirm
   against search_command).  */
2043 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
2044 "sWord search backward: ",
2045 "Search backward from point for STRING, ignoring differences in punctuation.\n\
2046 Set point to the beginning of the occurrence found, and return point.\n\
2047 An optional second argument bounds the search; it is a buffer position.\n\
2048 The match found must not extend before that position.\n\
2049 Optional third argument, if t, means if fail just return nil (no error).\n\
2050 If not nil and not t, move to limit of search and return nil.\n\
2051 Optional fourth argument is repeat count--search for successive occurrences.")
2052 (string
, bound
, noerror
, count
)
2053 Lisp_Object string
, bound
, noerror
, count
;
2055 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
/* Lisp primitive `word-search-forward'.  Converts STRING with wordify
   and passes the result to search_command with trailing arguments
   1, 1, 0 (presumably direction, regexp flag, POSIX flag -- confirm
   against search_command).
   NOTE(review): the interactive-spec argument of this DEFUN is not
   visible in this chunk.  */
2058 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
2060 "Search forward from point for STRING, ignoring differences in punctuation.\n\
2061 Set point to the end of the occurrence found, and return point.\n\
2062 An optional second argument bounds the search; it is a buffer position.\n\
2063 The match found must not extend after that position.\n\
2064 Optional third argument, if t, means if fail just return nil (no error).\n\
2065 If not nil and not t, move to limit of search and return nil.\n\
2066 Optional fourth argument is repeat count--search for successive occurrences.")
2067 (string
, bound
, noerror
, count
)
2068 Lisp_Object string
, bound
, noerror
, count
;
2070 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
/* Lisp primitive `re-search-backward'.  Forwards its four arguments to
   search_command with trailing arguments -1, 1, 0 (presumably
   direction, regexp flag, POSIX flag -- confirm against
   search_command).  */
2073 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
2074 "sRE search backward: ",
2075 "Search backward from point for match for regular expression REGEXP.\n\
2076 Set point to the beginning of the match, and return point.\n\
2077 The match found is the one starting last in the buffer\n\
2078 and yet ending before the origin of the search.\n\
2079 An optional second argument bounds the search; it is a buffer position.\n\
2080 The match found must start at or after that position.\n\
2081 Optional third argument, if t, means if fail just return nil (no error).\n\
2082 If not nil and not t, move to limit of search and return nil.\n\
2083 Optional fourth argument is repeat count--search for successive occurrences.\n\
2084 See also the functions `match-beginning', `match-end' and `replace-match'.")
2085 (regexp
, bound
, noerror
, count
)
2086 Lisp_Object regexp
, bound
, noerror
, count
;
2088 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
/* Lisp primitive `re-search-forward'.  Forwards its four arguments to
   search_command with trailing arguments 1, 1, 0 (presumably
   direction, regexp flag, POSIX flag -- confirm against
   search_command).
   NOTE(review): the interactive-spec argument of this DEFUN is not
   visible in this chunk.  */
2091 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
2093 "Search forward from point for regular expression REGEXP.\n\
2094 Set point to the end of the occurrence found, and return point.\n\
2095 An optional second argument bounds the search; it is a buffer position.\n\
2096 The match found must not extend after that position.\n\
2097 Optional third argument, if t, means if fail just return nil (no error).\n\
2098 If not nil and not t, move to limit of search and return nil.\n\
2099 Optional fourth argument is repeat count--search for successive occurrences.\n\
2100 See also the functions `match-beginning', `match-end' and `replace-match'.")
2101 (regexp
, bound
, noerror
, count
)
2102 Lisp_Object regexp
, bound
, noerror
, count
;
2104 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
/* Lisp primitive `posix-search-backward'.  Forwards its four arguments
   to search_command with trailing arguments -1, 1, 1 (presumably
   direction, regexp flag, POSIX flag -- confirm against
   search_command).  */
2107 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
2108 "sPosix search backward: ",
2109 "Search backward from point for match for regular expression REGEXP.\n\
2110 Find the longest match in accord with Posix regular expression rules.\n\
2111 Set point to the beginning of the match, and return point.\n\
2112 The match found is the one starting last in the buffer\n\
2113 and yet ending before the origin of the search.\n\
2114 An optional second argument bounds the search; it is a buffer position.\n\
2115 The match found must start at or after that position.\n\
2116 Optional third argument, if t, means if fail just return nil (no error).\n\
2117 If not nil and not t, move to limit of search and return nil.\n\
2118 Optional fourth argument is repeat count--search for successive occurrences.\n\
2119 See also the functions `match-beginning', `match-end' and `replace-match'.")
2120 (regexp
, bound
, noerror
, count
)
2121 Lisp_Object regexp
, bound
, noerror
, count
;
2123 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
/* Lisp primitive `posix-search-forward'.  Forwards its four arguments
   to search_command with trailing arguments 1, 1, 1 (presumably
   direction, regexp flag, POSIX flag -- confirm against
   search_command).
   NOTE(review): the interactive-spec argument of this DEFUN is not
   visible in this chunk.  */
2126 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
2128 "Search forward from point for regular expression REGEXP.\n\
2129 Find the longest match in accord with Posix regular expression rules.\n\
2130 Set point to the end of the occurrence found, and return point.\n\
2131 An optional second argument bounds the search; it is a buffer position.\n\
2132 The match found must not extend after that position.\n\
2133 Optional third argument, if t, means if fail just return nil (no error).\n\
2134 If not nil and not t, move to limit of search and return nil.\n\
2135 Optional fourth argument is repeat count--search for successive occurrences.\n\
2136 See also the functions `match-beginning', `match-end' and `replace-match'.")
2137 (regexp
, bound
, noerror
, count
)
2138 Lisp_Object regexp
, bound
, noerror
, count
;
2140 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
/* Lisp primitive `replace-match'.  Outline of the visible code:
   - NEWTEXT (and STRING, when non-nil) must be strings; an error is
     signaled when search_regs.num_regs <= 0.
   - SUB is the register to replace; it is taken from SUBEXP when that
     is given and must satisfy 0 <= SUB < num_regs.  That register is
     then range-checked against BEGV/ZV, or against 0 and the size of
     STRING in the second check.
   - When FIXEDCASE is nil the matched text is scanned to choose
     case_action (nochange, all_caps or cap_initial).
   - In the string case the value is concat3 (before, newtext, after).
     Otherwise the replacement is inserted at the start of the match,
     the old text is removed with del_range, and the case conversion is
     applied to the inserted region.
   NOTE(review): many short lines of this definition (braces, several
   declarations, some conditions) are not visible in this chunk --
   confirm against the full source before editing.  */
2143 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
2144 "Replace text matched by last search with NEWTEXT.\n\
2145 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
2146 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
2147 based on the replaced text.\n\
2148 If the replaced text has only capital letters\n\
2149 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
2150 If the replaced text has at least one word starting with a capital letter,\n\
2151 then capitalize each word in NEWTEXT.\n\n\
2152 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
2153 Otherwise treat `\\' as special:\n\
2154 `\\&' in NEWTEXT means substitute original matched text.\n\
2155 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
2156 If Nth parens didn't match, substitute nothing.\n\
2157 `\\\\' means insert one `\\'.\n\
2158 FIXEDCASE and LITERAL are optional arguments.\n\
2159 Leaves point at end of replacement text.\n\
2161 The optional fourth argument STRING can be a string to modify.\n\
2162 In that case, this function creates and returns a new string\n\
2163 which is made by replacing the part of STRING that was matched.\n\
2165 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
2166 It says to replace just that subexpression instead of the whole match.\n\
2167 This is useful only after a regular expression search or match\n\
2168 since only regular expressions have distinguished subexpressions.")
2169 (newtext
, fixedcase
, literal
, string
, subexp
)
2170 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
2172 enum { nochange
, all_caps
, cap_initial
} case_action
;
2173 register int pos
, last
;
2174 int some_multiletter_word
;
2177 int some_nonuppercase_initial
;
2178 register int c
, prevc
;
2181 int opoint
, newpoint
;
2183 CHECK_STRING (newtext
, 0);
2185 if (! NILP (string
))
2186 CHECK_STRING (string
, 4);
2188 case_action
= nochange
; /* We tried an initialization */
2189 /* but some C compilers blew it */
2191 if (search_regs
.num_regs
<= 0)
2192 error ("replace-match called before any match found");
2198 CHECK_NUMBER (subexp
, 3);
2199 sub
= XINT (subexp
);
2200 if (sub
< 0 || sub
>= search_regs
.num_regs
)
2201 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
/* Range check against the accessible region BEGV..ZV (presumably the
   buffer case, STRING nil -- the selecting condition is not visible).  */
2206 if (search_regs
.start
[sub
] < BEGV
2207 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2208 || search_regs
.end
[sub
] > ZV
)
2209 args_out_of_range (make_number (search_regs
.start
[sub
]),
2210 make_number (search_regs
.end
[sub
]));
/* Range check against the size of STRING.  */
2214 if (search_regs
.start
[sub
] < 0
2215 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2216 || search_regs
.end
[sub
] > XSTRING (string
)->size
)
2217 args_out_of_range (make_number (search_regs
.start
[sub
]),
2218 make_number (search_regs
.end
[sub
]));
2221 if (NILP (fixedcase
))
2224 /* Decide how to casify by examining the matched text. */
2227 last
= CHAR_TO_BYTE (search_regs
.end
[sub
]);
2229 last
= search_regs
.end
[sub
];
2232 beg
= CHAR_TO_BYTE (search_regs
.start
[sub
]);
2234 beg
= search_regs
.start
[sub
];
2237 case_action
= all_caps
;
2239 /* some_multiletter_word is set nonzero if any original word
2240 is more than one letter long. */
2241 some_multiletter_word
= 0;
2243 some_nonuppercase_initial
= 0;
2246 for (pos
= beg
; pos
< last
; pos
++)
2249 c
= FETCH_BYTE (pos
);
2251 c
= XSTRING (string
)->data
[pos
];
2255 /* Cannot be all caps if any original char is lower case */
2258 if (SYNTAX (prevc
) != Sword
)
2259 some_nonuppercase_initial
= 1;
2261 some_multiletter_word
= 1;
2263 else if (!NOCASEP (c
))
2266 if (SYNTAX (prevc
) != Sword
)
2269 some_multiletter_word
= 1;
2273 /* If the initial is a caseless word constituent,
2274 treat that like a lowercase initial. */
2275 if (SYNTAX (prevc
) != Sword
)
2276 some_nonuppercase_initial
= 1;
2282 /* Convert to all caps if the old text is all caps
2283 and has at least one multiletter word. */
2284 if (! some_lowercase
&& some_multiletter_word
)
2285 case_action
= all_caps
;
2286 /* Capitalize each word, if the old text has all capitalized words. */
2287 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
2288 case_action
= cap_initial
;
2289 else if (!some_nonuppercase_initial
&& some_uppercase
)
2290 /* Should x -> yz, operating on X, give Yz or YZ?
2291 We'll assume the latter. */
2292 case_action
= all_caps
;
2294 case_action
= nochange
;
2297 /* Do replacement in a string. */
2300 Lisp_Object before
, after
;
2302 before
= Fsubstring (string
, make_number (0),
2303 make_number (search_regs
.start
[sub
]));
2304 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
2306 /* Substitute parts of the match into NEWTEXT
2311 int lastpos_byte
= 0;
2312 /* We build up the substituted string in ACCUM. */
2319 for (pos_byte
= 0, pos
= 0; pos_byte
< STRING_BYTES (XSTRING (newtext
));)
2323 int delbackslash
= 0;
2325 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2329 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2332 substart
= search_regs
.start
[sub
];
2333 subend
= search_regs
.end
[sub
];
/* NOTE(review): the bound `c <= search_regs.num_regs + '0'' admits
   c - '0' == num_regs, which is one past the last register index (the
   SUBEXP check above treats sub >= num_regs as out of range).  `<'
   looks intended -- confirm.  */
2335 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2337 if (search_regs
.start
[c
- '0'] >= 0)
2339 substart
= search_regs
.start
[c
- '0'];
2340 subend
= search_regs
.end
[c
- '0'];
2346 error ("Invalid use of `\\' in replacement text");
2350 if (pos
- 2 != lastpos
)
2351 middle
= substring_both (newtext
, lastpos
,
2353 pos
- 2, pos_byte
- 2);
2356 accum
= concat3 (accum
, middle
,
2358 make_number (substart
),
2359 make_number (subend
)));
2361 lastpos_byte
= pos_byte
;
2363 else if (delbackslash
)
2365 middle
= substring_both (newtext
, lastpos
,
2367 pos
- 1, pos_byte
- 1);
2369 accum
= concat2 (accum
, middle
);
2371 lastpos_byte
= pos_byte
;
2376 middle
= substring_both (newtext
, lastpos
,
2382 newtext
= concat2 (accum
, middle
);
2385 /* Do case substitution in NEWTEXT if desired. */
2386 if (case_action
== all_caps
)
2387 newtext
= Fupcase (newtext
);
2388 else if (case_action
== cap_initial
)
2389 newtext
= Fupcase_initials (newtext
);
2391 return concat3 (before
, newtext
, after
);
2394 /* Record point, then move (quietly) to the start of the match. */
2395 if (PT
> search_regs
.start
[sub
])
2400 TEMP_SET_PT (search_regs
.start
[sub
]);
2402 /* We insert the replacement text before the old text, and then
2403 delete the original text. This means that markers at the
2404 beginning or end of the original will float to the corresponding
2405 position in the replacement. */
2406 if (!NILP (literal
))
2407 Finsert_and_inherit (1, &newtext
);
2410 struct gcpro gcpro1
;
2413 for (pos
= 0; pos
< XSTRING (newtext
)->size
; pos
++)
/* OFFSET is how far point has advanced past the start of the match.
   It is added to the register positions below, presumably because the
   text inserted so far has shifted the original match by that much.  */
2415 int offset
= PT
- search_regs
.start
[sub
];
2417 c
= XSTRING (newtext
)->data
[pos
];
2420 c
= XSTRING (newtext
)->data
[++pos
];
2422 Finsert_buffer_substring
2423 (Fcurrent_buffer (),
2424 make_number (search_regs
.start
[sub
] + offset
),
2425 make_number (search_regs
.end
[sub
] + offset
));
/* NOTE(review): same bound as in the string case above --
   `c <= search_regs.num_regs + '0'' lets c - '0' equal num_regs, one
   past the last register; `<' looks intended -- confirm.  */
2426 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2428 if (search_regs
.start
[c
- '0'] >= 1)
2429 Finsert_buffer_substring
2430 (Fcurrent_buffer (),
2431 make_number (search_regs
.start
[c
- '0'] + offset
),
2432 make_number (search_regs
.end
[c
- '0'] + offset
));
2437 error ("Invalid use of `\\' in replacement text");
/* INSLEN is the length of the text just inserted; the original match
   now lies INSLEN positions further on, so shift both bounds before
   deleting it.  */
2445 inslen
= PT
- (search_regs
.start
[sub
]);
2446 del_range (search_regs
.start
[sub
] + inslen
, search_regs
.end
[sub
] + inslen
);
2448 if (case_action
== all_caps
)
2449 Fupcase_region (make_number (PT
- inslen
), make_number (PT
));
2450 else if (case_action
== cap_initial
)
2451 Fupcase_initials_region (make_number (PT
- inslen
), make_number (PT
));
2455 /* Put point back where it was in the text. */
2457 TEMP_SET_PT (opoint
+ ZV
);
2459 TEMP_SET_PT (opoint
);
2461 /* Now move point "officially" to the start of the inserted replacement. */
2462 move_if_not_intangible (newpoint
);
/* Return, as a Lisp integer, the start (BEGINNINGP nonzero) or the end
   (BEGINNINGP zero) of match register NUM.  NUM must be a number;
   args_out_of_range is signaled when the index is negative or not less
   than search_regs.num_regs.
   NOTE(review): the assignment of `n' and the body of the second `if'
   are not visible in this chunk; presumably n = XINT (num) and the
   branch returns nil for a register that did not match.  Since the
   preceding check already rejects every n when num_regs <= 0, the
   `search_regs.num_regs <= 0' test looks unreachable -- confirm.  */
2468 match_limit (num
, beginningp
)
2474 CHECK_NUMBER (num
, 0);
2476 if (n
< 0 || n
>= search_regs
.num_regs
)
2477 args_out_of_range (num
, make_number (search_regs
.num_regs
));
2478 if (search_regs
.num_regs
<= 0
2479 || search_regs
.start
[n
] < 0)
2481 return (make_number ((beginningp
) ? search_regs
.start
[n
]
2482 : search_regs
.end
[n
]));
/* Lisp primitive `match-beginning'.  Delegates to match_limit with a
   nonzero BEGINNINGP, which selects search_regs.start.  */
2485 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
2486 "Return position of start of text matched by last search.\n\
2487 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2489 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2491 Zero means the entire text matched by the whole regexp or whole string.")
2495 return match_limit (subexp
, 1);
/* Lisp primitive `match-end'.  Delegates to match_limit with a zero
   BEGINNINGP, which selects search_regs.end.  */
2498 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
2499 "Return position of end of text matched by last search.\n\
2500 SUBEXP, a number, specifies which parenthesized expression in the last\n\
2502 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
2504 Zero means the entire text matched by the whole regexp or whole string.")
2508 return match_limit (subexp
, 0);
2511 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 2, 0,
2512 "Return a list containing all info on what the last search matched.\n\
2513 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
2514 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
2515 if the last match was on a buffer; integers or nil if a string was matched.\n\
2516 Use `store-match-data' to reinstate the data in this list.\n\
2518 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
2519 \(rather than markers) to represent buffer positions.\n\
2520 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
2521 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
2523 Lisp_Object integers
, reuse
;
2525 Lisp_Object tail
, prev
;
2529 if (NILP (last_thing_searched
))
2532 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
)
2533 * sizeof (Lisp_Object
));
2536 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2538 int start
= search_regs
.start
[i
];
2541 if (EQ (last_thing_searched
, Qt
)
2542 || ! NILP (integers
))
2544 XSETFASTINT (data
[2 * i
], start
);
2545 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
2547 else if (BUFFERP (last_thing_searched
))
2549 data
[2 * i
] = Fmake_marker ();
2550 Fset_marker (data
[2 * i
],
2551 make_number (start
),
2552 last_thing_searched
);
2553 data
[2 * i
+ 1] = Fmake_marker ();
2554 Fset_marker (data
[2 * i
+ 1],
2555 make_number (search_regs
.end
[i
]),
2556 last_thing_searched
);
2559 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2565 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
2568 /* If REUSE is not usable, cons up the values and return them. */
2569 if (! CONSP (reuse
))
2570 return Flist (2 * len
+ 2, data
);
2572 /* If REUSE is a list, store as many value elements as will fit
2573 into the elements of REUSE. */
2574 for (i
= 0, tail
= reuse
; CONSP (tail
);
2575 i
++, tail
= XCONS (tail
)->cdr
)
2577 if (i
< 2 * len
+ 2)
2578 XCONS (tail
)->car
= data
[i
];
2580 XCONS (tail
)->car
= Qnil
;
2584 /* If we couldn't fit all value elements into REUSE,
2585 cons up the rest of them and add them to the end of REUSE. */
2586 if (i
< 2 * len
+ 2)
2587 XCONS (prev
)->cdr
= Flist (2 * len
+ 2 - i
, data
+ i
);
2593 DEFUN ("set-match-data", Fset_match_data
, Sset_match_data
, 1, 1, 0,
2594 "Set internal data on last search match from elements of LIST.\n\
2595 LIST should have been created by calling `match-data' previously.")
2597 register Lisp_Object list
;
2600 register Lisp_Object marker
;
2602 if (running_asynch_code
)
2603 save_search_regs ();
2605 if (!CONSP (list
) && !NILP (list
))
2606 list
= wrong_type_argument (Qconsp
, list
);
2608 /* Unless we find a marker with a buffer in LIST, assume that this
2609 match data came from a string. */
2610 last_thing_searched
= Qt
;
2612 /* Allocate registers if they don't already exist. */
2614 int length
= XFASTINT (Flength (list
)) / 2;
2616 if (length
> search_regs
.num_regs
)
2618 if (search_regs
.num_regs
== 0)
2621 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2623 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2628 = (regoff_t
*) xrealloc (search_regs
.start
,
2629 length
* sizeof (regoff_t
));
2631 = (regoff_t
*) xrealloc (search_regs
.end
,
2632 length
* sizeof (regoff_t
));
2635 search_regs
.num_regs
= length
;
2639 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2641 marker
= Fcar (list
);
2644 search_regs
.start
[i
] = -1;
2649 if (MARKERP (marker
))
2651 if (XMARKER (marker
)->buffer
== 0)
2652 XSETFASTINT (marker
, 0);
2654 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
2657 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
2658 search_regs
.start
[i
] = XINT (marker
);
2661 marker
= Fcar (list
);
2662 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
2663 XSETFASTINT (marker
, 0);
2665 CHECK_NUMBER_COERCE_MARKER (marker
, 0);
2666 search_regs
.end
[i
] = XINT (marker
);
2674 /* If non-zero the match data have been saved in saved_search_regs
2675 during the execution of a sentinel or filter. */
2676 static int search_regs_saved
;
2677 static struct re_registers saved_search_regs
;
2679 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2680 if asynchronous code (filter or sentinel) is running. */
2684 if (!search_regs_saved
)
2686 saved_search_regs
.num_regs
= search_regs
.num_regs
;
2687 saved_search_regs
.start
= search_regs
.start
;
2688 saved_search_regs
.end
= search_regs
.end
;
2689 search_regs
.num_regs
= 0;
2690 search_regs
.start
= 0;
2691 search_regs
.end
= 0;
2693 search_regs_saved
= 1;
2697 /* Called upon exit from filters and sentinels. */
2699 restore_match_data ()
2701 if (search_regs_saved
)
2703 if (search_regs
.num_regs
> 0)
2705 xfree (search_regs
.start
);
2706 xfree (search_regs
.end
);
2708 search_regs
.num_regs
= saved_search_regs
.num_regs
;
2709 search_regs
.start
= saved_search_regs
.start
;
2710 search_regs
.end
= saved_search_regs
.end
;
2712 search_regs_saved
= 0;
2716 /* Quote a string to inactivate reg-expr chars */
2718 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
2719 "Return a regexp string which matches exactly STRING and nothing else.")
2723 register unsigned char *in
, *out
, *end
;
2724 register unsigned char *temp
;
2725 int backslashes_added
= 0;
2727 CHECK_STRING (string
, 0);
2729 temp
= (unsigned char *) alloca (STRING_BYTES (XSTRING (string
)) * 2);
2731 /* Now copy the data into the new string, inserting escapes. */
2733 in
= XSTRING (string
)->data
;
2734 end
= in
+ STRING_BYTES (XSTRING (string
));
2737 for (; in
!= end
; in
++)
2739 if (*in
== '[' || *in
== ']'
2740 || *in
== '*' || *in
== '.' || *in
== '\\'
2741 || *in
== '?' || *in
== '+'
2742 || *in
== '^' || *in
== '$')
2743 *out
++ = '\\', backslashes_added
++;
2747 return make_specified_string (temp
,
2748 XSTRING (string
)->size
+ backslashes_added
,
2750 STRING_MULTIBYTE (string
));
2758 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
2760 searchbufs
[i
].buf
.allocated
= 100;
2761 searchbufs
[i
].buf
.buffer
= (unsigned char *) malloc (100);
2762 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
2763 searchbufs
[i
].regexp
= Qnil
;
2764 staticpro (&searchbufs
[i
].regexp
);
2765 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
2767 searchbuf_head
= &searchbufs
[0];
2769 Qsearch_failed
= intern ("search-failed");
2770 staticpro (&Qsearch_failed
);
2771 Qinvalid_regexp
= intern ("invalid-regexp");
2772 staticpro (&Qinvalid_regexp
);
2774 Fput (Qsearch_failed
, Qerror_conditions
,
2775 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
2776 Fput (Qsearch_failed
, Qerror_message
,
2777 build_string ("Search failed"));
2779 Fput (Qinvalid_regexp
, Qerror_conditions
,
2780 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
2781 Fput (Qinvalid_regexp
, Qerror_message
,
2782 build_string ("Invalid regexp"));
2784 last_thing_searched
= Qnil
;
2785 staticpro (&last_thing_searched
);
2787 defsubr (&Slooking_at
);
2788 defsubr (&Sposix_looking_at
);
2789 defsubr (&Sstring_match
);
2790 defsubr (&Sposix_string_match
);
2791 defsubr (&Ssearch_forward
);
2792 defsubr (&Ssearch_backward
);
2793 defsubr (&Sword_search_forward
);
2794 defsubr (&Sword_search_backward
);
2795 defsubr (&Sre_search_forward
);
2796 defsubr (&Sre_search_backward
);
2797 defsubr (&Sposix_search_forward
);
2798 defsubr (&Sposix_search_backward
);
2799 defsubr (&Sreplace_match
);
2800 defsubr (&Smatch_beginning
);
2801 defsubr (&Smatch_end
);
2802 defsubr (&Smatch_data
);
2803 defsubr (&Sset_match_data
);
2804 defsubr (&Sregexp_quote
);