1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
28 #include "character.h"
30 #include "region-cache.h"
32 #include "blockinput.h"
33 #include "intervals.h"
35 #include <sys/types.h>
38 #define REGEXP_CACHE_SIZE 20
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
44 struct regexp_cache
*next
;
45 Lisp_Object regexp
, whitespace_regexp
;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table
;
50 struct re_pattern_buffer buf
;
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs
[REGEXP_CACHE_SIZE
];
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache
*searchbuf_head
;
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs
;
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched
;
87 /* error condition signaled when regexp compile_pattern fails */
89 Lisp_Object Qinvalid_regexp
;
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed
;
94 Lisp_Object Vsearch_spaces_regexp
;
96 /* If non-nil, the match data will not be changed during call to
97 searching or matching functions. This variable is for internal use
99 Lisp_Object Vinhibit_changing_match_data
;
101 static void set_search_regs ();
102 static void save_search_regs ();
103 static int simple_search ();
104 static int boyer_moore ();
105 static int search_buffer ();
106 static void matcher_overflow () NO_RETURN
;
111 error ("Stack overflow in regexp matcher");
114 /* Compile a regexp and signal a Lisp error if anything goes wrong.
115 PATTERN is the pattern to compile.
116 CP is the place to put the result.
117 TRANSLATE is a translation table for ignoring case, or nil for none.
118 REGP is the structure that says where to store the "register"
119 values that will result from matching this pattern.
120 If it is 0, we should compile the pattern not to record any
121 subexpression bounds.
122 POSIX is nonzero if we want full backtracking (POSIX style)
123 for this pattern. 0 means backtrack only enough to get a valid match.
125 The behavior also depends on Vsearch_spaces_regexp. */
128 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
)
129 struct regexp_cache
*cp
;
131 Lisp_Object translate
;
132 struct re_registers
*regp
;
139 cp
->buf
.translate
= (! NILP (translate
) ? translate
: make_number (0));
141 cp
->buf
.multibyte
= STRING_MULTIBYTE (pattern
);
142 cp
->buf
.charset_unibyte
= charset_unibyte
;
143 cp
->whitespace_regexp
= Vsearch_spaces_regexp
;
144 /* rms: I think BLOCK_INPUT is not needed here any more,
145 because regex.c defines malloc to call xmalloc.
146 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
147 So let's turn it off. */
149 old
= re_set_syntax (RE_SYNTAX_EMACS
150 | (posix
? 0 : RE_NO_POSIX_BACKTRACKING
));
151 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp
) ? NULL
152 : SDATA (Vsearch_spaces_regexp
));
154 val
= (char *) re_compile_pattern ((char *) SDATA (pattern
),
155 SBYTES (pattern
), &cp
->buf
);
157 /* If the compiled pattern hard codes some of the contents of the
158 syntax-table, it can only be reused with *this* syntax table. */
159 cp
->syntax_table
= cp
->buf
.used_syntax
? current_buffer
->syntax_table
: Qt
;
161 re_set_whitespace_regexp (NULL
);
166 xsignal1 (Qinvalid_regexp
, build_string (val
));
168 cp
->regexp
= Fcopy_sequence (pattern
);
171 /* Shrink each compiled regexp buffer in the cache
172 to the size actually used right now.
173 This is called from garbage collection. */
176 shrink_regexp_cache ()
178 struct regexp_cache
*cp
;
180 for (cp
= searchbuf_head
; cp
!= 0; cp
= cp
->next
)
182 cp
->buf
.allocated
= cp
->buf
.used
;
184 = (unsigned char *) xrealloc (cp
->buf
.buffer
, cp
->buf
.used
);
188 /* Clear the regexp cache w.r.t. a particular syntax table,
189 because it was changed.
190 There is no danger of memory leak here because re_compile_pattern
191 automagically manages the memory in each re_pattern_buffer struct,
192 based on its `allocated' and `buffer' values. */
194 clear_regexp_cache ()
198 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
199 /* It's tempting to compare with the syntax-table we've actually changd,
200 but it's not sufficient because char-table inheritance mewans that
201 modifying one syntax-table can change others at the same time. */
202 if (!EQ (searchbufs
[i
].syntax_table
, Qt
))
203 searchbufs
[i
].regexp
= Qnil
;
206 /* Compile a regexp if necessary, but first check to see if there's one in
208 PATTERN is the pattern to compile.
209 TRANSLATE is a translation table for ignoring case, or nil for none.
210 REGP is the structure that says where to store the "register"
211 values that will result from matching this pattern.
212 If it is 0, we should compile the pattern not to record any
213 subexpression bounds.
214 POSIX is nonzero if we want full backtracking (POSIX style)
215 for this pattern. 0 means backtrack only enough to get a valid match. */
217 struct re_pattern_buffer
*
218 compile_pattern (pattern
, regp
, translate
, posix
, multibyte
)
220 struct re_registers
*regp
;
221 Lisp_Object translate
;
222 int posix
, multibyte
;
224 struct regexp_cache
*cp
, **cpp
;
226 for (cpp
= &searchbuf_head
; ; cpp
= &cp
->next
)
229 /* Entries are initialized to nil, and may be set to nil by
230 compile_pattern_1 if the pattern isn't valid. Don't apply
231 string accessors in those cases. However, compile_pattern_1
232 is only applied to the cache entry we pick here to reuse. So
233 nil should never appear before a non-nil entry. */
234 if (NILP (cp
->regexp
))
236 if (SCHARS (cp
->regexp
) == SCHARS (pattern
)
237 && STRING_MULTIBYTE (cp
->regexp
) == STRING_MULTIBYTE (pattern
)
238 && !NILP (Fstring_equal (cp
->regexp
, pattern
))
239 && EQ (cp
->buf
.translate
, (! NILP (translate
) ? translate
: make_number (0)))
240 && cp
->posix
== posix
241 && (EQ (cp
->syntax_table
, Qt
)
242 || EQ (cp
->syntax_table
, current_buffer
->syntax_table
))
243 && !NILP (Fequal (cp
->whitespace_regexp
, Vsearch_spaces_regexp
))
244 && cp
->buf
.charset_unibyte
== charset_unibyte
)
247 /* If we're at the end of the cache, compile into the nil cell
248 we found, or the last (least recently used) cell with a
253 compile_pattern_1 (cp
, pattern
, translate
, regp
, posix
);
258 /* When we get here, cp (aka *cpp) contains the compiled pattern,
259 either because we found it in the cache or because we just compiled it.
260 Move it to the front of the queue to mark it as most recently used. */
262 cp
->next
= searchbuf_head
;
265 /* Advise the searching functions about the space we have allocated
266 for register data. */
268 re_set_registers (&cp
->buf
, regp
, regp
->num_regs
, regp
->start
, regp
->end
);
270 /* The compiled pattern can be used both for mulitbyte and unibyte
271 target. But, we have to tell which the pattern is used for. */
272 cp
->buf
.target_multibyte
= multibyte
;
279 looking_at_1 (string
, posix
)
284 unsigned char *p1
, *p2
;
287 struct re_pattern_buffer
*bufp
;
289 if (running_asynch_code
)
292 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
293 XCHAR_TABLE (current_buffer
->case_canon_table
)->extras
[2]
294 = current_buffer
->case_eqv_table
;
296 CHECK_STRING (string
);
297 bufp
= compile_pattern (string
,
298 (NILP (Vinhibit_changing_match_data
)
299 ? &search_regs
: NULL
),
300 (!NILP (current_buffer
->case_fold_search
)
301 ? current_buffer
->case_canon_table
: Qnil
),
303 !NILP (current_buffer
->enable_multibyte_characters
));
306 QUIT
; /* Do a pending quit right away, to avoid paradoxical behavior */
308 /* Get pointers and sizes of the two strings
309 that make up the visible portion of the buffer. */
312 s1
= GPT_BYTE
- BEGV_BYTE
;
314 s2
= ZV_BYTE
- GPT_BYTE
;
318 s2
= ZV_BYTE
- BEGV_BYTE
;
323 s1
= ZV_BYTE
- BEGV_BYTE
;
327 re_match_object
= Qnil
;
329 i
= re_match_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
331 (NILP (Vinhibit_changing_match_data
)
332 ? &search_regs
: NULL
),
333 ZV_BYTE
- BEGV_BYTE
);
339 val
= (0 <= i
? Qt
: Qnil
);
340 if (NILP (Vinhibit_changing_match_data
) && i
>= 0)
341 for (i
= 0; i
< search_regs
.num_regs
; i
++)
342 if (search_regs
.start
[i
] >= 0)
345 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
347 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
350 /* Set last_thing_searched only when match data is changed. */
351 if (NILP (Vinhibit_changing_match_data
))
352 XSETBUFFER (last_thing_searched
, current_buffer
);
357 DEFUN ("looking-at", Flooking_at
, Slooking_at
, 1, 1, 0,
358 doc
: /* Return t if text after point matches regular expression REGEXP.
359 This function modifies the match data that `match-beginning',
360 `match-end' and `match-data' access; save and restore the match
361 data if you want to preserve them. */)
365 return looking_at_1 (regexp
, 0);
368 DEFUN ("posix-looking-at", Fposix_looking_at
, Sposix_looking_at
, 1, 1, 0,
369 doc
: /* Return t if text after point matches regular expression REGEXP.
370 Find the longest match, in accord with Posix regular expression rules.
371 This function modifies the match data that `match-beginning',
372 `match-end' and `match-data' access; save and restore the match
373 data if you want to preserve them. */)
377 return looking_at_1 (regexp
, 1);
381 string_match_1 (regexp
, string
, start
, posix
)
382 Lisp_Object regexp
, string
, start
;
386 struct re_pattern_buffer
*bufp
;
390 if (running_asynch_code
)
393 CHECK_STRING (regexp
);
394 CHECK_STRING (string
);
397 pos
= 0, pos_byte
= 0;
400 int len
= SCHARS (string
);
402 CHECK_NUMBER (start
);
404 if (pos
< 0 && -pos
<= len
)
406 else if (0 > pos
|| pos
> len
)
407 args_out_of_range (string
, start
);
408 pos_byte
= string_char_to_byte (string
, pos
);
411 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
412 XCHAR_TABLE (current_buffer
->case_canon_table
)->extras
[2]
413 = current_buffer
->case_eqv_table
;
415 bufp
= compile_pattern (regexp
,
416 (NILP (Vinhibit_changing_match_data
)
417 ? &search_regs
: NULL
),
418 (!NILP (current_buffer
->case_fold_search
)
419 ? current_buffer
->case_canon_table
: Qnil
),
421 STRING_MULTIBYTE (string
));
423 re_match_object
= string
;
425 val
= re_search (bufp
, (char *) SDATA (string
),
426 SBYTES (string
), pos_byte
,
427 SBYTES (string
) - pos_byte
,
428 (NILP (Vinhibit_changing_match_data
)
429 ? &search_regs
: NULL
));
432 /* Set last_thing_searched only when match data is changed. */
433 if (NILP (Vinhibit_changing_match_data
))
434 last_thing_searched
= Qt
;
438 if (val
< 0) return Qnil
;
440 if (NILP (Vinhibit_changing_match_data
))
441 for (i
= 0; i
< search_regs
.num_regs
; i
++)
442 if (search_regs
.start
[i
] >= 0)
445 = string_byte_to_char (string
, search_regs
.start
[i
]);
447 = string_byte_to_char (string
, search_regs
.end
[i
]);
450 return make_number (string_byte_to_char (string
, val
));
453 DEFUN ("string-match", Fstring_match
, Sstring_match
, 2, 3, 0,
454 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
455 Matching ignores case if `case-fold-search' is non-nil.
456 If third arg START is non-nil, start search at that index in STRING.
457 For index of first char beyond the match, do (match-end 0).
458 `match-end' and `match-beginning' also give indices of substrings
459 matched by parenthesis constructs in the pattern.
461 You can use the function `match-string' to extract the substrings
462 matched by the parenthesis constructions in REGEXP. */)
463 (regexp
, string
, start
)
464 Lisp_Object regexp
, string
, start
;
466 return string_match_1 (regexp
, string
, start
, 0);
469 DEFUN ("posix-string-match", Fposix_string_match
, Sposix_string_match
, 2, 3, 0,
470 doc
: /* Return index of start of first match for REGEXP in STRING, or nil.
471 Find the longest match, in accord with Posix regular expression rules.
472 Case is ignored if `case-fold-search' is non-nil in the current buffer.
473 If third arg START is non-nil, start search at that index in STRING.
474 For index of first char beyond the match, do (match-end 0).
475 `match-end' and `match-beginning' also give indices of substrings
476 matched by parenthesis constructs in the pattern. */)
477 (regexp
, string
, start
)
478 Lisp_Object regexp
, string
, start
;
480 return string_match_1 (regexp
, string
, start
, 1);
483 /* Match REGEXP against STRING, searching all of STRING,
484 and return the index of the match, or negative on failure.
485 This does not clobber the match data. */
488 fast_string_match (regexp
, string
)
489 Lisp_Object regexp
, string
;
492 struct re_pattern_buffer
*bufp
;
494 bufp
= compile_pattern (regexp
, 0, Qnil
,
495 0, STRING_MULTIBYTE (string
));
497 re_match_object
= string
;
499 val
= re_search (bufp
, (char *) SDATA (string
),
506 /* Match REGEXP against STRING, searching all of STRING ignoring case,
507 and return the index of the match, or negative on failure.
508 This does not clobber the match data.
509 We assume that STRING contains single-byte characters. */
511 extern Lisp_Object Vascii_downcase_table
;
514 fast_c_string_match_ignore_case (regexp
, string
)
519 struct re_pattern_buffer
*bufp
;
520 int len
= strlen (string
);
522 regexp
= string_make_unibyte (regexp
);
523 re_match_object
= Qt
;
524 bufp
= compile_pattern (regexp
, 0,
525 Vascii_canon_table
, 0,
528 val
= re_search (bufp
, string
, len
, 0, len
, 0);
533 /* Like fast_string_match but ignore case. */
536 fast_string_match_ignore_case (regexp
, string
)
537 Lisp_Object regexp
, string
;
540 struct re_pattern_buffer
*bufp
;
542 bufp
= compile_pattern (regexp
, 0, Vascii_canon_table
,
543 0, STRING_MULTIBYTE (string
));
545 re_match_object
= string
;
547 val
= re_search (bufp
, (char *) SDATA (string
),
554 /* The newline cache: remembering which sections of text have no newlines. */
556 /* If the user has requested newline caching, make sure it's on.
557 Otherwise, make sure it's off.
558 This is our cheezy way of associating an action with the change of
559 state of a buffer-local variable. */
561 newline_cache_on_off (buf
)
564 if (NILP (buf
->cache_long_line_scans
))
566 /* It should be off. */
567 if (buf
->newline_cache
)
569 free_region_cache (buf
->newline_cache
);
570 buf
->newline_cache
= 0;
575 /* It should be on. */
576 if (buf
->newline_cache
== 0)
577 buf
->newline_cache
= new_region_cache ();
582 /* Search for COUNT instances of the character TARGET between START and END.
584 If COUNT is positive, search forwards; END must be >= START.
585 If COUNT is negative, search backwards for the -COUNTth instance;
586 END must be <= START.
587 If COUNT is zero, do anything you please; run rogue, for all I care.
589 If END is zero, use BEGV or ZV instead, as appropriate for the
590 direction indicated by COUNT.
592 If we find COUNT instances, set *SHORTAGE to zero, and return the
593 position past the COUNTth match. Note that for reverse motion
594 this is not the same as the usual convention for Emacs motion commands.
596 If we don't find COUNT instances before reaching END, set *SHORTAGE
597 to the number of TARGETs left unfound, and return END.
599 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
600 except when inside redisplay. */
603 scan_buffer (target
, start
, end
, count
, shortage
, allow_quit
)
610 struct region_cache
*newline_cache
;
621 if (! end
) end
= BEGV
;
624 newline_cache_on_off (current_buffer
);
625 newline_cache
= current_buffer
->newline_cache
;
630 immediate_quit
= allow_quit
;
635 /* Our innermost scanning loop is very simple; it doesn't know
636 about gaps, buffer ends, or the newline cache. ceiling is
637 the position of the last character before the next such
638 obstacle --- the last character the dumb search loop should
640 int ceiling_byte
= CHAR_TO_BYTE (end
) - 1;
641 int start_byte
= CHAR_TO_BYTE (start
);
644 /* If we're looking for a newline, consult the newline cache
645 to see where we can avoid some scanning. */
646 if (target
== '\n' && newline_cache
)
650 while (region_cache_forward
651 (current_buffer
, newline_cache
, start_byte
, &next_change
))
652 start_byte
= next_change
;
653 immediate_quit
= allow_quit
;
655 /* START should never be after END. */
656 if (start_byte
> ceiling_byte
)
657 start_byte
= ceiling_byte
;
659 /* Now the text after start is an unknown region, and
660 next_change is the position of the next known region. */
661 ceiling_byte
= min (next_change
- 1, ceiling_byte
);
664 /* The dumb loop can only scan text stored in contiguous
665 bytes. BUFFER_CEILING_OF returns the last character
666 position that is contiguous, so the ceiling is the
667 position after that. */
668 tem
= BUFFER_CEILING_OF (start_byte
);
669 ceiling_byte
= min (tem
, ceiling_byte
);
672 /* The termination address of the dumb loop. */
673 register unsigned char *ceiling_addr
674 = BYTE_POS_ADDR (ceiling_byte
) + 1;
675 register unsigned char *cursor
676 = BYTE_POS_ADDR (start_byte
);
677 unsigned char *base
= cursor
;
679 while (cursor
< ceiling_addr
)
681 unsigned char *scan_start
= cursor
;
684 while (*cursor
!= target
&& ++cursor
< ceiling_addr
)
687 /* If we're looking for newlines, cache the fact that
688 the region from start to cursor is free of them. */
689 if (target
== '\n' && newline_cache
)
690 know_region_cache (current_buffer
, newline_cache
,
691 start_byte
+ scan_start
- base
,
692 start_byte
+ cursor
- base
);
694 /* Did we find the target character? */
695 if (cursor
< ceiling_addr
)
700 return BYTE_TO_CHAR (start_byte
+ cursor
- base
+ 1);
706 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
712 /* The last character to check before the next obstacle. */
713 int ceiling_byte
= CHAR_TO_BYTE (end
);
714 int start_byte
= CHAR_TO_BYTE (start
);
717 /* Consult the newline cache, if appropriate. */
718 if (target
== '\n' && newline_cache
)
722 while (region_cache_backward
723 (current_buffer
, newline_cache
, start_byte
, &next_change
))
724 start_byte
= next_change
;
725 immediate_quit
= allow_quit
;
727 /* Start should never be at or before end. */
728 if (start_byte
<= ceiling_byte
)
729 start_byte
= ceiling_byte
+ 1;
731 /* Now the text before start is an unknown region, and
732 next_change is the position of the next known region. */
733 ceiling_byte
= max (next_change
, ceiling_byte
);
736 /* Stop scanning before the gap. */
737 tem
= BUFFER_FLOOR_OF (start_byte
- 1);
738 ceiling_byte
= max (tem
, ceiling_byte
);
741 /* The termination address of the dumb loop. */
742 register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte
);
743 register unsigned char *cursor
= BYTE_POS_ADDR (start_byte
- 1);
744 unsigned char *base
= cursor
;
746 while (cursor
>= ceiling_addr
)
748 unsigned char *scan_start
= cursor
;
750 while (*cursor
!= target
&& --cursor
>= ceiling_addr
)
753 /* If we're looking for newlines, cache the fact that
754 the region from after the cursor to start is free of them. */
755 if (target
== '\n' && newline_cache
)
756 know_region_cache (current_buffer
, newline_cache
,
757 start_byte
+ cursor
- base
,
758 start_byte
+ scan_start
- base
);
760 /* Did we find the target character? */
761 if (cursor
>= ceiling_addr
)
766 return BYTE_TO_CHAR (start_byte
+ cursor
- base
);
772 start
= BYTE_TO_CHAR (start_byte
+ cursor
- base
);
778 *shortage
= count
* direction
;
782 /* Search for COUNT instances of a line boundary, which means either a
783 newline or (if selective display enabled) a carriage return.
784 Start at START. If COUNT is negative, search backwards.
786 We report the resulting position by calling TEMP_SET_PT_BOTH.
788 If we find COUNT instances. we position after (always after,
789 even if scanning backwards) the COUNTth match, and return 0.
791 If we don't find COUNT instances before reaching the end of the
792 buffer (or the beginning, if scanning backwards), we return
793 the number of line boundaries left unfound, and position at
794 the limit we bumped up against.
796 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
797 except in special cases. */
800 scan_newline (start
, start_byte
, limit
, limit_byte
, count
, allow_quit
)
801 int start
, start_byte
;
802 int limit
, limit_byte
;
806 int direction
= ((count
> 0) ? 1 : -1);
808 register unsigned char *cursor
;
811 register int ceiling
;
812 register unsigned char *ceiling_addr
;
814 int old_immediate_quit
= immediate_quit
;
816 /* The code that follows is like scan_buffer
817 but checks for either newline or carriage return. */
822 start_byte
= CHAR_TO_BYTE (start
);
826 while (start_byte
< limit_byte
)
828 ceiling
= BUFFER_CEILING_OF (start_byte
);
829 ceiling
= min (limit_byte
- 1, ceiling
);
830 ceiling_addr
= BYTE_POS_ADDR (ceiling
) + 1;
831 base
= (cursor
= BYTE_POS_ADDR (start_byte
));
834 while (*cursor
!= '\n' && ++cursor
!= ceiling_addr
)
837 if (cursor
!= ceiling_addr
)
841 immediate_quit
= old_immediate_quit
;
842 start_byte
= start_byte
+ cursor
- base
+ 1;
843 start
= BYTE_TO_CHAR (start_byte
);
844 TEMP_SET_PT_BOTH (start
, start_byte
);
848 if (++cursor
== ceiling_addr
)
854 start_byte
+= cursor
- base
;
859 while (start_byte
> limit_byte
)
861 ceiling
= BUFFER_FLOOR_OF (start_byte
- 1);
862 ceiling
= max (limit_byte
, ceiling
);
863 ceiling_addr
= BYTE_POS_ADDR (ceiling
) - 1;
864 base
= (cursor
= BYTE_POS_ADDR (start_byte
- 1) + 1);
867 while (--cursor
!= ceiling_addr
&& *cursor
!= '\n')
870 if (cursor
!= ceiling_addr
)
874 immediate_quit
= old_immediate_quit
;
875 /* Return the position AFTER the match we found. */
876 start_byte
= start_byte
+ cursor
- base
+ 1;
877 start
= BYTE_TO_CHAR (start_byte
);
878 TEMP_SET_PT_BOTH (start
, start_byte
);
885 /* Here we add 1 to compensate for the last decrement
886 of CURSOR, which took it past the valid range. */
887 start_byte
+= cursor
- base
+ 1;
891 TEMP_SET_PT_BOTH (limit
, limit_byte
);
892 immediate_quit
= old_immediate_quit
;
894 return count
* direction
;
898 find_next_newline_no_quit (from
, cnt
)
899 register int from
, cnt
;
901 return scan_buffer ('\n', from
, 0, cnt
, (int *) 0, 0);
904 /* Like find_next_newline, but returns position before the newline,
905 not after, and only search up to TO. This isn't just
906 find_next_newline (...)-1, because you might hit TO. */
909 find_before_next_newline (from
, to
, cnt
)
913 int pos
= scan_buffer ('\n', from
, to
, cnt
, &shortage
, 1);
921 /* Subroutines of Lisp buffer search functions. */
924 search_command (string
, bound
, noerror
, count
, direction
, RE
, posix
)
925 Lisp_Object string
, bound
, noerror
, count
;
936 CHECK_NUMBER (count
);
940 CHECK_STRING (string
);
944 lim
= ZV
, lim_byte
= ZV_BYTE
;
946 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
950 CHECK_NUMBER_COERCE_MARKER (bound
);
952 if (n
> 0 ? lim
< PT
: lim
> PT
)
953 error ("Invalid search bound (wrong side of point)");
955 lim
= ZV
, lim_byte
= ZV_BYTE
;
957 lim
= BEGV
, lim_byte
= BEGV_BYTE
;
959 lim_byte
= CHAR_TO_BYTE (lim
);
962 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
963 XCHAR_TABLE (current_buffer
->case_canon_table
)->extras
[2]
964 = current_buffer
->case_eqv_table
;
966 np
= search_buffer (string
, PT
, PT_BYTE
, lim
, lim_byte
, n
, RE
,
967 (!NILP (current_buffer
->case_fold_search
)
968 ? current_buffer
->case_canon_table
970 (!NILP (current_buffer
->case_fold_search
)
971 ? current_buffer
->case_eqv_table
977 xsignal1 (Qsearch_failed
, string
);
979 if (!EQ (noerror
, Qt
))
981 if (lim
< BEGV
|| lim
> ZV
)
983 SET_PT_BOTH (lim
, lim_byte
);
985 #if 0 /* This would be clean, but maybe programs depend on
986 a value of nil here. */
994 if (np
< BEGV
|| np
> ZV
)
999 return make_number (np
);
1002 /* Return 1 if REGEXP it matches just one constant string. */
1005 trivial_regexp_p (regexp
)
1008 int len
= SBYTES (regexp
);
1009 unsigned char *s
= SDATA (regexp
);
1014 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1021 case '|': case '(': case ')': case '`': case '\'': case 'b':
1022 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1023 case 'S': case '=': case '{': case '}': case '_':
1024 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1025 case '1': case '2': case '3': case '4': case '5':
1026 case '6': case '7': case '8': case '9':
1034 /* Search for the n'th occurrence of STRING in the current buffer,
1035 starting at position POS and stopping at position LIM,
1036 treating STRING as a literal string if RE is false or as
1037 a regular expression if RE is true.
1039 If N is positive, searching is forward and LIM must be greater than POS.
1040 If N is negative, searching is backward and LIM must be less than POS.
1042 Returns -x if x occurrences remain to be found (x > 0),
1043 or else the position at the beginning of the Nth occurrence
1044 (if searching backward) or the end (if searching forward).
1046 POSIX is nonzero if we want full backtracking (POSIX style)
1047 for this pattern. 0 means backtrack only enough to get a valid match. */
1049 #define TRANSLATE(out, trt, d) \
1055 temp = Faref (trt, make_number (d)); \
1056 if (INTEGERP (temp)) \
1057 out = XINT (temp); \
1066 /* Only used in search_buffer, to record the end position of the match
1067 when searching regexps and SEARCH_REGS should not be changed
1068 (i.e. Vinhibit_changing_match_data is non-nil). */
1069 static struct re_registers search_regs_1
;
1072 search_buffer (string
, pos
, pos_byte
, lim
, lim_byte
, n
,
1073 RE
, trt
, inverse_trt
, posix
)
1082 Lisp_Object inverse_trt
;
1085 int len
= SCHARS (string
);
1086 int len_byte
= SBYTES (string
);
1089 if (running_asynch_code
)
1090 save_search_regs ();
1092 /* Searching 0 times means don't move. */
1093 /* Null string is found at starting position. */
1094 if (len
== 0 || n
== 0)
1096 set_search_regs (pos_byte
, 0);
1100 if (RE
&& !(trivial_regexp_p (string
) && NILP (Vsearch_spaces_regexp
)))
1102 unsigned char *p1
, *p2
;
1104 struct re_pattern_buffer
*bufp
;
1106 bufp
= compile_pattern (string
,
1107 (NILP (Vinhibit_changing_match_data
)
1108 ? &search_regs
: &search_regs_1
),
1110 !NILP (current_buffer
->enable_multibyte_characters
));
1112 immediate_quit
= 1; /* Quit immediately if user types ^G,
1113 because letting this function finish
1114 can take too long. */
1115 QUIT
; /* Do a pending quit right away,
1116 to avoid paradoxical behavior */
1117 /* Get pointers and sizes of the two strings
1118 that make up the visible portion of the buffer. */
1121 s1
= GPT_BYTE
- BEGV_BYTE
;
1123 s2
= ZV_BYTE
- GPT_BYTE
;
1127 s2
= ZV_BYTE
- BEGV_BYTE
;
1132 s1
= ZV_BYTE
- BEGV_BYTE
;
1135 re_match_object
= Qnil
;
1140 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1141 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1142 (NILP (Vinhibit_changing_match_data
)
1143 ? &search_regs
: &search_regs_1
),
1144 /* Don't allow match past current point */
1145 pos_byte
- BEGV_BYTE
);
1148 matcher_overflow ();
1152 if (NILP (Vinhibit_changing_match_data
))
1154 pos_byte
= search_regs
.start
[0] + BEGV_BYTE
;
1155 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1156 if (search_regs
.start
[i
] >= 0)
1158 search_regs
.start
[i
]
1159 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1161 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1163 XSETBUFFER (last_thing_searched
, current_buffer
);
1164 /* Set pos to the new position. */
1165 pos
= search_regs
.start
[0];
1169 pos_byte
= search_regs_1
.start
[0] + BEGV_BYTE
;
1170 /* Set pos to the new position. */
1171 pos
= BYTE_TO_CHAR (search_regs_1
.start
[0] + BEGV_BYTE
);
1184 val
= re_search_2 (bufp
, (char *) p1
, s1
, (char *) p2
, s2
,
1185 pos_byte
- BEGV_BYTE
, lim_byte
- pos_byte
,
1186 (NILP (Vinhibit_changing_match_data
)
1187 ? &search_regs
: &search_regs_1
),
1188 lim_byte
- BEGV_BYTE
);
1191 matcher_overflow ();
1195 if (NILP (Vinhibit_changing_match_data
))
1197 pos_byte
= search_regs
.end
[0] + BEGV_BYTE
;
1198 for (i
= 0; i
< search_regs
.num_regs
; i
++)
1199 if (search_regs
.start
[i
] >= 0)
1201 search_regs
.start
[i
]
1202 = BYTE_TO_CHAR (search_regs
.start
[i
] + BEGV_BYTE
);
1204 = BYTE_TO_CHAR (search_regs
.end
[i
] + BEGV_BYTE
);
1206 XSETBUFFER (last_thing_searched
, current_buffer
);
1207 pos
= search_regs
.end
[0];
1211 pos_byte
= search_regs_1
.end
[0] + BEGV_BYTE
;
1212 pos
= BYTE_TO_CHAR (search_regs_1
.end
[0] + BEGV_BYTE
);
1225 else /* non-RE case */
1227 unsigned char *raw_pattern
, *pat
;
1228 int raw_pattern_size
;
1229 int raw_pattern_size_byte
;
1230 unsigned char *patbuf
;
1231 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
1232 unsigned char *base_pat
;
1233 /* Set to positive if we find a non-ASCII char that need
1234 translation. Otherwise set to zero later. */
1236 int boyer_moore_ok
= 1;
1238 /* MULTIBYTE says whether the text to be searched is multibyte.
1239 We must convert PATTERN to match that, or we will not really
1240 find things right. */
1242 if (multibyte
== STRING_MULTIBYTE (string
))
1244 raw_pattern
= (unsigned char *) SDATA (string
);
1245 raw_pattern_size
= SCHARS (string
);
1246 raw_pattern_size_byte
= SBYTES (string
);
1250 raw_pattern_size
= SCHARS (string
);
1251 raw_pattern_size_byte
1252 = count_size_as_multibyte (SDATA (string
),
1254 raw_pattern
= (unsigned char *) alloca (raw_pattern_size_byte
+ 1);
1255 copy_text (SDATA (string
), raw_pattern
,
1256 SCHARS (string
), 0, 1);
1260 /* Converting multibyte to single-byte.
1262 ??? Perhaps this conversion should be done in a special way
1263 by subtracting nonascii-insert-offset from each non-ASCII char,
1264 so that only the multibyte chars which really correspond to
1265 the chosen single-byte character set can possibly match. */
1266 raw_pattern_size
= SCHARS (string
);
1267 raw_pattern_size_byte
= SCHARS (string
);
1268 raw_pattern
= (unsigned char *) alloca (raw_pattern_size
+ 1);
1269 copy_text (SDATA (string
), raw_pattern
,
1270 SBYTES (string
), 1, 0);
1273 /* Copy and optionally translate the pattern. */
1274 len
= raw_pattern_size
;
1275 len_byte
= raw_pattern_size_byte
;
1276 patbuf
= (unsigned char *) alloca (len
* MAX_MULTIBYTE_LENGTH
);
1278 base_pat
= raw_pattern
;
1281 /* Fill patbuf by translated characters in STRING while
1282 checking if we can use boyer-moore search. If TRT is
1283 non-nil, we can use boyer-moore search only if TRT can be
1284 represented by the byte array of 256 elements. For that,
1285 all non-ASCII case-equivalents of all case-senstive
1286 characters in STRING must belong to the same charset and
1291 unsigned char str_base
[MAX_MULTIBYTE_LENGTH
], *str
;
1292 int c
, translated
, inverse
;
1293 int in_charlen
, charlen
;
1295 /* If we got here and the RE flag is set, it's because we're
1296 dealing with a regexp known to be trivial, so the backslash
1297 just quotes the next character. */
1298 if (RE
&& *base_pat
== '\\')
1306 c
= STRING_CHAR_AND_LENGTH (base_pat
, len_byte
, in_charlen
);
1311 charlen
= in_charlen
;
1315 /* Translate the character. */
1316 TRANSLATE (translated
, trt
, c
);
1317 charlen
= CHAR_STRING (translated
, str_base
);
1320 /* Check if C has any other case-equivalents. */
1321 TRANSLATE (inverse
, inverse_trt
, c
);
1322 /* If so, check if we can use boyer-moore. */
1323 if (c
!= inverse
&& boyer_moore_ok
)
1325 /* Check if all equivalents belong to the same
1326 group of characters. Note that the check of C
1327 itself is done by the last iteration. */
1328 int this_char_base
= -1;
1330 while (boyer_moore_ok
)
1332 if (ASCII_BYTE_P (inverse
))
1334 if (this_char_base
> 0)
1340 char_base
= this_char_base
;
1343 else if (CHAR_BYTE8_P (inverse
))
1344 /* Boyer-moore search can't handle a
1345 translation of an eight-bit
1348 else if (this_char_base
< 0)
1350 this_char_base
= inverse
& ~0x3F;
1352 char_base
= this_char_base
;
1353 else if (char_base
> 0
1354 && this_char_base
!= char_base
)
1357 else if ((inverse
& ~0x3F) != this_char_base
)
1361 TRANSLATE (inverse
, inverse_trt
, inverse
);
1368 /* Store this character into the translated pattern. */
1369 bcopy (str
, pat
, charlen
);
1371 base_pat
+= in_charlen
;
1372 len_byte
-= in_charlen
;
1377 /* Unibyte buffer. */
1383 /* If we got here and the RE flag is set, it's because we're
1384 dealing with a regexp known to be trivial, so the backslash
1385 just quotes the next character. */
1386 if (RE
&& *base_pat
== '\\')
1393 TRANSLATE (translated
, trt
, c
);
1394 *pat
++ = translated
;
1398 len_byte
= pat
- patbuf
;
1399 len
= raw_pattern_size
;
1400 pat
= base_pat
= patbuf
;
1403 return boyer_moore (n
, pat
, len
, len_byte
, trt
, inverse_trt
,
1404 pos
, pos_byte
, lim
, lim_byte
,
1407 return simple_search (n
, pat
, len
, len_byte
, trt
,
1408 pos
, pos_byte
, lim
, lim_byte
);
1412 /* Do a simple string search N times for the string PAT,
1413 whose length is LEN/LEN_BYTE,
1414 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1415 TRT is the translation table.
1417 Return the character position where the match is found.
1418 Otherwise, if M matches remained to be found, return -M.
1420 This kind of search works regardless of what is in PAT and
1421 regardless of what is in TRT. It is used in cases where
1422 boyer_moore cannot work. */
1425 simple_search (n
, pat
, len
, len_byte
, trt
, pos
, pos_byte
, lim
, lim_byte
)
1433 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1434 int forward
= n
> 0;
1435 /* Number of buffer bytes matched. Note that this may be different
1436 from len_byte in a multibyte buffer. */
1439 if (lim
> pos
&& multibyte
)
1444 /* Try matching at position POS. */
1446 int this_pos_byte
= pos_byte
;
1448 int this_len_byte
= len_byte
;
1449 unsigned char *p
= pat
;
1450 if (pos
+ len
> lim
|| pos_byte
+ len_byte
> lim_byte
)
1453 while (this_len
> 0)
1455 int charlen
, buf_charlen
;
1458 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1459 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1460 ZV_BYTE
- this_pos_byte
,
1462 TRANSLATE (buf_ch
, trt
, buf_ch
);
1464 if (buf_ch
!= pat_ch
)
1467 this_len_byte
-= charlen
;
1471 this_pos_byte
+= buf_charlen
;
1477 match_byte
= this_pos_byte
- pos_byte
;
1479 pos_byte
+= match_byte
;
1483 INC_BOTH (pos
, pos_byte
);
1493 /* Try matching at position POS. */
1496 unsigned char *p
= pat
;
1498 if (pos
+ len
> lim
)
1501 while (this_len
> 0)
1504 int buf_ch
= FETCH_BYTE (this_pos
);
1505 TRANSLATE (buf_ch
, trt
, buf_ch
);
1507 if (buf_ch
!= pat_ch
)
1526 /* Backwards search. */
1527 else if (lim
< pos
&& multibyte
)
1532 /* Try matching at position POS. */
1533 int this_pos
= pos
- len
;
1536 int this_len_byte
= len_byte
;
1537 unsigned char *p
= pat
;
1539 if (this_pos
< lim
|| (pos_byte
- len_byte
) < lim_byte
)
1541 this_pos_byte
= CHAR_TO_BYTE (this_pos
);
1542 match_byte
= pos_byte
- this_pos_byte
;
1544 while (this_len
> 0)
1546 int charlen
, buf_charlen
;
1549 pat_ch
= STRING_CHAR_AND_LENGTH (p
, this_len_byte
, charlen
);
1550 buf_ch
= STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte
),
1551 ZV_BYTE
- this_pos_byte
,
1553 TRANSLATE (buf_ch
, trt
, buf_ch
);
1555 if (buf_ch
!= pat_ch
)
1558 this_len_byte
-= charlen
;
1561 this_pos_byte
+= buf_charlen
;
1568 pos_byte
-= match_byte
;
1572 DEC_BOTH (pos
, pos_byte
);
1582 /* Try matching at position POS. */
1583 int this_pos
= pos
- len
;
1585 unsigned char *p
= pat
;
1590 while (this_len
> 0)
1593 int buf_ch
= FETCH_BYTE (this_pos
);
1594 TRANSLATE (buf_ch
, trt
, buf_ch
);
1596 if (buf_ch
!= pat_ch
)
1619 set_search_regs ((multibyte
? pos_byte
: pos
) - match_byte
, match_byte
);
1621 set_search_regs (multibyte
? pos_byte
: pos
, match_byte
);
1631 /* Do Boyer-Moore search N times for the string BASE_PAT,
1632 whose length is LEN/LEN_BYTE,
1633 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1634 DIRECTION says which direction we search in.
1635 TRT and INVERSE_TRT are translation tables.
1636 Characters in PAT are already translated by TRT.
1638 This kind of search works if all the characters in BASE_PAT that
1639 have nontrivial translation are the same aside from the last byte.
1640 This makes it possible to translate just the last byte of a
1641 character, and do so after just a simple test of the context.
1642 CHAR_BASE is nonzero if there is such a non-ASCII character.
1644 If that criterion is not satisfied, do not call this function. */
1647 boyer_moore (n
, base_pat
, len
, len_byte
, trt
, inverse_trt
,
1648 pos
, pos_byte
, lim
, lim_byte
, char_base
)
1650 unsigned char *base_pat
;
1653 Lisp_Object inverse_trt
;
1658 int direction
= ((n
> 0) ? 1 : -1);
1659 register int dirlen
;
1660 int infinity
, limit
, stride_for_teases
= 0;
1661 register int *BM_tab
;
1663 register unsigned char *cursor
, *p_limit
;
1665 unsigned char *pat
, *pat_end
;
1666 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1668 unsigned char simple_translate
[0400];
1669 /* These are set to the preceding bytes of a byte to be translated
1670 if char_base is nonzero. As the maximum byte length of a
1671 multibyte character is 5, we have to check at most four previous
1673 int translate_prev_byte1
= 0;
1674 int translate_prev_byte2
= 0;
1675 int translate_prev_byte3
= 0;
1676 int translate_prev_byte4
= 0;
1678 BM_tab
= (int *) alloca (0400 * sizeof (int));
1680 /* The general approach is that we are going to maintain that we know */
1681 /* the first (closest to the present position, in whatever direction */
1682 /* we're searching) character that could possibly be the last */
1683 /* (furthest from present position) character of a valid match. We */
1684 /* advance the state of our knowledge by looking at that character */
1685 /* and seeing whether it indeed matches the last character of the */
1686 /* pattern. If it does, we take a closer look. If it does not, we */
1687 /* move our pointer (to putative last characters) as far as is */
1688 /* logically possible. This amount of movement, which I call a */
1689 /* stride, will be the length of the pattern if the actual character */
1690 /* appears nowhere in the pattern, otherwise it will be the distance */
1691 /* from the last occurrence of that character to the end of the */
1693 /* As a coding trick, an enormous stride is coded into the table for */
1694 /* characters that match the last character. This allows use of only */
1695 /* a single test, a test for having gone past the end of the */
1696 /* permissible match region, to test for both possible matches (when */
1697 /* the stride goes past the end immediately) and failure to */
1698 /* match (where you get nudged past the end one stride at a time). */
1700 /* Here we make a "mickey mouse" BM table. The stride of the search */
1701 /* is determined only by the last character of the putative match. */
1702 /* If that character does not match, we will stride the proper */
1703 /* distance to propose a match that superimposes it on the last */
1704 /* instance of a character that matches it (per trt), or misses */
1705 /* it entirely if there is none. */
1707 dirlen
= len_byte
* direction
;
1708 infinity
= dirlen
- (lim_byte
+ pos_byte
+ len_byte
+ len_byte
) * direction
;
1710 /* Record position after the end of the pattern. */
1711 pat_end
= base_pat
+ len_byte
;
1712 /* BASE_PAT points to a character that we start scanning from.
1713 It is the first character in a forward search,
1714 the last character in a backward search. */
1716 base_pat
= pat_end
- 1;
1718 BM_tab_base
= BM_tab
;
1720 j
= dirlen
; /* to get it in a register */
1721 /* A character that does not appear in the pattern induces a */
1722 /* stride equal to the pattern length. */
1723 while (BM_tab_base
!= BM_tab
)
1731 /* We use this for translation, instead of TRT itself.
1732 We fill this in to handle the characters that actually
1733 occur in the pattern. Others don't matter anyway! */
1734 bzero (simple_translate
, sizeof simple_translate
);
1735 for (i
= 0; i
< 0400; i
++)
1736 simple_translate
[i
] = i
;
1740 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1741 byte following them are the target of translation. */
1742 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
1743 int len
= CHAR_STRING (char_base
, str
);
1745 translate_prev_byte1
= str
[len
- 2];
1748 translate_prev_byte2
= str
[len
- 3];
1751 translate_prev_byte3
= str
[len
- 4];
1753 translate_prev_byte4
= str
[len
- 5];
1759 while (i
!= infinity
)
1761 unsigned char *ptr
= base_pat
+ i
;
1767 /* If the byte currently looking at is the last of a
1768 character to check case-equivalents, set CH to that
1769 character. An ASCII character and a non-ASCII character
1770 matching with CHAR_BASE are to be checked. */
1773 if (ASCII_BYTE_P (*ptr
) || ! multibyte
)
1776 && ((pat_end
- ptr
) == 1 || CHAR_HEAD_P (ptr
[1])))
1778 unsigned char *charstart
= ptr
- 1;
1780 while (! (CHAR_HEAD_P (*charstart
)))
1782 ch
= STRING_CHAR (charstart
, ptr
- charstart
+ 1);
1783 if (char_base
!= (ch
& ~0x3F))
1788 j
= (ch
& 0x3F) | 0200;
1793 stride_for_teases
= BM_tab
[j
];
1795 BM_tab
[j
] = dirlen
- i
;
1796 /* A translation table is accompanied by its inverse -- see */
1797 /* comment following downcase_table for details */
1800 int starting_ch
= ch
;
1805 TRANSLATE (ch
, inverse_trt
, ch
);
1807 j
= (ch
& 0x3F) | 0200;
1811 /* For all the characters that map into CH,
1812 set up simple_translate to map the last byte
1814 simple_translate
[j
] = starting_j
;
1815 if (ch
== starting_ch
)
1817 BM_tab
[j
] = dirlen
- i
;
1826 stride_for_teases
= BM_tab
[j
];
1827 BM_tab
[j
] = dirlen
- i
;
1829 /* stride_for_teases tells how much to stride if we get a */
1830 /* match on the far character but are subsequently */
1831 /* disappointed, by recording what the stride would have been */
1832 /* for that character if the last character had been */
1835 infinity
= dirlen
- infinity
;
1836 pos_byte
+= dirlen
- ((direction
> 0) ? direction
: 0);
1837 /* loop invariant - POS_BYTE points at where last char (first
1838 char if reverse) of pattern would align in a possible match. */
1842 unsigned char *tail_end_ptr
;
1844 /* It's been reported that some (broken) compiler thinks that
1845 Boolean expressions in an arithmetic context are unsigned.
1846 Using an explicit ?1:0 prevents this. */
1847 if ((lim_byte
- pos_byte
- ((direction
> 0) ? 1 : 0)) * direction
1849 return (n
* (0 - direction
));
1850 /* First we do the part we can by pointers (maybe nothing) */
1853 limit
= pos_byte
- dirlen
+ direction
;
1856 limit
= BUFFER_CEILING_OF (limit
);
1857 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1858 can take on without hitting edge of buffer or the gap. */
1859 limit
= min (limit
, pos_byte
+ 20000);
1860 limit
= min (limit
, lim_byte
- 1);
1864 limit
= BUFFER_FLOOR_OF (limit
);
1865 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1866 can take on without hitting edge of buffer or the gap. */
1867 limit
= max (limit
, pos_byte
- 20000);
1868 limit
= max (limit
, lim_byte
);
1870 tail_end
= BUFFER_CEILING_OF (pos_byte
) + 1;
1871 tail_end_ptr
= BYTE_POS_ADDR (tail_end
);
1873 if ((limit
- pos_byte
) * direction
> 20)
1877 p_limit
= BYTE_POS_ADDR (limit
);
1878 p2
= (cursor
= BYTE_POS_ADDR (pos_byte
));
1879 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1880 while (1) /* use one cursor setting as long as i can */
1882 if (direction
> 0) /* worth duplicating */
1884 /* Use signed comparison if appropriate
1885 to make cursor+infinity sure to be > p_limit.
1886 Assuming that the buffer lies in a range of addresses
1887 that are all "positive" (as ints) or all "negative",
1888 either kind of comparison will work as long
1889 as we don't step by infinity. So pick the kind
1890 that works when we do step by infinity. */
1891 if ((EMACS_INT
) (p_limit
+ infinity
) > (EMACS_INT
) p_limit
)
1892 while ((EMACS_INT
) cursor
<= (EMACS_INT
) p_limit
)
1893 cursor
+= BM_tab
[*cursor
];
1895 while ((EMACS_UINT
) cursor
<= (EMACS_UINT
) p_limit
)
1896 cursor
+= BM_tab
[*cursor
];
1900 if ((EMACS_INT
) (p_limit
+ infinity
) < (EMACS_INT
) p_limit
)
1901 while ((EMACS_INT
) cursor
>= (EMACS_INT
) p_limit
)
1902 cursor
+= BM_tab
[*cursor
];
1904 while ((EMACS_UINT
) cursor
>= (EMACS_UINT
) p_limit
)
1905 cursor
+= BM_tab
[*cursor
];
1907 /* If you are here, cursor is beyond the end of the searched region. */
1908 /* This can happen if you match on the far character of the pattern, */
1909 /* because the "stride" of that character is infinity, a number able */
1910 /* to throw you well beyond the end of the search. It can also */
1911 /* happen if you fail to match within the permitted region and would */
1912 /* otherwise try a character beyond that region */
1913 if ((cursor
- p_limit
) * direction
<= len_byte
)
1914 break; /* a small overrun is genuine */
1915 cursor
-= infinity
; /* large overrun = hit */
1916 i
= dirlen
- direction
;
1919 while ((i
-= direction
) + direction
!= 0)
1922 cursor
-= direction
;
1923 /* Translate only the last byte of a character. */
1925 || ((cursor
== tail_end_ptr
1926 || CHAR_HEAD_P (cursor
[1]))
1927 && (CHAR_HEAD_P (cursor
[0])
1928 /* Check if this is the last byte of
1929 a translable character. */
1930 || (translate_prev_byte1
== cursor
[-1]
1931 && (CHAR_HEAD_P (translate_prev_byte1
)
1932 || (translate_prev_byte2
== cursor
[-2]
1933 && (CHAR_HEAD_P (translate_prev_byte2
)
1934 || (translate_prev_byte3
== cursor
[-3]))))))))
1935 ch
= simple_translate
[*cursor
];
1944 while ((i
-= direction
) + direction
!= 0)
1946 cursor
-= direction
;
1947 if (pat
[i
] != *cursor
)
1951 cursor
+= dirlen
- i
- direction
; /* fix cursor */
1952 if (i
+ direction
== 0)
1954 int position
, start
, end
;
1956 cursor
-= direction
;
1958 position
= pos_byte
+ cursor
- p2
+ ((direction
> 0)
1959 ? 1 - len_byte
: 0);
1960 set_search_regs (position
, len_byte
);
1962 if (NILP (Vinhibit_changing_match_data
))
1964 start
= search_regs
.start
[0];
1965 end
= search_regs
.end
[0];
1968 /* If Vinhibit_changing_match_data is non-nil,
1969 search_regs will not be changed. So let's
1970 compute start and end here. */
1972 start
= BYTE_TO_CHAR (position
);
1973 end
= BYTE_TO_CHAR (position
+ len_byte
);
1976 if ((n
-= direction
) != 0)
1977 cursor
+= dirlen
; /* to resume search */
1979 return direction
> 0 ? end
: start
;
1982 cursor
+= stride_for_teases
; /* <sigh> we lose - */
1984 pos_byte
+= cursor
- p2
;
1987 /* Now we'll pick up a clump that has to be done the hard */
1988 /* way because it covers a discontinuity */
1990 limit
= ((direction
> 0)
1991 ? BUFFER_CEILING_OF (pos_byte
- dirlen
+ 1)
1992 : BUFFER_FLOOR_OF (pos_byte
- dirlen
- 1));
1993 limit
= ((direction
> 0)
1994 ? min (limit
+ len_byte
, lim_byte
- 1)
1995 : max (limit
- len_byte
, lim_byte
));
1996 /* LIMIT is now the last value POS_BYTE can have
1997 and still be valid for a possible match. */
2000 /* This loop can be coded for space rather than */
2001 /* speed because it will usually run only once. */
2002 /* (the reach is at most len + 21, and typically */
2003 /* does not exceed len) */
2004 while ((limit
- pos_byte
) * direction
>= 0)
2005 pos_byte
+= BM_tab
[FETCH_BYTE (pos_byte
)];
2006 /* now run the same tests to distinguish going off the */
2007 /* end, a match or a phony match. */
2008 if ((pos_byte
- limit
) * direction
<= len_byte
)
2009 break; /* ran off the end */
2010 /* Found what might be a match.
2011 Set POS_BYTE back to last (first if reverse) pos. */
2012 pos_byte
-= infinity
;
2013 i
= dirlen
- direction
;
2014 while ((i
-= direction
) + direction
!= 0)
2018 pos_byte
-= direction
;
2019 ptr
= BYTE_POS_ADDR (pos_byte
);
2020 /* Translate only the last byte of a character. */
2022 || ((ptr
== tail_end_ptr
2023 || CHAR_HEAD_P (ptr
[1]))
2024 && (CHAR_HEAD_P (ptr
[0])
2025 /* Check if this is the last byte of a
2026 translable character. */
2027 || (translate_prev_byte1
== ptr
[-1]
2028 && (CHAR_HEAD_P (translate_prev_byte1
)
2029 || (translate_prev_byte2
== ptr
[-2]
2030 && (CHAR_HEAD_P (translate_prev_byte2
)
2031 || translate_prev_byte3
== ptr
[-3])))))))
2032 ch
= simple_translate
[*ptr
];
2038 /* Above loop has moved POS_BYTE part or all the way
2039 back to the first pos (last pos if reverse).
2040 Set it once again at the last (first if reverse) char. */
2041 pos_byte
+= dirlen
- i
- direction
;
2042 if (i
+ direction
== 0)
2044 int position
, start
, end
;
2045 pos_byte
-= direction
;
2047 position
= pos_byte
+ ((direction
> 0) ? 1 - len_byte
: 0);
2048 set_search_regs (position
, len_byte
);
2050 if (NILP (Vinhibit_changing_match_data
))
2052 start
= search_regs
.start
[0];
2053 end
= search_regs
.end
[0];
2056 /* If Vinhibit_changing_match_data is non-nil,
2057 search_regs will not be changed. So let's
2058 compute start and end here. */
2060 start
= BYTE_TO_CHAR (position
);
2061 end
= BYTE_TO_CHAR (position
+ len_byte
);
2064 if ((n
-= direction
) != 0)
2065 pos_byte
+= dirlen
; /* to resume search */
2067 return direction
> 0 ? end
: start
;
2070 pos_byte
+= stride_for_teases
;
2073 /* We have done one clump. Can we continue? */
2074 if ((lim_byte
- pos_byte
) * direction
< 0)
2075 return ((0 - n
) * direction
);
2077 return BYTE_TO_CHAR (pos_byte
);
2080 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2081 for the overall match just found in the current buffer.
2082 Also clear out the match data for registers 1 and up. */
2085 set_search_regs (beg_byte
, nbytes
)
2086 int beg_byte
, nbytes
;
2090 if (!NILP (Vinhibit_changing_match_data
))
2093 /* Make sure we have registers in which to store
2094 the match position. */
2095 if (search_regs
.num_regs
== 0)
2097 search_regs
.start
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2098 search_regs
.end
= (regoff_t
*) xmalloc (2 * sizeof (regoff_t
));
2099 search_regs
.num_regs
= 2;
2102 /* Clear out the other registers. */
2103 for (i
= 1; i
< search_regs
.num_regs
; i
++)
2105 search_regs
.start
[i
] = -1;
2106 search_regs
.end
[i
] = -1;
2109 search_regs
.start
[0] = BYTE_TO_CHAR (beg_byte
);
2110 search_regs
.end
[0] = BYTE_TO_CHAR (beg_byte
+ nbytes
);
2111 XSETBUFFER (last_thing_searched
, current_buffer
);
2114 /* Given a string of words separated by word delimiters,
2115 compute a regexp that matches those exact words
2116 separated by arbitrary punctuation. */
2122 register unsigned char *p
, *o
;
2123 register int i
, i_byte
, len
, punct_count
= 0, word_count
= 0;
2128 CHECK_STRING (string
);
2130 len
= SCHARS (string
);
2132 for (i
= 0, i_byte
= 0; i
< len
; )
2136 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c
, string
, i
, i_byte
);
2138 if (SYNTAX (c
) != Sword
)
2141 if (i
> 0 && SYNTAX (prev_c
) == Sword
)
2148 if (SYNTAX (prev_c
) == Sword
)
2151 return empty_unibyte_string
;
2153 adjust
= - punct_count
+ 5 * (word_count
- 1) + 4;
2154 if (STRING_MULTIBYTE (string
))
2155 val
= make_uninit_multibyte_string (len
+ adjust
,
2159 val
= make_uninit_string (len
+ adjust
);
2166 for (i
= 0, i_byte
= 0; i
< len
; )
2169 int i_byte_orig
= i_byte
;
2171 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c
, string
, i
, i_byte
);
2173 if (SYNTAX (c
) == Sword
)
2175 bcopy (SDATA (string
) + i_byte_orig
, o
,
2176 i_byte
- i_byte_orig
);
2177 o
+= i_byte
- i_byte_orig
;
2179 else if (i
> 0 && SYNTAX (prev_c
) == Sword
&& --word_count
)
2197 DEFUN ("search-backward", Fsearch_backward
, Ssearch_backward
, 1, 4,
2198 "MSearch backward: ",
2199 doc
: /* Search backward from point for STRING.
2200 Set point to the beginning of the occurrence found, and return point.
2201 An optional second argument bounds the search; it is a buffer position.
2202 The match found must not extend before that position.
2203 Optional third argument, if t, means if fail just return nil (no error).
2204 If not nil and not t, position at limit of search and return nil.
2205 Optional fourth argument is repeat count--search for successive occurrences.
2207 Search case-sensitivity is determined by the value of the variable
2208 `case-fold-search', which see.
2210 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2211 (string
, bound
, noerror
, count
)
2212 Lisp_Object string
, bound
, noerror
, count
;
2214 return search_command (string
, bound
, noerror
, count
, -1, 0, 0);
2217 DEFUN ("search-forward", Fsearch_forward
, Ssearch_forward
, 1, 4, "MSearch: ",
2218 doc
: /* Search forward from point for STRING.
2219 Set point to the end of the occurrence found, and return point.
2220 An optional second argument bounds the search; it is a buffer position.
2221 The match found must not extend after that position. A value of nil is
2222 equivalent to (point-max).
2223 Optional third argument, if t, means if fail just return nil (no error).
2224 If not nil and not t, move to limit of search and return nil.
2225 Optional fourth argument is repeat count--search for successive occurrences.
2227 Search case-sensitivity is determined by the value of the variable
2228 `case-fold-search', which see.
2230 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2231 (string
, bound
, noerror
, count
)
2232 Lisp_Object string
, bound
, noerror
, count
;
2234 return search_command (string
, bound
, noerror
, count
, 1, 0, 0);
2237 DEFUN ("word-search-backward", Fword_search_backward
, Sword_search_backward
, 1, 4,
2238 "sWord search backward: ",
2239 doc
: /* Search backward from point for STRING, ignoring differences in punctuation.
2240 Set point to the beginning of the occurrence found, and return point.
2241 An optional second argument bounds the search; it is a buffer position.
2242 The match found must not extend before that position.
2243 Optional third argument, if t, means if fail just return nil (no error).
2244 If not nil and not t, move to limit of search and return nil.
2245 Optional fourth argument is repeat count--search for successive occurrences. */)
2246 (string
, bound
, noerror
, count
)
2247 Lisp_Object string
, bound
, noerror
, count
;
2249 return search_command (wordify (string
), bound
, noerror
, count
, -1, 1, 0);
2252 DEFUN ("word-search-forward", Fword_search_forward
, Sword_search_forward
, 1, 4,
2254 doc
: /* Search forward from point for STRING, ignoring differences in punctuation.
2255 Set point to the end of the occurrence found, and return point.
2256 An optional second argument bounds the search; it is a buffer position.
2257 The match found must not extend after that position.
2258 Optional third argument, if t, means if fail just return nil (no error).
2259 If not nil and not t, move to limit of search and return nil.
2260 Optional fourth argument is repeat count--search for successive occurrences. */)
2261 (string
, bound
, noerror
, count
)
2262 Lisp_Object string
, bound
, noerror
, count
;
2264 return search_command (wordify (string
), bound
, noerror
, count
, 1, 1, 0);
2267 DEFUN ("re-search-backward", Fre_search_backward
, Sre_search_backward
, 1, 4,
2268 "sRE search backward: ",
2269 doc
: /* Search backward from point for match for regular expression REGEXP.
2270 Set point to the beginning of the match, and return point.
2271 The match found is the one starting last in the buffer
2272 and yet ending before the origin of the search.
2273 An optional second argument bounds the search; it is a buffer position.
2274 The match found must start at or after that position.
2275 Optional third argument, if t, means if fail just return nil (no error).
2276 If not nil and not t, move to limit of search and return nil.
2277 Optional fourth argument is repeat count--search for successive occurrences.
2278 See also the functions `match-beginning', `match-end', `match-string',
2279 and `replace-match'. */)
2280 (regexp
, bound
, noerror
, count
)
2281 Lisp_Object regexp
, bound
, noerror
, count
;
2283 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 0);
2286 DEFUN ("re-search-forward", Fre_search_forward
, Sre_search_forward
, 1, 4,
2288 doc
: /* Search forward from point for regular expression REGEXP.
2289 Set point to the end of the occurrence found, and return point.
2290 An optional second argument bounds the search; it is a buffer position.
2291 The match found must not extend after that position.
2292 Optional third argument, if t, means if fail just return nil (no error).
2293 If not nil and not t, move to limit of search and return nil.
2294 Optional fourth argument is repeat count--search for successive occurrences.
2295 See also the functions `match-beginning', `match-end', `match-string',
2296 and `replace-match'. */)
2297 (regexp
, bound
, noerror
, count
)
2298 Lisp_Object regexp
, bound
, noerror
, count
;
2300 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 0);
2303 DEFUN ("posix-search-backward", Fposix_search_backward
, Sposix_search_backward
, 1, 4,
2304 "sPosix search backward: ",
2305 doc
: /* Search backward from point for match for regular expression REGEXP.
2306 Find the longest match in accord with Posix regular expression rules.
2307 Set point to the beginning of the match, and return point.
2308 The match found is the one starting last in the buffer
2309 and yet ending before the origin of the search.
2310 An optional second argument bounds the search; it is a buffer position.
2311 The match found must start at or after that position.
2312 Optional third argument, if t, means if fail just return nil (no error).
2313 If not nil and not t, move to limit of search and return nil.
2314 Optional fourth argument is repeat count--search for successive occurrences.
2315 See also the functions `match-beginning', `match-end', `match-string',
2316 and `replace-match'. */)
2317 (regexp
, bound
, noerror
, count
)
2318 Lisp_Object regexp
, bound
, noerror
, count
;
2320 return search_command (regexp
, bound
, noerror
, count
, -1, 1, 1);
2323 DEFUN ("posix-search-forward", Fposix_search_forward
, Sposix_search_forward
, 1, 4,
2325 doc
: /* Search forward from point for regular expression REGEXP.
2326 Find the longest match in accord with Posix regular expression rules.
2327 Set point to the end of the occurrence found, and return point.
2328 An optional second argument bounds the search; it is a buffer position.
2329 The match found must not extend after that position.
2330 Optional third argument, if t, means if fail just return nil (no error).
2331 If not nil and not t, move to limit of search and return nil.
2332 Optional fourth argument is repeat count--search for successive occurrences.
2333 See also the functions `match-beginning', `match-end', `match-string',
2334 and `replace-match'. */)
2335 (regexp
, bound
, noerror
, count
)
2336 Lisp_Object regexp
, bound
, noerror
, count
;
2338 return search_command (regexp
, bound
, noerror
, count
, 1, 1, 1);
2341 DEFUN ("replace-match", Freplace_match
, Sreplace_match
, 1, 5, 0,
2342 doc
: /* Replace text matched by last search with NEWTEXT.
2343 Leave point at the end of the replacement text.
2345 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2346 Otherwise maybe capitalize the whole text, or maybe just word initials,
2347 based on the replaced text.
2348 If the replaced text has only capital letters
2349 and has at least one multiletter word, convert NEWTEXT to all caps.
2350 Otherwise if all words are capitalized in the replaced text,
2351 capitalize each word in NEWTEXT.
2353 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2354 Otherwise treat `\\' as special:
2355 `\\&' in NEWTEXT means substitute original matched text.
2356 `\\N' means substitute what matched the Nth `\\(...\\)'.
2357 If Nth parens didn't match, substitute nothing.
2358 `\\\\' means insert one `\\'.
2359 Case conversion does not apply to these substitutions.
2361 FIXEDCASE and LITERAL are optional arguments.
2363 The optional fourth argument STRING can be a string to modify.
2364 This is meaningful when the previous match was done against STRING,
2365 using `string-match'. When used this way, `replace-match'
2366 creates and returns a new string made by copying STRING and replacing
2367 the part of STRING that was matched.
2369 The optional fifth argument SUBEXP specifies a subexpression;
2370 it says to replace just that subexpression with NEWTEXT,
2371 rather than replacing the entire matched text.
2372 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2373 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2374 NEWTEXT in place of subexp N.
2375 This is useful only after a regular expression search or match,
2376 since only regular expressions have distinguished subexpressions. */)
2377 (newtext
, fixedcase
, literal
, string
, subexp
)
2378 Lisp_Object newtext
, fixedcase
, literal
, string
, subexp
;
2380 enum { nochange
, all_caps
, cap_initial
} case_action
;
2381 register int pos
, pos_byte
;
2382 int some_multiletter_word
;
2385 int some_nonuppercase_initial
;
2386 register int c
, prevc
;
2388 int opoint
, newpoint
;
2390 CHECK_STRING (newtext
);
2392 if (! NILP (string
))
2393 CHECK_STRING (string
);
2395 case_action
= nochange
; /* We tried an initialization */
2396 /* but some C compilers blew it */
2398 if (search_regs
.num_regs
<= 0)
2399 error ("`replace-match' called before any match found");
2405 CHECK_NUMBER (subexp
);
2406 sub
= XINT (subexp
);
2407 if (sub
< 0 || sub
>= search_regs
.num_regs
)
2408 args_out_of_range (subexp
, make_number (search_regs
.num_regs
));
2413 if (search_regs
.start
[sub
] < BEGV
2414 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2415 || search_regs
.end
[sub
] > ZV
)
2416 args_out_of_range (make_number (search_regs
.start
[sub
]),
2417 make_number (search_regs
.end
[sub
]));
2421 if (search_regs
.start
[sub
] < 0
2422 || search_regs
.start
[sub
] > search_regs
.end
[sub
]
2423 || search_regs
.end
[sub
] > SCHARS (string
))
2424 args_out_of_range (make_number (search_regs
.start
[sub
]),
2425 make_number (search_regs
.end
[sub
]));
2428 if (NILP (fixedcase
))
2430 /* Decide how to casify by examining the matched text. */
2433 pos
= search_regs
.start
[sub
];
2434 last
= search_regs
.end
[sub
];
2437 pos_byte
= CHAR_TO_BYTE (pos
);
2439 pos_byte
= string_char_to_byte (string
, pos
);
2442 case_action
= all_caps
;
2444 /* some_multiletter_word is set nonzero if any original word
2445 is more than one letter long. */
2446 some_multiletter_word
= 0;
2448 some_nonuppercase_initial
= 0;
2455 c
= FETCH_CHAR_AS_MULTIBYTE (pos_byte
);
2456 INC_BOTH (pos
, pos_byte
);
2459 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c
, string
, pos
, pos_byte
);
2463 /* Cannot be all caps if any original char is lower case */
2466 if (SYNTAX (prevc
) != Sword
)
2467 some_nonuppercase_initial
= 1;
2469 some_multiletter_word
= 1;
2471 else if (UPPERCASEP (c
))
2474 if (SYNTAX (prevc
) != Sword
)
2477 some_multiletter_word
= 1;
2481 /* If the initial is a caseless word constituent,
2482 treat that like a lowercase initial. */
2483 if (SYNTAX (prevc
) != Sword
)
2484 some_nonuppercase_initial
= 1;
2490 /* Convert to all caps if the old text is all caps
2491 and has at least one multiletter word. */
2492 if (! some_lowercase
&& some_multiletter_word
)
2493 case_action
= all_caps
;
2494 /* Capitalize each word, if the old text has all capitalized words. */
2495 else if (!some_nonuppercase_initial
&& some_multiletter_word
)
2496 case_action
= cap_initial
;
2497 else if (!some_nonuppercase_initial
&& some_uppercase
)
2498 /* Should x -> yz, operating on X, give Yz or YZ?
2499 We'll assume the latter. */
2500 case_action
= all_caps
;
2502 case_action
= nochange
;
2505 /* Do replacement in a string. */
2508 Lisp_Object before
, after
;
2510 before
= Fsubstring (string
, make_number (0),
2511 make_number (search_regs
.start
[sub
]));
2512 after
= Fsubstring (string
, make_number (search_regs
.end
[sub
]), Qnil
);
2514 /* Substitute parts of the match into NEWTEXT
2519 int lastpos_byte
= 0;
2520 /* We build up the substituted string in ACCUM. */
2523 int length
= SBYTES (newtext
);
2527 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2531 int delbackslash
= 0;
2533 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2537 FETCH_STRING_CHAR_ADVANCE (c
, newtext
, pos
, pos_byte
);
2541 substart
= search_regs
.start
[sub
];
2542 subend
= search_regs
.end
[sub
];
2544 else if (c
>= '1' && c
<= '9')
2546 if (search_regs
.start
[c
- '0'] >= 0
2547 && c
<= search_regs
.num_regs
+ '0')
2549 substart
= search_regs
.start
[c
- '0'];
2550 subend
= search_regs
.end
[c
- '0'];
2554 /* If that subexp did not match,
2555 replace \\N with nothing. */
2563 error ("Invalid use of `\\' in replacement text");
2567 if (pos
- 2 != lastpos
)
2568 middle
= substring_both (newtext
, lastpos
,
2570 pos
- 2, pos_byte
- 2);
2573 accum
= concat3 (accum
, middle
,
2575 make_number (substart
),
2576 make_number (subend
)));
2578 lastpos_byte
= pos_byte
;
2580 else if (delbackslash
)
2582 middle
= substring_both (newtext
, lastpos
,
2584 pos
- 1, pos_byte
- 1);
2586 accum
= concat2 (accum
, middle
);
2588 lastpos_byte
= pos_byte
;
2593 middle
= substring_both (newtext
, lastpos
,
2599 newtext
= concat2 (accum
, middle
);
2602 /* Do case substitution in NEWTEXT if desired. */
2603 if (case_action
== all_caps
)
2604 newtext
= Fupcase (newtext
);
2605 else if (case_action
== cap_initial
)
2606 newtext
= Fupcase_initials (newtext
);
2608 return concat3 (before
, newtext
, after
);
2611 /* Record point, then move (quietly) to the start of the match. */
2612 if (PT
>= search_regs
.end
[sub
])
2614 else if (PT
> search_regs
.start
[sub
])
2615 opoint
= search_regs
.end
[sub
] - ZV
;
2619 /* If we want non-literal replacement,
2620 perform substitution on the replacement string. */
2623 int length
= SBYTES (newtext
);
2624 unsigned char *substed
;
2625 int substed_alloc_size
, substed_len
;
2626 int buf_multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
2627 int str_multibyte
= STRING_MULTIBYTE (newtext
);
2628 Lisp_Object rev_tbl
;
2629 int really_changed
= 0;
2633 substed_alloc_size
= length
* 2 + 100;
2634 substed
= (unsigned char *) xmalloc (substed_alloc_size
+ 1);
2637 /* Go thru NEWTEXT, producing the actual text to insert in
2638 SUBSTED while adjusting multibyteness to that of the current
2641 for (pos_byte
= 0, pos
= 0; pos_byte
< length
;)
2643 unsigned char str
[MAX_MULTIBYTE_LENGTH
];
2644 unsigned char *add_stuff
= NULL
;
2650 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
, pos
, pos_byte
);
2652 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2656 /* Note that we don't have to increment POS. */
2657 c
= SREF (newtext
, pos_byte
++);
2659 c
= unibyte_char_to_multibyte (c
);
2662 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2663 or set IDX to a match index, which means put that part
2664 of the buffer text into SUBSTED. */
2672 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c
, newtext
,
2674 if (!buf_multibyte
&& !ASCII_CHAR_P (c
))
2675 c
= multibyte_char_to_unibyte (c
, rev_tbl
);
2679 c
= SREF (newtext
, pos_byte
++);
2681 c
= unibyte_char_to_multibyte (c
);
2686 else if (c
>= '1' && c
<= '9' && c
<= search_regs
.num_regs
+ '0')
2688 if (search_regs
.start
[c
- '0'] >= 1)
2692 add_len
= 1, add_stuff
= "\\";
2696 error ("Invalid use of `\\' in replacement text");
2701 add_len
= CHAR_STRING (c
, str
);
2705 /* If we want to copy part of a previous match,
2706 set up ADD_STUFF and ADD_LEN to point to it. */
2709 int begbyte
= CHAR_TO_BYTE (search_regs
.start
[idx
]);
2710 add_len
= CHAR_TO_BYTE (search_regs
.end
[idx
]) - begbyte
;
2711 if (search_regs
.start
[idx
] < GPT
&& GPT
< search_regs
.end
[idx
])
2712 move_gap (search_regs
.start
[idx
]);
2713 add_stuff
= BYTE_POS_ADDR (begbyte
);
2716 /* Now the stuff we want to add to SUBSTED
2717 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2719 /* Make sure SUBSTED is big enough. */
2720 if (substed_len
+ add_len
>= substed_alloc_size
)
2722 substed_alloc_size
= substed_len
+ add_len
+ 500;
2723 substed
= (unsigned char *) xrealloc (substed
,
2724 substed_alloc_size
+ 1);
2727 /* Now add to the end of SUBSTED. */
2730 bcopy (add_stuff
, substed
+ substed_len
, add_len
);
2731 substed_len
+= add_len
;
2739 int nchars
= multibyte_chars_in_text (substed
, substed_len
);
2741 newtext
= make_multibyte_string (substed
, nchars
, substed_len
);
2744 newtext
= make_unibyte_string (substed
, substed_len
);
2749 /* Replace the old text with the new in the cleanest possible way. */
2750 replace_range (search_regs
.start
[sub
], search_regs
.end
[sub
],
2752 newpoint
= search_regs
.start
[sub
] + SCHARS (newtext
);
2754 if (case_action
== all_caps
)
2755 Fupcase_region (make_number (search_regs
.start
[sub
]),
2756 make_number (newpoint
));
2757 else if (case_action
== cap_initial
)
2758 Fupcase_initials_region (make_number (search_regs
.start
[sub
]),
2759 make_number (newpoint
));
2761 /* Adjust search data for this change. */
2763 int oldend
= search_regs
.end
[sub
];
2764 int oldstart
= search_regs
.start
[sub
];
2765 int change
= newpoint
- search_regs
.end
[sub
];
2768 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2770 if (search_regs
.start
[i
] >= oldend
)
2771 search_regs
.start
[i
] += change
;
2772 else if (search_regs
.start
[i
] > oldstart
)
2773 search_regs
.start
[i
] = oldstart
;
2774 if (search_regs
.end
[i
] >= oldend
)
2775 search_regs
.end
[i
] += change
;
2776 else if (search_regs
.end
[i
] > oldstart
)
2777 search_regs
.end
[i
] = oldstart
;
2781 /* Put point back where it was in the text. */
2783 TEMP_SET_PT (opoint
+ ZV
);
2785 TEMP_SET_PT (opoint
);
2787 /* Now move point "officially" to the start of the inserted replacement. */
2788 move_if_not_intangible (newpoint
);
2794 match_limit (num
, beginningp
)
2803 args_out_of_range (num
, make_number (0));
2804 if (search_regs
.num_regs
<= 0)
2805 error ("No match data, because no search succeeded");
2806 if (n
>= search_regs
.num_regs
2807 || search_regs
.start
[n
] < 0)
2809 return (make_number ((beginningp
) ? search_regs
.start
[n
]
2810 : search_regs
.end
[n
]));
2813 DEFUN ("match-beginning", Fmatch_beginning
, Smatch_beginning
, 1, 1, 0,
2814 doc
: /* Return position of start of text matched by last search.
2815 SUBEXP, a number, specifies which parenthesized expression in the last
2817 Value is nil if SUBEXPth pair didn't match, or there were less than
2819 Zero means the entire text matched by the whole regexp or whole string. */)
2823 return match_limit (subexp
, 1);
2826 DEFUN ("match-end", Fmatch_end
, Smatch_end
, 1, 1, 0,
2827 doc
: /* Return position of end of text matched by last search.
2828 SUBEXP, a number, specifies which parenthesized expression in the last
2830 Value is nil if SUBEXPth pair didn't match, or there were less than
2832 Zero means the entire text matched by the whole regexp or whole string. */)
2836 return match_limit (subexp
, 0);
2839 DEFUN ("match-data", Fmatch_data
, Smatch_data
, 0, 3, 0,
2840 doc
: /* Return a list containing all info on what the last search matched.
2841 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2842 All the elements are markers or nil (nil if the Nth pair didn't match)
2843 if the last match was on a buffer; integers or nil if a string was matched.
2844 Use `store-match-data' to reinstate the data in this list.
2846 If INTEGERS (the optional first argument) is non-nil, always use
2847 integers \(rather than markers) to represent buffer positions. In
2848 this case, and if the last match was in a buffer, the buffer will get
2849 stored as one additional element at the end of the list.
2851 If REUSE is a list, reuse it as part of the value. If REUSE is long
2852 enough to hold all the values, and if INTEGERS is non-nil, no consing
2855 If optional third arg RESEAT is non-nil, any previous markers on the
2856 REUSE list will be modified to point to nowhere.
2858 Return value is undefined if the last search failed. */)
2859 (integers
, reuse
, reseat
)
2860 Lisp_Object integers
, reuse
, reseat
;
2862 Lisp_Object tail
, prev
;
2867 for (tail
= reuse
; CONSP (tail
); tail
= XCDR (tail
))
2868 if (MARKERP (XCAR (tail
)))
2870 unchain_marker (XMARKER (XCAR (tail
)));
2871 XSETCAR (tail
, Qnil
);
2874 if (NILP (last_thing_searched
))
2879 data
= (Lisp_Object
*) alloca ((2 * search_regs
.num_regs
+ 1)
2880 * sizeof (Lisp_Object
));
2883 for (i
= 0; i
< search_regs
.num_regs
; i
++)
2885 int start
= search_regs
.start
[i
];
2888 if (EQ (last_thing_searched
, Qt
)
2889 || ! NILP (integers
))
2891 XSETFASTINT (data
[2 * i
], start
);
2892 XSETFASTINT (data
[2 * i
+ 1], search_regs
.end
[i
]);
2894 else if (BUFFERP (last_thing_searched
))
2896 data
[2 * i
] = Fmake_marker ();
2897 Fset_marker (data
[2 * i
],
2898 make_number (start
),
2899 last_thing_searched
);
2900 data
[2 * i
+ 1] = Fmake_marker ();
2901 Fset_marker (data
[2 * i
+ 1],
2902 make_number (search_regs
.end
[i
]),
2903 last_thing_searched
);
2906 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2912 data
[2 * i
] = data
[2 * i
+ 1] = Qnil
;
2915 if (BUFFERP (last_thing_searched
) && !NILP (integers
))
2917 data
[len
] = last_thing_searched
;
2921 /* If REUSE is not usable, cons up the values and return them. */
2922 if (! CONSP (reuse
))
2923 return Flist (len
, data
);
2925 /* If REUSE is a list, store as many value elements as will fit
2926 into the elements of REUSE. */
2927 for (i
= 0, tail
= reuse
; CONSP (tail
);
2928 i
++, tail
= XCDR (tail
))
2931 XSETCAR (tail
, data
[i
]);
2933 XSETCAR (tail
, Qnil
);
2937 /* If we couldn't fit all value elements into REUSE,
2938 cons up the rest of them and add them to the end of REUSE. */
2940 XSETCDR (prev
, Flist (len
- i
, data
+ i
));
2945 /* We used to have an internal use variant of `reseat' described as:
2947 If RESEAT is `evaporate', put the markers back on the free list
2948 immediately. No other references to the markers must exist in this
2949 case, so it is used only internally on the unwind stack and
2950 save-match-data from Lisp.
2952 But it was ill-conceived: those supposedly-internal markers get exposed via
2953 the undo-list, so freeing them here is unsafe. */
2955 DEFUN ("set-match-data", Fset_match_data
, Sset_match_data
, 1, 2, 0,
2956 doc
: /* Set internal data on last search match from elements of LIST.
2957 LIST should have been created by calling `match-data' previously.
2959 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2961 register Lisp_Object list
, reseat
;
2964 register Lisp_Object marker
;
2966 if (running_asynch_code
)
2967 save_search_regs ();
2971 /* Unless we find a marker with a buffer or an explicit buffer
2972 in LIST, assume that this match data came from a string. */
2973 last_thing_searched
= Qt
;
2975 /* Allocate registers if they don't already exist. */
2977 int length
= XFASTINT (Flength (list
)) / 2;
2979 if (length
> search_regs
.num_regs
)
2981 if (search_regs
.num_regs
== 0)
2984 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2986 = (regoff_t
*) xmalloc (length
* sizeof (regoff_t
));
2991 = (regoff_t
*) xrealloc (search_regs
.start
,
2992 length
* sizeof (regoff_t
));
2994 = (regoff_t
*) xrealloc (search_regs
.end
,
2995 length
* sizeof (regoff_t
));
2998 for (i
= search_regs
.num_regs
; i
< length
; i
++)
2999 search_regs
.start
[i
] = -1;
3001 search_regs
.num_regs
= length
;
3004 for (i
= 0; CONSP (list
); i
++)
3006 marker
= XCAR (list
);
3007 if (BUFFERP (marker
))
3009 last_thing_searched
= marker
;
3016 search_regs
.start
[i
] = -1;
3025 if (MARKERP (marker
))
3027 if (XMARKER (marker
)->buffer
== 0)
3028 XSETFASTINT (marker
, 0);
3030 XSETBUFFER (last_thing_searched
, XMARKER (marker
)->buffer
);
3033 CHECK_NUMBER_COERCE_MARKER (marker
);
3034 from
= XINT (marker
);
3036 if (!NILP (reseat
) && MARKERP (m
))
3038 unchain_marker (XMARKER (m
));
3039 XSETCAR (list
, Qnil
);
3042 if ((list
= XCDR (list
), !CONSP (list
)))
3045 m
= marker
= XCAR (list
);
3047 if (MARKERP (marker
) && XMARKER (marker
)->buffer
== 0)
3048 XSETFASTINT (marker
, 0);
3050 CHECK_NUMBER_COERCE_MARKER (marker
);
3051 search_regs
.start
[i
] = from
;
3052 search_regs
.end
[i
] = XINT (marker
);
3054 if (!NILP (reseat
) && MARKERP (m
))
3056 unchain_marker (XMARKER (m
));
3057 XSETCAR (list
, Qnil
);
3063 for (; i
< search_regs
.num_regs
; i
++)
3064 search_regs
.start
[i
] = -1;
3070 /* If non-zero the match data have been saved in saved_search_regs
3071 during the execution of a sentinel or filter. */
3072 static int search_regs_saved
;
3073 static struct re_registers saved_search_regs
;
3074 static Lisp_Object saved_last_thing_searched
;
3076 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3077 if asynchronous code (filter or sentinel) is running. */
3081 if (!search_regs_saved
)
3083 saved_search_regs
.num_regs
= search_regs
.num_regs
;
3084 saved_search_regs
.start
= search_regs
.start
;
3085 saved_search_regs
.end
= search_regs
.end
;
3086 saved_last_thing_searched
= last_thing_searched
;
3087 last_thing_searched
= Qnil
;
3088 search_regs
.num_regs
= 0;
3089 search_regs
.start
= 0;
3090 search_regs
.end
= 0;
3092 search_regs_saved
= 1;
3096 /* Called upon exit from filters and sentinels. */
3098 restore_search_regs ()
3100 if (search_regs_saved
)
3102 if (search_regs
.num_regs
> 0)
3104 xfree (search_regs
.start
);
3105 xfree (search_regs
.end
);
3107 search_regs
.num_regs
= saved_search_regs
.num_regs
;
3108 search_regs
.start
= saved_search_regs
.start
;
3109 search_regs
.end
= saved_search_regs
.end
;
3110 last_thing_searched
= saved_last_thing_searched
;
3111 saved_last_thing_searched
= Qnil
;
3112 search_regs_saved
= 0;
3117 unwind_set_match_data (list
)
3120 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3121 return Fset_match_data (list
, Qt
);
3124 /* Called to unwind protect the match data. */
3126 record_unwind_save_match_data ()
3128 record_unwind_protect (unwind_set_match_data
,
3129 Fmatch_data (Qnil
, Qnil
, Qnil
));
3132 /* Quote a string to inactivate reg-expr chars */
3134 DEFUN ("regexp-quote", Fregexp_quote
, Sregexp_quote
, 1, 1, 0,
3135 doc
: /* Return a regexp string which matches exactly STRING and nothing else. */)
3139 register unsigned char *in
, *out
, *end
;
3140 register unsigned char *temp
;
3141 int backslashes_added
= 0;
3143 CHECK_STRING (string
);
3145 temp
= (unsigned char *) alloca (SBYTES (string
) * 2);
3147 /* Now copy the data into the new string, inserting escapes. */
3149 in
= SDATA (string
);
3150 end
= in
+ SBYTES (string
);
3153 for (; in
!= end
; in
++)
3156 || *in
== '*' || *in
== '.' || *in
== '\\'
3157 || *in
== '?' || *in
== '+'
3158 || *in
== '^' || *in
== '$')
3159 *out
++ = '\\', backslashes_added
++;
3163 return make_specified_string (temp
,
3164 SCHARS (string
) + backslashes_added
,
3166 STRING_MULTIBYTE (string
));
3174 for (i
= 0; i
< REGEXP_CACHE_SIZE
; ++i
)
3176 searchbufs
[i
].buf
.allocated
= 100;
3177 searchbufs
[i
].buf
.buffer
= (unsigned char *) xmalloc (100);
3178 searchbufs
[i
].buf
.fastmap
= searchbufs
[i
].fastmap
;
3179 searchbufs
[i
].regexp
= Qnil
;
3180 searchbufs
[i
].whitespace_regexp
= Qnil
;
3181 searchbufs
[i
].syntax_table
= Qnil
;
3182 staticpro (&searchbufs
[i
].regexp
);
3183 staticpro (&searchbufs
[i
].whitespace_regexp
);
3184 staticpro (&searchbufs
[i
].syntax_table
);
3185 searchbufs
[i
].next
= (i
== REGEXP_CACHE_SIZE
-1 ? 0 : &searchbufs
[i
+1]);
3187 searchbuf_head
= &searchbufs
[0];
3189 Qsearch_failed
= intern ("search-failed");
3190 staticpro (&Qsearch_failed
);
3191 Qinvalid_regexp
= intern ("invalid-regexp");
3192 staticpro (&Qinvalid_regexp
);
3194 Fput (Qsearch_failed
, Qerror_conditions
,
3195 Fcons (Qsearch_failed
, Fcons (Qerror
, Qnil
)));
3196 Fput (Qsearch_failed
, Qerror_message
,
3197 build_string ("Search failed"));
3199 Fput (Qinvalid_regexp
, Qerror_conditions
,
3200 Fcons (Qinvalid_regexp
, Fcons (Qerror
, Qnil
)));
3201 Fput (Qinvalid_regexp
, Qerror_message
,
3202 build_string ("Invalid regexp"));
3204 last_thing_searched
= Qnil
;
3205 staticpro (&last_thing_searched
);
3207 saved_last_thing_searched
= Qnil
;
3208 staticpro (&saved_last_thing_searched
);
3210 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp
,
3211 doc
: /* Regexp to substitute for bunches of spaces in regexp search.
3212 Some commands use this for user-specified regexps.
3213 Spaces that occur inside character classes or repetition operators
3214 or other such regexp constructs are not replaced with this.
3215 A value of nil (which is the normal value) means treat spaces literally. */);
3216 Vsearch_spaces_regexp
= Qnil
;
3218 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data
,
3219 doc
: /* Internal use only.
3220 If non-nil, the primitive searching and matching functions
3221 such as `looking-at', `string-match', `re-search-forward', etc.,
3222 do not set the match data. The proper way to use this variable
3223 is to bind it with `let' around a small expression. */);
3224 Vinhibit_changing_match_data
= Qnil
;
3226 defsubr (&Slooking_at
);
3227 defsubr (&Sposix_looking_at
);
3228 defsubr (&Sstring_match
);
3229 defsubr (&Sposix_string_match
);
3230 defsubr (&Ssearch_forward
);
3231 defsubr (&Ssearch_backward
);
3232 defsubr (&Sword_search_forward
);
3233 defsubr (&Sword_search_backward
);
3234 defsubr (&Sre_search_forward
);
3235 defsubr (&Sre_search_backward
);
3236 defsubr (&Sposix_search_forward
);
3237 defsubr (&Sposix_search_backward
);
3238 defsubr (&Sreplace_match
);
3239 defsubr (&Smatch_beginning
);
3240 defsubr (&Smatch_end
);
3241 defsubr (&Smatch_data
);
3242 defsubr (&Sset_match_data
);
3243 defsubr (&Sregexp_quote
);
3246 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3247 (do not change this comment) */