]> code.delx.au - gnu-emacs/blob - src/search.c
Merge from emacs--devo--0
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22
23 #include <config.h>
24 #include "lisp.h"
25 #include "syntax.h"
26 #include "category.h"
27 #include "buffer.h"
28 #include "character.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
34
35 #include <sys/types.h>
36 #include "regex.h"
37
38 #define REGEXP_CACHE_SIZE 20
39
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
43 {
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
54 };
55
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* error condition signaled when regexp compile_pattern fails */
88
89 Lisp_Object Qinvalid_regexp;
90
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed;
93
94 Lisp_Object Vsearch_spaces_regexp;
95
96 /* If non-nil, the match data will not be changed during call to
97 searching or matching functions. This variable is for internal use
98 only. */
99 Lisp_Object Vinhibit_changing_match_data;
100
101 static void set_search_regs ();
102 static void save_search_regs ();
103 static int simple_search ();
104 static int boyer_moore ();
105 static int search_buffer ();
106 static void matcher_overflow () NO_RETURN;
107
108 static void
109 matcher_overflow ()
110 {
111 error ("Stack overflow in regexp matcher");
112 }
113
114 /* Compile a regexp and signal a Lisp error if anything goes wrong.
115 PATTERN is the pattern to compile.
116 CP is the place to put the result.
117 TRANSLATE is a translation table for ignoring case, or nil for none.
118 REGP is the structure that says where to store the "register"
119 values that will result from matching this pattern.
120 If it is 0, we should compile the pattern not to record any
121 subexpression bounds.
122 POSIX is nonzero if we want full backtracking (POSIX style)
123 for this pattern. 0 means backtrack only enough to get a valid match.
124
125 The behavior also depends on Vsearch_spaces_regexp. */
126
127 static void
128 compile_pattern_1 (cp, pattern, translate, regp, posix)
129 struct regexp_cache *cp;
130 Lisp_Object pattern;
131 Lisp_Object translate;
132 struct re_registers *regp;
133 int posix;
134 {
135 char *val;
136 reg_syntax_t old;
137
138 cp->regexp = Qnil;
139 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
140 cp->posix = posix;
141 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
142 cp->buf.charset_unibyte = charset_unibyte;
143 cp->whitespace_regexp = Vsearch_spaces_regexp;
144 /* rms: I think BLOCK_INPUT is not needed here any more,
145 because regex.c defines malloc to call xmalloc.
146 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
147 So let's turn it off. */
148 /* BLOCK_INPUT; */
149 old = re_set_syntax (RE_SYNTAX_EMACS
150 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
151 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
152 : SDATA (Vsearch_spaces_regexp));
153
154 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
155 SBYTES (pattern), &cp->buf);
156
157 /* If the compiled pattern hard codes some of the contents of the
158 syntax-table, it can only be reused with *this* syntax table. */
159 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
160
161 re_set_whitespace_regexp (NULL);
162
163 re_set_syntax (old);
164 /* UNBLOCK_INPUT; */
165 if (val)
166 xsignal1 (Qinvalid_regexp, build_string (val));
167
168 cp->regexp = Fcopy_sequence (pattern);
169 }
170
171 /* Shrink each compiled regexp buffer in the cache
172 to the size actually used right now.
173 This is called from garbage collection. */
174
175 void
176 shrink_regexp_cache ()
177 {
178 struct regexp_cache *cp;
179
180 for (cp = searchbuf_head; cp != 0; cp = cp->next)
181 {
182 cp->buf.allocated = cp->buf.used;
183 cp->buf.buffer
184 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
185 }
186 }
187
188 /* Clear the regexp cache w.r.t. a particular syntax table,
189 because it was changed.
190 There is no danger of memory leak here because re_compile_pattern
191 automagically manages the memory in each re_pattern_buffer struct,
192 based on its `allocated' and `buffer' values. */
193 void
194 clear_regexp_cache ()
195 {
196 int i;
197
198 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
199 /* It's tempting to compare with the syntax-table we've actually changd,
200 but it's not sufficient because char-table inheritance mewans that
201 modifying one syntax-table can change others at the same time. */
202 if (!EQ (searchbufs[i].syntax_table, Qt))
203 searchbufs[i].regexp = Qnil;
204 }
205
206 /* Compile a regexp if necessary, but first check to see if there's one in
207 the cache.
208 PATTERN is the pattern to compile.
209 TRANSLATE is a translation table for ignoring case, or nil for none.
210 REGP is the structure that says where to store the "register"
211 values that will result from matching this pattern.
212 If it is 0, we should compile the pattern not to record any
213 subexpression bounds.
214 POSIX is nonzero if we want full backtracking (POSIX style)
215 for this pattern. 0 means backtrack only enough to get a valid match. */
216
217 struct re_pattern_buffer *
218 compile_pattern (pattern, regp, translate, posix, multibyte)
219 Lisp_Object pattern;
220 struct re_registers *regp;
221 Lisp_Object translate;
222 int posix, multibyte;
223 {
224 struct regexp_cache *cp, **cpp;
225
226 for (cpp = &searchbuf_head; ; cpp = &cp->next)
227 {
228 cp = *cpp;
229 /* Entries are initialized to nil, and may be set to nil by
230 compile_pattern_1 if the pattern isn't valid. Don't apply
231 string accessors in those cases. However, compile_pattern_1
232 is only applied to the cache entry we pick here to reuse. So
233 nil should never appear before a non-nil entry. */
234 if (NILP (cp->regexp))
235 goto compile_it;
236 if (SCHARS (cp->regexp) == SCHARS (pattern)
237 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
238 && !NILP (Fstring_equal (cp->regexp, pattern))
239 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
240 && cp->posix == posix
241 && (EQ (cp->syntax_table, Qt)
242 || EQ (cp->syntax_table, current_buffer->syntax_table))
243 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
244 && cp->buf.charset_unibyte == charset_unibyte)
245 break;
246
247 /* If we're at the end of the cache, compile into the nil cell
248 we found, or the last (least recently used) cell with a
249 string value. */
250 if (cp->next == 0)
251 {
252 compile_it:
253 compile_pattern_1 (cp, pattern, translate, regp, posix);
254 break;
255 }
256 }
257
258 /* When we get here, cp (aka *cpp) contains the compiled pattern,
259 either because we found it in the cache or because we just compiled it.
260 Move it to the front of the queue to mark it as most recently used. */
261 *cpp = cp->next;
262 cp->next = searchbuf_head;
263 searchbuf_head = cp;
264
265 /* Advise the searching functions about the space we have allocated
266 for register data. */
267 if (regp)
268 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
269
270 /* The compiled pattern can be used both for mulitbyte and unibyte
271 target. But, we have to tell which the pattern is used for. */
272 cp->buf.target_multibyte = multibyte;
273
274 return &cp->buf;
275 }
276
277 \f
278 static Lisp_Object
279 looking_at_1 (string, posix)
280 Lisp_Object string;
281 int posix;
282 {
283 Lisp_Object val;
284 unsigned char *p1, *p2;
285 int s1, s2;
286 register int i;
287 struct re_pattern_buffer *bufp;
288
289 if (running_asynch_code)
290 save_search_regs ();
291
292 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
293 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
294 = current_buffer->case_eqv_table;
295
296 CHECK_STRING (string);
297 bufp = compile_pattern (string,
298 (NILP (Vinhibit_changing_match_data)
299 ? &search_regs : NULL),
300 (!NILP (current_buffer->case_fold_search)
301 ? current_buffer->case_canon_table : Qnil),
302 posix,
303 !NILP (current_buffer->enable_multibyte_characters));
304
305 immediate_quit = 1;
306 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
307
308 /* Get pointers and sizes of the two strings
309 that make up the visible portion of the buffer. */
310
311 p1 = BEGV_ADDR;
312 s1 = GPT_BYTE - BEGV_BYTE;
313 p2 = GAP_END_ADDR;
314 s2 = ZV_BYTE - GPT_BYTE;
315 if (s1 < 0)
316 {
317 p2 = p1;
318 s2 = ZV_BYTE - BEGV_BYTE;
319 s1 = 0;
320 }
321 if (s2 < 0)
322 {
323 s1 = ZV_BYTE - BEGV_BYTE;
324 s2 = 0;
325 }
326
327 re_match_object = Qnil;
328
329 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
330 PT_BYTE - BEGV_BYTE,
331 (NILP (Vinhibit_changing_match_data)
332 ? &search_regs : NULL),
333 ZV_BYTE - BEGV_BYTE);
334 immediate_quit = 0;
335
336 if (i == -2)
337 matcher_overflow ();
338
339 val = (0 <= i ? Qt : Qnil);
340 if (NILP (Vinhibit_changing_match_data) && i >= 0)
341 for (i = 0; i < search_regs.num_regs; i++)
342 if (search_regs.start[i] >= 0)
343 {
344 search_regs.start[i]
345 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
346 search_regs.end[i]
347 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
348 }
349
350 /* Set last_thing_searched only when match data is changed. */
351 if (NILP (Vinhibit_changing_match_data))
352 XSETBUFFER (last_thing_searched, current_buffer);
353
354 return val;
355 }
356
357 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
358 doc: /* Return t if text after point matches regular expression REGEXP.
359 This function modifies the match data that `match-beginning',
360 `match-end' and `match-data' access; save and restore the match
361 data if you want to preserve them. */)
362 (regexp)
363 Lisp_Object regexp;
364 {
365 return looking_at_1 (regexp, 0);
366 }
367
368 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
369 doc: /* Return t if text after point matches regular expression REGEXP.
370 Find the longest match, in accord with Posix regular expression rules.
371 This function modifies the match data that `match-beginning',
372 `match-end' and `match-data' access; save and restore the match
373 data if you want to preserve them. */)
374 (regexp)
375 Lisp_Object regexp;
376 {
377 return looking_at_1 (regexp, 1);
378 }
379 \f
380 static Lisp_Object
381 string_match_1 (regexp, string, start, posix)
382 Lisp_Object regexp, string, start;
383 int posix;
384 {
385 int val;
386 struct re_pattern_buffer *bufp;
387 int pos, pos_byte;
388 int i;
389
390 if (running_asynch_code)
391 save_search_regs ();
392
393 CHECK_STRING (regexp);
394 CHECK_STRING (string);
395
396 if (NILP (start))
397 pos = 0, pos_byte = 0;
398 else
399 {
400 int len = SCHARS (string);
401
402 CHECK_NUMBER (start);
403 pos = XINT (start);
404 if (pos < 0 && -pos <= len)
405 pos = len + pos;
406 else if (0 > pos || pos > len)
407 args_out_of_range (string, start);
408 pos_byte = string_char_to_byte (string, pos);
409 }
410
411 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
412 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
413 = current_buffer->case_eqv_table;
414
415 bufp = compile_pattern (regexp,
416 (NILP (Vinhibit_changing_match_data)
417 ? &search_regs : NULL),
418 (!NILP (current_buffer->case_fold_search)
419 ? current_buffer->case_canon_table : Qnil),
420 posix,
421 STRING_MULTIBYTE (string));
422 immediate_quit = 1;
423 re_match_object = string;
424
425 val = re_search (bufp, (char *) SDATA (string),
426 SBYTES (string), pos_byte,
427 SBYTES (string) - pos_byte,
428 (NILP (Vinhibit_changing_match_data)
429 ? &search_regs : NULL));
430 immediate_quit = 0;
431
432 /* Set last_thing_searched only when match data is changed. */
433 if (NILP (Vinhibit_changing_match_data))
434 last_thing_searched = Qt;
435
436 if (val == -2)
437 matcher_overflow ();
438 if (val < 0) return Qnil;
439
440 if (NILP (Vinhibit_changing_match_data))
441 for (i = 0; i < search_regs.num_regs; i++)
442 if (search_regs.start[i] >= 0)
443 {
444 search_regs.start[i]
445 = string_byte_to_char (string, search_regs.start[i]);
446 search_regs.end[i]
447 = string_byte_to_char (string, search_regs.end[i]);
448 }
449
450 return make_number (string_byte_to_char (string, val));
451 }
452
453 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
454 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
455 Matching ignores case if `case-fold-search' is non-nil.
456 If third arg START is non-nil, start search at that index in STRING.
457 For index of first char beyond the match, do (match-end 0).
458 `match-end' and `match-beginning' also give indices of substrings
459 matched by parenthesis constructs in the pattern.
460
461 You can use the function `match-string' to extract the substrings
462 matched by the parenthesis constructions in REGEXP. */)
463 (regexp, string, start)
464 Lisp_Object regexp, string, start;
465 {
466 return string_match_1 (regexp, string, start, 0);
467 }
468
469 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
470 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
471 Find the longest match, in accord with Posix regular expression rules.
472 Case is ignored if `case-fold-search' is non-nil in the current buffer.
473 If third arg START is non-nil, start search at that index in STRING.
474 For index of first char beyond the match, do (match-end 0).
475 `match-end' and `match-beginning' also give indices of substrings
476 matched by parenthesis constructs in the pattern. */)
477 (regexp, string, start)
478 Lisp_Object regexp, string, start;
479 {
480 return string_match_1 (regexp, string, start, 1);
481 }
482
483 /* Match REGEXP against STRING, searching all of STRING,
484 and return the index of the match, or negative on failure.
485 This does not clobber the match data. */
486
487 int
488 fast_string_match (regexp, string)
489 Lisp_Object regexp, string;
490 {
491 int val;
492 struct re_pattern_buffer *bufp;
493
494 bufp = compile_pattern (regexp, 0, Qnil,
495 0, STRING_MULTIBYTE (string));
496 immediate_quit = 1;
497 re_match_object = string;
498
499 val = re_search (bufp, (char *) SDATA (string),
500 SBYTES (string), 0,
501 SBYTES (string), 0);
502 immediate_quit = 0;
503 return val;
504 }
505
506 /* Match REGEXP against STRING, searching all of STRING ignoring case,
507 and return the index of the match, or negative on failure.
508 This does not clobber the match data.
509 We assume that STRING contains single-byte characters. */
510
511 extern Lisp_Object Vascii_downcase_table;
512
513 int
514 fast_c_string_match_ignore_case (regexp, string)
515 Lisp_Object regexp;
516 const char *string;
517 {
518 int val;
519 struct re_pattern_buffer *bufp;
520 int len = strlen (string);
521
522 regexp = string_make_unibyte (regexp);
523 re_match_object = Qt;
524 bufp = compile_pattern (regexp, 0,
525 Vascii_canon_table, 0,
526 0);
527 immediate_quit = 1;
528 val = re_search (bufp, string, len, 0, len, 0);
529 immediate_quit = 0;
530 return val;
531 }
532
533 /* Like fast_string_match but ignore case. */
534
535 int
536 fast_string_match_ignore_case (regexp, string)
537 Lisp_Object regexp, string;
538 {
539 int val;
540 struct re_pattern_buffer *bufp;
541
542 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
543 0, STRING_MULTIBYTE (string));
544 immediate_quit = 1;
545 re_match_object = string;
546
547 val = re_search (bufp, (char *) SDATA (string),
548 SBYTES (string), 0,
549 SBYTES (string), 0);
550 immediate_quit = 0;
551 return val;
552 }
553 \f
554 /* The newline cache: remembering which sections of text have no newlines. */
555
556 /* If the user has requested newline caching, make sure it's on.
557 Otherwise, make sure it's off.
558 This is our cheezy way of associating an action with the change of
559 state of a buffer-local variable. */
560 static void
561 newline_cache_on_off (buf)
562 struct buffer *buf;
563 {
564 if (NILP (buf->cache_long_line_scans))
565 {
566 /* It should be off. */
567 if (buf->newline_cache)
568 {
569 free_region_cache (buf->newline_cache);
570 buf->newline_cache = 0;
571 }
572 }
573 else
574 {
575 /* It should be on. */
576 if (buf->newline_cache == 0)
577 buf->newline_cache = new_region_cache ();
578 }
579 }
580
581 \f
582 /* Search for COUNT instances of the character TARGET between START and END.
583
584 If COUNT is positive, search forwards; END must be >= START.
585 If COUNT is negative, search backwards for the -COUNTth instance;
586 END must be <= START.
587 If COUNT is zero, do anything you please; run rogue, for all I care.
588
589 If END is zero, use BEGV or ZV instead, as appropriate for the
590 direction indicated by COUNT.
591
592 If we find COUNT instances, set *SHORTAGE to zero, and return the
593 position past the COUNTth match. Note that for reverse motion
594 this is not the same as the usual convention for Emacs motion commands.
595
596 If we don't find COUNT instances before reaching END, set *SHORTAGE
597 to the number of TARGETs left unfound, and return END.
598
599 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
600 except when inside redisplay. */
601
602 int
603 scan_buffer (target, start, end, count, shortage, allow_quit)
604 register int target;
605 int start, end;
606 int count;
607 int *shortage;
608 int allow_quit;
609 {
610 struct region_cache *newline_cache;
611 int direction;
612
613 if (count > 0)
614 {
615 direction = 1;
616 if (! end) end = ZV;
617 }
618 else
619 {
620 direction = -1;
621 if (! end) end = BEGV;
622 }
623
624 newline_cache_on_off (current_buffer);
625 newline_cache = current_buffer->newline_cache;
626
627 if (shortage != 0)
628 *shortage = 0;
629
630 immediate_quit = allow_quit;
631
632 if (count > 0)
633 while (start != end)
634 {
635 /* Our innermost scanning loop is very simple; it doesn't know
636 about gaps, buffer ends, or the newline cache. ceiling is
637 the position of the last character before the next such
638 obstacle --- the last character the dumb search loop should
639 examine. */
640 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
641 int start_byte = CHAR_TO_BYTE (start);
642 int tem;
643
644 /* If we're looking for a newline, consult the newline cache
645 to see where we can avoid some scanning. */
646 if (target == '\n' && newline_cache)
647 {
648 int next_change;
649 immediate_quit = 0;
650 while (region_cache_forward
651 (current_buffer, newline_cache, start_byte, &next_change))
652 start_byte = next_change;
653 immediate_quit = allow_quit;
654
655 /* START should never be after END. */
656 if (start_byte > ceiling_byte)
657 start_byte = ceiling_byte;
658
659 /* Now the text after start is an unknown region, and
660 next_change is the position of the next known region. */
661 ceiling_byte = min (next_change - 1, ceiling_byte);
662 }
663
664 /* The dumb loop can only scan text stored in contiguous
665 bytes. BUFFER_CEILING_OF returns the last character
666 position that is contiguous, so the ceiling is the
667 position after that. */
668 tem = BUFFER_CEILING_OF (start_byte);
669 ceiling_byte = min (tem, ceiling_byte);
670
671 {
672 /* The termination address of the dumb loop. */
673 register unsigned char *ceiling_addr
674 = BYTE_POS_ADDR (ceiling_byte) + 1;
675 register unsigned char *cursor
676 = BYTE_POS_ADDR (start_byte);
677 unsigned char *base = cursor;
678
679 while (cursor < ceiling_addr)
680 {
681 unsigned char *scan_start = cursor;
682
683 /* The dumb loop. */
684 while (*cursor != target && ++cursor < ceiling_addr)
685 ;
686
687 /* If we're looking for newlines, cache the fact that
688 the region from start to cursor is free of them. */
689 if (target == '\n' && newline_cache)
690 know_region_cache (current_buffer, newline_cache,
691 start_byte + scan_start - base,
692 start_byte + cursor - base);
693
694 /* Did we find the target character? */
695 if (cursor < ceiling_addr)
696 {
697 if (--count == 0)
698 {
699 immediate_quit = 0;
700 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
701 }
702 cursor++;
703 }
704 }
705
706 start = BYTE_TO_CHAR (start_byte + cursor - base);
707 }
708 }
709 else
710 while (start > end)
711 {
712 /* The last character to check before the next obstacle. */
713 int ceiling_byte = CHAR_TO_BYTE (end);
714 int start_byte = CHAR_TO_BYTE (start);
715 int tem;
716
717 /* Consult the newline cache, if appropriate. */
718 if (target == '\n' && newline_cache)
719 {
720 int next_change;
721 immediate_quit = 0;
722 while (region_cache_backward
723 (current_buffer, newline_cache, start_byte, &next_change))
724 start_byte = next_change;
725 immediate_quit = allow_quit;
726
727 /* Start should never be at or before end. */
728 if (start_byte <= ceiling_byte)
729 start_byte = ceiling_byte + 1;
730
731 /* Now the text before start is an unknown region, and
732 next_change is the position of the next known region. */
733 ceiling_byte = max (next_change, ceiling_byte);
734 }
735
736 /* Stop scanning before the gap. */
737 tem = BUFFER_FLOOR_OF (start_byte - 1);
738 ceiling_byte = max (tem, ceiling_byte);
739
740 {
741 /* The termination address of the dumb loop. */
742 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
743 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
744 unsigned char *base = cursor;
745
746 while (cursor >= ceiling_addr)
747 {
748 unsigned char *scan_start = cursor;
749
750 while (*cursor != target && --cursor >= ceiling_addr)
751 ;
752
753 /* If we're looking for newlines, cache the fact that
754 the region from after the cursor to start is free of them. */
755 if (target == '\n' && newline_cache)
756 know_region_cache (current_buffer, newline_cache,
757 start_byte + cursor - base,
758 start_byte + scan_start - base);
759
760 /* Did we find the target character? */
761 if (cursor >= ceiling_addr)
762 {
763 if (++count >= 0)
764 {
765 immediate_quit = 0;
766 return BYTE_TO_CHAR (start_byte + cursor - base);
767 }
768 cursor--;
769 }
770 }
771
772 start = BYTE_TO_CHAR (start_byte + cursor - base);
773 }
774 }
775
776 immediate_quit = 0;
777 if (shortage != 0)
778 *shortage = count * direction;
779 return start;
780 }
781 \f
782 /* Search for COUNT instances of a line boundary, which means either a
783 newline or (if selective display enabled) a carriage return.
784 Start at START. If COUNT is negative, search backwards.
785
786 We report the resulting position by calling TEMP_SET_PT_BOTH.
787
788 If we find COUNT instances. we position after (always after,
789 even if scanning backwards) the COUNTth match, and return 0.
790
791 If we don't find COUNT instances before reaching the end of the
792 buffer (or the beginning, if scanning backwards), we return
793 the number of line boundaries left unfound, and position at
794 the limit we bumped up against.
795
796 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
797 except in special cases. */
798
799 int
800 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
801 int start, start_byte;
802 int limit, limit_byte;
803 register int count;
804 int allow_quit;
805 {
806 int direction = ((count > 0) ? 1 : -1);
807
808 register unsigned char *cursor;
809 unsigned char *base;
810
811 register int ceiling;
812 register unsigned char *ceiling_addr;
813
814 int old_immediate_quit = immediate_quit;
815
816 /* The code that follows is like scan_buffer
817 but checks for either newline or carriage return. */
818
819 if (allow_quit)
820 immediate_quit++;
821
822 start_byte = CHAR_TO_BYTE (start);
823
824 if (count > 0)
825 {
826 while (start_byte < limit_byte)
827 {
828 ceiling = BUFFER_CEILING_OF (start_byte);
829 ceiling = min (limit_byte - 1, ceiling);
830 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
831 base = (cursor = BYTE_POS_ADDR (start_byte));
832 while (1)
833 {
834 while (*cursor != '\n' && ++cursor != ceiling_addr)
835 ;
836
837 if (cursor != ceiling_addr)
838 {
839 if (--count == 0)
840 {
841 immediate_quit = old_immediate_quit;
842 start_byte = start_byte + cursor - base + 1;
843 start = BYTE_TO_CHAR (start_byte);
844 TEMP_SET_PT_BOTH (start, start_byte);
845 return 0;
846 }
847 else
848 if (++cursor == ceiling_addr)
849 break;
850 }
851 else
852 break;
853 }
854 start_byte += cursor - base;
855 }
856 }
857 else
858 {
859 while (start_byte > limit_byte)
860 {
861 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
862 ceiling = max (limit_byte, ceiling);
863 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
864 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
865 while (1)
866 {
867 while (--cursor != ceiling_addr && *cursor != '\n')
868 ;
869
870 if (cursor != ceiling_addr)
871 {
872 if (++count == 0)
873 {
874 immediate_quit = old_immediate_quit;
875 /* Return the position AFTER the match we found. */
876 start_byte = start_byte + cursor - base + 1;
877 start = BYTE_TO_CHAR (start_byte);
878 TEMP_SET_PT_BOTH (start, start_byte);
879 return 0;
880 }
881 }
882 else
883 break;
884 }
885 /* Here we add 1 to compensate for the last decrement
886 of CURSOR, which took it past the valid range. */
887 start_byte += cursor - base + 1;
888 }
889 }
890
891 TEMP_SET_PT_BOTH (limit, limit_byte);
892 immediate_quit = old_immediate_quit;
893
894 return count * direction;
895 }
896
897 int
898 find_next_newline_no_quit (from, cnt)
899 register int from, cnt;
900 {
901 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
902 }
903
904 /* Like find_next_newline, but returns position before the newline,
905 not after, and only search up to TO. This isn't just
906 find_next_newline (...)-1, because you might hit TO. */
907
908 int
909 find_before_next_newline (from, to, cnt)
910 int from, to, cnt;
911 {
912 int shortage;
913 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
914
915 if (shortage == 0)
916 pos--;
917
918 return pos;
919 }
920 \f
921 /* Subroutines of Lisp buffer search functions. */
922
923 static Lisp_Object
924 search_command (string, bound, noerror, count, direction, RE, posix)
925 Lisp_Object string, bound, noerror, count;
926 int direction;
927 int RE;
928 int posix;
929 {
930 register int np;
931 int lim, lim_byte;
932 int n = direction;
933
934 if (!NILP (count))
935 {
936 CHECK_NUMBER (count);
937 n *= XINT (count);
938 }
939
940 CHECK_STRING (string);
941 if (NILP (bound))
942 {
943 if (n > 0)
944 lim = ZV, lim_byte = ZV_BYTE;
945 else
946 lim = BEGV, lim_byte = BEGV_BYTE;
947 }
948 else
949 {
950 CHECK_NUMBER_COERCE_MARKER (bound);
951 lim = XINT (bound);
952 if (n > 0 ? lim < PT : lim > PT)
953 error ("Invalid search bound (wrong side of point)");
954 if (lim > ZV)
955 lim = ZV, lim_byte = ZV_BYTE;
956 else if (lim < BEGV)
957 lim = BEGV, lim_byte = BEGV_BYTE;
958 else
959 lim_byte = CHAR_TO_BYTE (lim);
960 }
961
962 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
963 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
964 = current_buffer->case_eqv_table;
965
966 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
967 (!NILP (current_buffer->case_fold_search)
968 ? current_buffer->case_canon_table
969 : Qnil),
970 (!NILP (current_buffer->case_fold_search)
971 ? current_buffer->case_eqv_table
972 : Qnil),
973 posix);
974 if (np <= 0)
975 {
976 if (NILP (noerror))
977 xsignal1 (Qsearch_failed, string);
978
979 if (!EQ (noerror, Qt))
980 {
981 if (lim < BEGV || lim > ZV)
982 abort ();
983 SET_PT_BOTH (lim, lim_byte);
984 return Qnil;
985 #if 0 /* This would be clean, but maybe programs depend on
986 a value of nil here. */
987 np = lim;
988 #endif
989 }
990 else
991 return Qnil;
992 }
993
994 if (np < BEGV || np > ZV)
995 abort ();
996
997 SET_PT (np);
998
999 return make_number (np);
1000 }
1001 \f
1002 /* Return 1 if REGEXP it matches just one constant string. */
1003
1004 static int
1005 trivial_regexp_p (regexp)
1006 Lisp_Object regexp;
1007 {
1008 int len = SBYTES (regexp);
1009 unsigned char *s = SDATA (regexp);
1010 while (--len >= 0)
1011 {
1012 switch (*s++)
1013 {
1014 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1015 return 0;
1016 case '\\':
1017 if (--len < 0)
1018 return 0;
1019 switch (*s++)
1020 {
1021 case '|': case '(': case ')': case '`': case '\'': case 'b':
1022 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1023 case 'S': case '=': case '{': case '}': case '_':
1024 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1025 case '1': case '2': case '3': case '4': case '5':
1026 case '6': case '7': case '8': case '9':
1027 return 0;
1028 }
1029 }
1030 }
1031 return 1;
1032 }
1033
1034 /* Search for the n'th occurrence of STRING in the current buffer,
1035 starting at position POS and stopping at position LIM,
1036 treating STRING as a literal string if RE is false or as
1037 a regular expression if RE is true.
1038
1039 If N is positive, searching is forward and LIM must be greater than POS.
1040 If N is negative, searching is backward and LIM must be less than POS.
1041
1042 Returns -x if x occurrences remain to be found (x > 0),
1043 or else the position at the beginning of the Nth occurrence
1044 (if searching backward) or the end (if searching forward).
1045
1046 POSIX is nonzero if we want full backtracking (POSIX style)
1047 for this pattern. 0 means backtrack only enough to get a valid match. */
1048
1049 #define TRANSLATE(out, trt, d) \
1050 do \
1051 { \
1052 if (! NILP (trt)) \
1053 { \
1054 Lisp_Object temp; \
1055 temp = Faref (trt, make_number (d)); \
1056 if (INTEGERP (temp)) \
1057 out = XINT (temp); \
1058 else \
1059 out = d; \
1060 } \
1061 else \
1062 out = d; \
1063 } \
1064 while (0)
1065
1066 /* Only used in search_buffer, to record the end position of the match
1067 when searching regexps and SEARCH_REGS should not be changed
1068 (i.e. Vinhibit_changing_match_data is non-nil). */
1069 static struct re_registers search_regs_1;
1070
1071 static int
1072 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1073 RE, trt, inverse_trt, posix)
1074 Lisp_Object string;
1075 int pos;
1076 int pos_byte;
1077 int lim;
1078 int lim_byte;
1079 int n;
1080 int RE;
1081 Lisp_Object trt;
1082 Lisp_Object inverse_trt;
1083 int posix;
1084 {
1085 int len = SCHARS (string);
1086 int len_byte = SBYTES (string);
1087 register int i;
1088
1089 if (running_asynch_code)
1090 save_search_regs ();
1091
1092 /* Searching 0 times means don't move. */
1093 /* Null string is found at starting position. */
1094 if (len == 0 || n == 0)
1095 {
1096 set_search_regs (pos_byte, 0);
1097 return pos;
1098 }
1099
1100 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1101 {
1102 unsigned char *p1, *p2;
1103 int s1, s2;
1104 struct re_pattern_buffer *bufp;
1105
1106 bufp = compile_pattern (string,
1107 (NILP (Vinhibit_changing_match_data)
1108 ? &search_regs : &search_regs_1),
1109 trt, posix,
1110 !NILP (current_buffer->enable_multibyte_characters));
1111
1112 immediate_quit = 1; /* Quit immediately if user types ^G,
1113 because letting this function finish
1114 can take too long. */
1115 QUIT; /* Do a pending quit right away,
1116 to avoid paradoxical behavior */
1117 /* Get pointers and sizes of the two strings
1118 that make up the visible portion of the buffer. */
1119
1120 p1 = BEGV_ADDR;
1121 s1 = GPT_BYTE - BEGV_BYTE;
1122 p2 = GAP_END_ADDR;
1123 s2 = ZV_BYTE - GPT_BYTE;
1124 if (s1 < 0)
1125 {
1126 p2 = p1;
1127 s2 = ZV_BYTE - BEGV_BYTE;
1128 s1 = 0;
1129 }
1130 if (s2 < 0)
1131 {
1132 s1 = ZV_BYTE - BEGV_BYTE;
1133 s2 = 0;
1134 }
1135 re_match_object = Qnil;
1136
1137 while (n < 0)
1138 {
1139 int val;
1140 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1141 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1142 (NILP (Vinhibit_changing_match_data)
1143 ? &search_regs : &search_regs_1),
1144 /* Don't allow match past current point */
1145 pos_byte - BEGV_BYTE);
1146 if (val == -2)
1147 {
1148 matcher_overflow ();
1149 }
1150 if (val >= 0)
1151 {
1152 if (NILP (Vinhibit_changing_match_data))
1153 {
1154 pos_byte = search_regs.start[0] + BEGV_BYTE;
1155 for (i = 0; i < search_regs.num_regs; i++)
1156 if (search_regs.start[i] >= 0)
1157 {
1158 search_regs.start[i]
1159 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1160 search_regs.end[i]
1161 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1162 }
1163 XSETBUFFER (last_thing_searched, current_buffer);
1164 /* Set pos to the new position. */
1165 pos = search_regs.start[0];
1166 }
1167 else
1168 {
1169 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1170 /* Set pos to the new position. */
1171 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1172 }
1173 }
1174 else
1175 {
1176 immediate_quit = 0;
1177 return (n);
1178 }
1179 n++;
1180 }
1181 while (n > 0)
1182 {
1183 int val;
1184 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1185 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1186 (NILP (Vinhibit_changing_match_data)
1187 ? &search_regs : &search_regs_1),
1188 lim_byte - BEGV_BYTE);
1189 if (val == -2)
1190 {
1191 matcher_overflow ();
1192 }
1193 if (val >= 0)
1194 {
1195 if (NILP (Vinhibit_changing_match_data))
1196 {
1197 pos_byte = search_regs.end[0] + BEGV_BYTE;
1198 for (i = 0; i < search_regs.num_regs; i++)
1199 if (search_regs.start[i] >= 0)
1200 {
1201 search_regs.start[i]
1202 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1203 search_regs.end[i]
1204 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1205 }
1206 XSETBUFFER (last_thing_searched, current_buffer);
1207 pos = search_regs.end[0];
1208 }
1209 else
1210 {
1211 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1212 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1213 }
1214 }
1215 else
1216 {
1217 immediate_quit = 0;
1218 return (0 - n);
1219 }
1220 n--;
1221 }
1222 immediate_quit = 0;
1223 return (pos);
1224 }
1225 else /* non-RE case */
1226 {
1227 unsigned char *raw_pattern, *pat;
1228 int raw_pattern_size;
1229 int raw_pattern_size_byte;
1230 unsigned char *patbuf;
1231 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1232 unsigned char *base_pat;
1233 /* Set to positive if we find a non-ASCII char that need
1234 translation. Otherwise set to zero later. */
1235 int char_base = -1;
1236 int boyer_moore_ok = 1;
1237
1238 /* MULTIBYTE says whether the text to be searched is multibyte.
1239 We must convert PATTERN to match that, or we will not really
1240 find things right. */
1241
1242 if (multibyte == STRING_MULTIBYTE (string))
1243 {
1244 raw_pattern = (unsigned char *) SDATA (string);
1245 raw_pattern_size = SCHARS (string);
1246 raw_pattern_size_byte = SBYTES (string);
1247 }
1248 else if (multibyte)
1249 {
1250 raw_pattern_size = SCHARS (string);
1251 raw_pattern_size_byte
1252 = count_size_as_multibyte (SDATA (string),
1253 raw_pattern_size);
1254 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1255 copy_text (SDATA (string), raw_pattern,
1256 SCHARS (string), 0, 1);
1257 }
1258 else
1259 {
1260 /* Converting multibyte to single-byte.
1261
1262 ??? Perhaps this conversion should be done in a special way
1263 by subtracting nonascii-insert-offset from each non-ASCII char,
1264 so that only the multibyte chars which really correspond to
1265 the chosen single-byte character set can possibly match. */
1266 raw_pattern_size = SCHARS (string);
1267 raw_pattern_size_byte = SCHARS (string);
1268 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1269 copy_text (SDATA (string), raw_pattern,
1270 SBYTES (string), 1, 0);
1271 }
1272
1273 /* Copy and optionally translate the pattern. */
1274 len = raw_pattern_size;
1275 len_byte = raw_pattern_size_byte;
1276 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1277 pat = patbuf;
1278 base_pat = raw_pattern;
1279 if (multibyte)
1280 {
1281 /* Fill patbuf by translated characters in STRING while
1282 checking if we can use boyer-moore search. If TRT is
1283 non-nil, we can use boyer-moore search only if TRT can be
1284 represented by the byte array of 256 elements. For that,
1285 all non-ASCII case-equivalents of all case-senstive
1286 characters in STRING must belong to the same charset and
1287 row. */
1288
1289 while (--len >= 0)
1290 {
1291 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1292 int c, translated, inverse;
1293 int in_charlen, charlen;
1294
1295 /* If we got here and the RE flag is set, it's because we're
1296 dealing with a regexp known to be trivial, so the backslash
1297 just quotes the next character. */
1298 if (RE && *base_pat == '\\')
1299 {
1300 len--;
1301 raw_pattern_size--;
1302 len_byte--;
1303 base_pat++;
1304 }
1305
1306 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1307
1308 if (NILP (trt))
1309 {
1310 str = base_pat;
1311 charlen = in_charlen;
1312 }
1313 else
1314 {
1315 /* Translate the character. */
1316 TRANSLATE (translated, trt, c);
1317 charlen = CHAR_STRING (translated, str_base);
1318 str = str_base;
1319
1320 /* Check if C has any other case-equivalents. */
1321 TRANSLATE (inverse, inverse_trt, c);
1322 /* If so, check if we can use boyer-moore. */
1323 if (c != inverse && boyer_moore_ok)
1324 {
1325 /* Check if all equivalents belong to the same
1326 group of characters. Note that the check of C
1327 itself is done by the last iteration. */
1328 int this_char_base = -1;
1329
1330 while (boyer_moore_ok)
1331 {
1332 if (ASCII_BYTE_P (inverse))
1333 {
1334 if (this_char_base > 0)
1335 boyer_moore_ok = 0;
1336 else
1337 {
1338 this_char_base = 0;
1339 if (char_base < 0)
1340 char_base = this_char_base;
1341 }
1342 }
1343 else if (CHAR_BYTE8_P (inverse))
1344 /* Boyer-moore search can't handle a
1345 translation of an eight-bit
1346 character. */
1347 boyer_moore_ok = 0;
1348 else if (this_char_base < 0)
1349 {
1350 this_char_base = inverse & ~0x3F;
1351 if (char_base < 0)
1352 char_base = this_char_base;
1353 else if (char_base > 0
1354 && this_char_base != char_base)
1355 boyer_moore_ok = 0;
1356 }
1357 else if ((inverse & ~0x3F) != this_char_base)
1358 boyer_moore_ok = 0;
1359 if (c == inverse)
1360 break;
1361 TRANSLATE (inverse, inverse_trt, inverse);
1362 }
1363 }
1364 }
1365 if (char_base < 0)
1366 char_base = 0;
1367
1368 /* Store this character into the translated pattern. */
1369 bcopy (str, pat, charlen);
1370 pat += charlen;
1371 base_pat += in_charlen;
1372 len_byte -= in_charlen;
1373 }
1374 }
1375 else
1376 {
1377 /* Unibyte buffer. */
1378 char_base = 0;
1379 while (--len >= 0)
1380 {
1381 int c, translated;
1382
1383 /* If we got here and the RE flag is set, it's because we're
1384 dealing with a regexp known to be trivial, so the backslash
1385 just quotes the next character. */
1386 if (RE && *base_pat == '\\')
1387 {
1388 len--;
1389 raw_pattern_size--;
1390 base_pat++;
1391 }
1392 c = *base_pat++;
1393 TRANSLATE (translated, trt, c);
1394 *pat++ = translated;
1395 }
1396 }
1397
1398 len_byte = pat - patbuf;
1399 len = raw_pattern_size;
1400 pat = base_pat = patbuf;
1401
1402 if (boyer_moore_ok)
1403 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1404 pos, pos_byte, lim, lim_byte,
1405 char_base);
1406 else
1407 return simple_search (n, pat, len, len_byte, trt,
1408 pos, pos_byte, lim, lim_byte);
1409 }
1410 }
1411 \f
1412 /* Do a simple string search N times for the string PAT,
1413 whose length is LEN/LEN_BYTE,
1414 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1415 TRT is the translation table.
1416
1417 Return the character position where the match is found.
1418 Otherwise, if M matches remained to be found, return -M.
1419
1420 This kind of search works regardless of what is in PAT and
1421 regardless of what is in TRT. It is used in cases where
1422 boyer_moore cannot work. */
1423
1424 static int
1425 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1426 int n;
1427 unsigned char *pat;
1428 int len, len_byte;
1429 Lisp_Object trt;
1430 int pos, pos_byte;
1431 int lim, lim_byte;
1432 {
1433 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1434 int forward = n > 0;
1435 /* Number of buffer bytes matched. Note that this may be different
1436 from len_byte in a multibyte buffer. */
1437 int match_byte;
1438
1439 if (lim > pos && multibyte)
1440 while (n > 0)
1441 {
1442 while (1)
1443 {
1444 /* Try matching at position POS. */
1445 int this_pos = pos;
1446 int this_pos_byte = pos_byte;
1447 int this_len = len;
1448 int this_len_byte = len_byte;
1449 unsigned char *p = pat;
1450 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1451 goto stop;
1452
1453 while (this_len > 0)
1454 {
1455 int charlen, buf_charlen;
1456 int pat_ch, buf_ch;
1457
1458 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1459 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1460 ZV_BYTE - this_pos_byte,
1461 buf_charlen);
1462 TRANSLATE (buf_ch, trt, buf_ch);
1463
1464 if (buf_ch != pat_ch)
1465 break;
1466
1467 this_len_byte -= charlen;
1468 this_len--;
1469 p += charlen;
1470
1471 this_pos_byte += buf_charlen;
1472 this_pos++;
1473 }
1474
1475 if (this_len == 0)
1476 {
1477 match_byte = this_pos_byte - pos_byte;
1478 pos += len;
1479 pos_byte += match_byte;
1480 break;
1481 }
1482
1483 INC_BOTH (pos, pos_byte);
1484 }
1485
1486 n--;
1487 }
1488 else if (lim > pos)
1489 while (n > 0)
1490 {
1491 while (1)
1492 {
1493 /* Try matching at position POS. */
1494 int this_pos = pos;
1495 int this_len = len;
1496 unsigned char *p = pat;
1497
1498 if (pos + len > lim)
1499 goto stop;
1500
1501 while (this_len > 0)
1502 {
1503 int pat_ch = *p++;
1504 int buf_ch = FETCH_BYTE (this_pos);
1505 TRANSLATE (buf_ch, trt, buf_ch);
1506
1507 if (buf_ch != pat_ch)
1508 break;
1509
1510 this_len--;
1511 this_pos++;
1512 }
1513
1514 if (this_len == 0)
1515 {
1516 match_byte = len;
1517 pos += len;
1518 break;
1519 }
1520
1521 pos++;
1522 }
1523
1524 n--;
1525 }
1526 /* Backwards search. */
1527 else if (lim < pos && multibyte)
1528 while (n < 0)
1529 {
1530 while (1)
1531 {
1532 /* Try matching at position POS. */
1533 int this_pos = pos - len;
1534 int this_pos_byte;
1535 int this_len = len;
1536 int this_len_byte = len_byte;
1537 unsigned char *p = pat;
1538
1539 if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1540 goto stop;
1541 this_pos_byte = CHAR_TO_BYTE (this_pos);
1542 match_byte = pos_byte - this_pos_byte;
1543
1544 while (this_len > 0)
1545 {
1546 int charlen, buf_charlen;
1547 int pat_ch, buf_ch;
1548
1549 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1550 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1551 ZV_BYTE - this_pos_byte,
1552 buf_charlen);
1553 TRANSLATE (buf_ch, trt, buf_ch);
1554
1555 if (buf_ch != pat_ch)
1556 break;
1557
1558 this_len_byte -= charlen;
1559 this_len--;
1560 p += charlen;
1561 this_pos_byte += buf_charlen;
1562 this_pos++;
1563 }
1564
1565 if (this_len == 0)
1566 {
1567 pos -= len;
1568 pos_byte -= match_byte;
1569 break;
1570 }
1571
1572 DEC_BOTH (pos, pos_byte);
1573 }
1574
1575 n++;
1576 }
1577 else if (lim < pos)
1578 while (n < 0)
1579 {
1580 while (1)
1581 {
1582 /* Try matching at position POS. */
1583 int this_pos = pos - len;
1584 int this_len = len;
1585 unsigned char *p = pat;
1586
1587 if (this_pos < lim)
1588 goto stop;
1589
1590 while (this_len > 0)
1591 {
1592 int pat_ch = *p++;
1593 int buf_ch = FETCH_BYTE (this_pos);
1594 TRANSLATE (buf_ch, trt, buf_ch);
1595
1596 if (buf_ch != pat_ch)
1597 break;
1598 this_len--;
1599 this_pos++;
1600 }
1601
1602 if (this_len == 0)
1603 {
1604 match_byte = len;
1605 pos -= len;
1606 break;
1607 }
1608
1609 pos--;
1610 }
1611
1612 n++;
1613 }
1614
1615 stop:
1616 if (n == 0)
1617 {
1618 if (forward)
1619 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1620 else
1621 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1622
1623 return pos;
1624 }
1625 else if (n > 0)
1626 return -n;
1627 else
1628 return n;
1629 }
1630 \f
1631 /* Do Boyer-Moore search N times for the string BASE_PAT,
1632 whose length is LEN/LEN_BYTE,
1633 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1634 DIRECTION says which direction we search in.
1635 TRT and INVERSE_TRT are translation tables.
1636 Characters in PAT are already translated by TRT.
1637
1638 This kind of search works if all the characters in BASE_PAT that
1639 have nontrivial translation are the same aside from the last byte.
1640 This makes it possible to translate just the last byte of a
1641 character, and do so after just a simple test of the context.
1642 CHAR_BASE is nonzero if there is such a non-ASCII character.
1643
1644 If that criterion is not satisfied, do not call this function. */
1645
1646 static int
1647 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1648 pos, pos_byte, lim, lim_byte, char_base)
1649 int n;
1650 unsigned char *base_pat;
1651 int len, len_byte;
1652 Lisp_Object trt;
1653 Lisp_Object inverse_trt;
1654 int pos, pos_byte;
1655 int lim, lim_byte;
1656 int char_base;
1657 {
1658 int direction = ((n > 0) ? 1 : -1);
1659 register int dirlen;
1660 int infinity, limit, stride_for_teases = 0;
1661 register int *BM_tab;
1662 int *BM_tab_base;
1663 register unsigned char *cursor, *p_limit;
1664 register int i, j;
1665 unsigned char *pat, *pat_end;
1666 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1667
1668 unsigned char simple_translate[0400];
1669 /* These are set to the preceding bytes of a byte to be translated
1670 if char_base is nonzero. As the maximum byte length of a
1671 multibyte character is 5, we have to check at most four previous
1672 bytes. */
1673 int translate_prev_byte1 = 0;
1674 int translate_prev_byte2 = 0;
1675 int translate_prev_byte3 = 0;
1676 int translate_prev_byte4 = 0;
1677
1678 BM_tab = (int *) alloca (0400 * sizeof (int));
1679
1680 /* The general approach is that we are going to maintain that we know */
1681 /* the first (closest to the present position, in whatever direction */
1682 /* we're searching) character that could possibly be the last */
1683 /* (furthest from present position) character of a valid match. We */
1684 /* advance the state of our knowledge by looking at that character */
1685 /* and seeing whether it indeed matches the last character of the */
1686 /* pattern. If it does, we take a closer look. If it does not, we */
1687 /* move our pointer (to putative last characters) as far as is */
1688 /* logically possible. This amount of movement, which I call a */
1689 /* stride, will be the length of the pattern if the actual character */
1690 /* appears nowhere in the pattern, otherwise it will be the distance */
1691 /* from the last occurrence of that character to the end of the */
1692 /* pattern. */
1693 /* As a coding trick, an enormous stride is coded into the table for */
1694 /* characters that match the last character. This allows use of only */
1695 /* a single test, a test for having gone past the end of the */
1696 /* permissible match region, to test for both possible matches (when */
1697 /* the stride goes past the end immediately) and failure to */
1698 /* match (where you get nudged past the end one stride at a time). */
1699
1700 /* Here we make a "mickey mouse" BM table. The stride of the search */
1701 /* is determined only by the last character of the putative match. */
1702 /* If that character does not match, we will stride the proper */
1703 /* distance to propose a match that superimposes it on the last */
1704 /* instance of a character that matches it (per trt), or misses */
1705 /* it entirely if there is none. */
1706
1707 dirlen = len_byte * direction;
1708 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1709
1710 /* Record position after the end of the pattern. */
1711 pat_end = base_pat + len_byte;
1712 /* BASE_PAT points to a character that we start scanning from.
1713 It is the first character in a forward search,
1714 the last character in a backward search. */
1715 if (direction < 0)
1716 base_pat = pat_end - 1;
1717
1718 BM_tab_base = BM_tab;
1719 BM_tab += 0400;
1720 j = dirlen; /* to get it in a register */
1721 /* A character that does not appear in the pattern induces a */
1722 /* stride equal to the pattern length. */
1723 while (BM_tab_base != BM_tab)
1724 {
1725 *--BM_tab = j;
1726 *--BM_tab = j;
1727 *--BM_tab = j;
1728 *--BM_tab = j;
1729 }
1730
1731 /* We use this for translation, instead of TRT itself.
1732 We fill this in to handle the characters that actually
1733 occur in the pattern. Others don't matter anyway! */
1734 bzero (simple_translate, sizeof simple_translate);
1735 for (i = 0; i < 0400; i++)
1736 simple_translate[i] = i;
1737
1738 if (char_base)
1739 {
1740 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1741 byte following them are the target of translation. */
1742 unsigned char str[MAX_MULTIBYTE_LENGTH];
1743 int len = CHAR_STRING (char_base, str);
1744
1745 translate_prev_byte1 = str[len - 2];
1746 if (len > 2)
1747 {
1748 translate_prev_byte2 = str[len - 3];
1749 if (len > 3)
1750 {
1751 translate_prev_byte3 = str[len - 4];
1752 if (len > 4)
1753 translate_prev_byte4 = str[len - 5];
1754 }
1755 }
1756 }
1757
1758 i = 0;
1759 while (i != infinity)
1760 {
1761 unsigned char *ptr = base_pat + i;
1762 i += direction;
1763 if (i == dirlen)
1764 i = infinity;
1765 if (! NILP (trt))
1766 {
1767 /* If the byte currently looking at is the last of a
1768 character to check case-equivalents, set CH to that
1769 character. An ASCII character and a non-ASCII character
1770 matching with CHAR_BASE are to be checked. */
1771 int ch = -1;
1772
1773 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1774 ch = *ptr;
1775 else if (char_base
1776 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1777 {
1778 unsigned char *charstart = ptr - 1;
1779
1780 while (! (CHAR_HEAD_P (*charstart)))
1781 charstart--;
1782 ch = STRING_CHAR (charstart, ptr - charstart + 1);
1783 if (char_base != (ch & ~0x3F))
1784 ch = -1;
1785 }
1786
1787 if (ch >= 0400)
1788 j = (ch & 0x3F) | 0200;
1789 else
1790 j = *ptr;
1791
1792 if (i == infinity)
1793 stride_for_teases = BM_tab[j];
1794
1795 BM_tab[j] = dirlen - i;
1796 /* A translation table is accompanied by its inverse -- see */
1797 /* comment following downcase_table for details */
1798 if (ch >= 0)
1799 {
1800 int starting_ch = ch;
1801 int starting_j = j;
1802
1803 while (1)
1804 {
1805 TRANSLATE (ch, inverse_trt, ch);
1806 if (ch >= 0400)
1807 j = (ch & 0x3F) | 0200;
1808 else
1809 j = ch;
1810
1811 /* For all the characters that map into CH,
1812 set up simple_translate to map the last byte
1813 into STARTING_J. */
1814 simple_translate[j] = starting_j;
1815 if (ch == starting_ch)
1816 break;
1817 BM_tab[j] = dirlen - i;
1818 }
1819 }
1820 }
1821 else
1822 {
1823 j = *ptr;
1824
1825 if (i == infinity)
1826 stride_for_teases = BM_tab[j];
1827 BM_tab[j] = dirlen - i;
1828 }
1829 /* stride_for_teases tells how much to stride if we get a */
1830 /* match on the far character but are subsequently */
1831 /* disappointed, by recording what the stride would have been */
1832 /* for that character if the last character had been */
1833 /* different. */
1834 }
1835 infinity = dirlen - infinity;
1836 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1837 /* loop invariant - POS_BYTE points at where last char (first
1838 char if reverse) of pattern would align in a possible match. */
1839 while (n != 0)
1840 {
1841 int tail_end;
1842 unsigned char *tail_end_ptr;
1843
1844 /* It's been reported that some (broken) compiler thinks that
1845 Boolean expressions in an arithmetic context are unsigned.
1846 Using an explicit ?1:0 prevents this. */
1847 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1848 < 0)
1849 return (n * (0 - direction));
1850 /* First we do the part we can by pointers (maybe nothing) */
1851 QUIT;
1852 pat = base_pat;
1853 limit = pos_byte - dirlen + direction;
1854 if (direction > 0)
1855 {
1856 limit = BUFFER_CEILING_OF (limit);
1857 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1858 can take on without hitting edge of buffer or the gap. */
1859 limit = min (limit, pos_byte + 20000);
1860 limit = min (limit, lim_byte - 1);
1861 }
1862 else
1863 {
1864 limit = BUFFER_FLOOR_OF (limit);
1865 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1866 can take on without hitting edge of buffer or the gap. */
1867 limit = max (limit, pos_byte - 20000);
1868 limit = max (limit, lim_byte);
1869 }
1870 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1871 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1872
1873 if ((limit - pos_byte) * direction > 20)
1874 {
1875 unsigned char *p2;
1876
1877 p_limit = BYTE_POS_ADDR (limit);
1878 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1879 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1880 while (1) /* use one cursor setting as long as i can */
1881 {
1882 if (direction > 0) /* worth duplicating */
1883 {
1884 /* Use signed comparison if appropriate
1885 to make cursor+infinity sure to be > p_limit.
1886 Assuming that the buffer lies in a range of addresses
1887 that are all "positive" (as ints) or all "negative",
1888 either kind of comparison will work as long
1889 as we don't step by infinity. So pick the kind
1890 that works when we do step by infinity. */
1891 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1892 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1893 cursor += BM_tab[*cursor];
1894 else
1895 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1896 cursor += BM_tab[*cursor];
1897 }
1898 else
1899 {
1900 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1901 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1902 cursor += BM_tab[*cursor];
1903 else
1904 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1905 cursor += BM_tab[*cursor];
1906 }
1907 /* If you are here, cursor is beyond the end of the searched region. */
1908 /* This can happen if you match on the far character of the pattern, */
1909 /* because the "stride" of that character is infinity, a number able */
1910 /* to throw you well beyond the end of the search. It can also */
1911 /* happen if you fail to match within the permitted region and would */
1912 /* otherwise try a character beyond that region */
1913 if ((cursor - p_limit) * direction <= len_byte)
1914 break; /* a small overrun is genuine */
1915 cursor -= infinity; /* large overrun = hit */
1916 i = dirlen - direction;
1917 if (! NILP (trt))
1918 {
1919 while ((i -= direction) + direction != 0)
1920 {
1921 int ch;
1922 cursor -= direction;
1923 /* Translate only the last byte of a character. */
1924 if (! multibyte
1925 || ((cursor == tail_end_ptr
1926 || CHAR_HEAD_P (cursor[1]))
1927 && (CHAR_HEAD_P (cursor[0])
1928 /* Check if this is the last byte of
1929 a translable character. */
1930 || (translate_prev_byte1 == cursor[-1]
1931 && (CHAR_HEAD_P (translate_prev_byte1)
1932 || (translate_prev_byte2 == cursor[-2]
1933 && (CHAR_HEAD_P (translate_prev_byte2)
1934 || (translate_prev_byte3 == cursor[-3]))))))))
1935 ch = simple_translate[*cursor];
1936 else
1937 ch = *cursor;
1938 if (pat[i] != ch)
1939 break;
1940 }
1941 }
1942 else
1943 {
1944 while ((i -= direction) + direction != 0)
1945 {
1946 cursor -= direction;
1947 if (pat[i] != *cursor)
1948 break;
1949 }
1950 }
1951 cursor += dirlen - i - direction; /* fix cursor */
1952 if (i + direction == 0)
1953 {
1954 int position, start, end;
1955
1956 cursor -= direction;
1957
1958 position = pos_byte + cursor - p2 + ((direction > 0)
1959 ? 1 - len_byte : 0);
1960 set_search_regs (position, len_byte);
1961
1962 if (NILP (Vinhibit_changing_match_data))
1963 {
1964 start = search_regs.start[0];
1965 end = search_regs.end[0];
1966 }
1967 else
1968 /* If Vinhibit_changing_match_data is non-nil,
1969 search_regs will not be changed. So let's
1970 compute start and end here. */
1971 {
1972 start = BYTE_TO_CHAR (position);
1973 end = BYTE_TO_CHAR (position + len_byte);
1974 }
1975
1976 if ((n -= direction) != 0)
1977 cursor += dirlen; /* to resume search */
1978 else
1979 return direction > 0 ? end : start;
1980 }
1981 else
1982 cursor += stride_for_teases; /* <sigh> we lose - */
1983 }
1984 pos_byte += cursor - p2;
1985 }
1986 else
1987 /* Now we'll pick up a clump that has to be done the hard */
1988 /* way because it covers a discontinuity */
1989 {
1990 limit = ((direction > 0)
1991 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1992 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1993 limit = ((direction > 0)
1994 ? min (limit + len_byte, lim_byte - 1)
1995 : max (limit - len_byte, lim_byte));
1996 /* LIMIT is now the last value POS_BYTE can have
1997 and still be valid for a possible match. */
1998 while (1)
1999 {
2000 /* This loop can be coded for space rather than */
2001 /* speed because it will usually run only once. */
2002 /* (the reach is at most len + 21, and typically */
2003 /* does not exceed len) */
2004 while ((limit - pos_byte) * direction >= 0)
2005 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2006 /* now run the same tests to distinguish going off the */
2007 /* end, a match or a phony match. */
2008 if ((pos_byte - limit) * direction <= len_byte)
2009 break; /* ran off the end */
2010 /* Found what might be a match.
2011 Set POS_BYTE back to last (first if reverse) pos. */
2012 pos_byte -= infinity;
2013 i = dirlen - direction;
2014 while ((i -= direction) + direction != 0)
2015 {
2016 int ch;
2017 unsigned char *ptr;
2018 pos_byte -= direction;
2019 ptr = BYTE_POS_ADDR (pos_byte);
2020 /* Translate only the last byte of a character. */
2021 if (! multibyte
2022 || ((ptr == tail_end_ptr
2023 || CHAR_HEAD_P (ptr[1]))
2024 && (CHAR_HEAD_P (ptr[0])
2025 /* Check if this is the last byte of a
2026 translable character. */
2027 || (translate_prev_byte1 == ptr[-1]
2028 && (CHAR_HEAD_P (translate_prev_byte1)
2029 || (translate_prev_byte2 == ptr[-2]
2030 && (CHAR_HEAD_P (translate_prev_byte2)
2031 || translate_prev_byte3 == ptr[-3])))))))
2032 ch = simple_translate[*ptr];
2033 else
2034 ch = *ptr;
2035 if (pat[i] != ch)
2036 break;
2037 }
2038 /* Above loop has moved POS_BYTE part or all the way
2039 back to the first pos (last pos if reverse).
2040 Set it once again at the last (first if reverse) char. */
2041 pos_byte += dirlen - i- direction;
2042 if (i + direction == 0)
2043 {
2044 int position, start, end;
2045 pos_byte -= direction;
2046
2047 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2048 set_search_regs (position, len_byte);
2049
2050 if (NILP (Vinhibit_changing_match_data))
2051 {
2052 start = search_regs.start[0];
2053 end = search_regs.end[0];
2054 }
2055 else
2056 /* If Vinhibit_changing_match_data is non-nil,
2057 search_regs will not be changed. So let's
2058 compute start and end here. */
2059 {
2060 start = BYTE_TO_CHAR (position);
2061 end = BYTE_TO_CHAR (position + len_byte);
2062 }
2063
2064 if ((n -= direction) != 0)
2065 pos_byte += dirlen; /* to resume search */
2066 else
2067 return direction > 0 ? end : start;
2068 }
2069 else
2070 pos_byte += stride_for_teases;
2071 }
2072 }
2073 /* We have done one clump. Can we continue? */
2074 if ((lim_byte - pos_byte) * direction < 0)
2075 return ((0 - n) * direction);
2076 }
2077 return BYTE_TO_CHAR (pos_byte);
2078 }
2079
2080 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2081 for the overall match just found in the current buffer.
2082 Also clear out the match data for registers 1 and up. */
2083
2084 static void
2085 set_search_regs (beg_byte, nbytes)
2086 int beg_byte, nbytes;
2087 {
2088 int i;
2089
2090 if (!NILP (Vinhibit_changing_match_data))
2091 return;
2092
2093 /* Make sure we have registers in which to store
2094 the match position. */
2095 if (search_regs.num_regs == 0)
2096 {
2097 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2098 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2099 search_regs.num_regs = 2;
2100 }
2101
2102 /* Clear out the other registers. */
2103 for (i = 1; i < search_regs.num_regs; i++)
2104 {
2105 search_regs.start[i] = -1;
2106 search_regs.end[i] = -1;
2107 }
2108
2109 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2110 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2111 XSETBUFFER (last_thing_searched, current_buffer);
2112 }
2113 \f
2114 /* Given a string of words separated by word delimiters,
2115 compute a regexp that matches those exact words
2116 separated by arbitrary punctuation. */
2117
2118 static Lisp_Object
2119 wordify (string)
2120 Lisp_Object string;
2121 {
2122 register unsigned char *p, *o;
2123 register int i, i_byte, len, punct_count = 0, word_count = 0;
2124 Lisp_Object val;
2125 int prev_c = 0;
2126 int adjust;
2127
2128 CHECK_STRING (string);
2129 p = SDATA (string);
2130 len = SCHARS (string);
2131
2132 for (i = 0, i_byte = 0; i < len; )
2133 {
2134 int c;
2135
2136 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2137
2138 if (SYNTAX (c) != Sword)
2139 {
2140 punct_count++;
2141 if (i > 0 && SYNTAX (prev_c) == Sword)
2142 word_count++;
2143 }
2144
2145 prev_c = c;
2146 }
2147
2148 if (SYNTAX (prev_c) == Sword)
2149 word_count++;
2150 if (!word_count)
2151 return empty_unibyte_string;
2152
2153 adjust = - punct_count + 5 * (word_count - 1) + 4;
2154 if (STRING_MULTIBYTE (string))
2155 val = make_uninit_multibyte_string (len + adjust,
2156 SBYTES (string)
2157 + adjust);
2158 else
2159 val = make_uninit_string (len + adjust);
2160
2161 o = SDATA (val);
2162 *o++ = '\\';
2163 *o++ = 'b';
2164 prev_c = 0;
2165
2166 for (i = 0, i_byte = 0; i < len; )
2167 {
2168 int c;
2169 int i_byte_orig = i_byte;
2170
2171 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2172
2173 if (SYNTAX (c) == Sword)
2174 {
2175 bcopy (SDATA (string) + i_byte_orig, o,
2176 i_byte - i_byte_orig);
2177 o += i_byte - i_byte_orig;
2178 }
2179 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2180 {
2181 *o++ = '\\';
2182 *o++ = 'W';
2183 *o++ = '\\';
2184 *o++ = 'W';
2185 *o++ = '*';
2186 }
2187
2188 prev_c = c;
2189 }
2190
2191 *o++ = '\\';
2192 *o++ = 'b';
2193
2194 return val;
2195 }
2196 \f
2197 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2198 "MSearch backward: ",
2199 doc: /* Search backward from point for STRING.
2200 Set point to the beginning of the occurrence found, and return point.
2201 An optional second argument bounds the search; it is a buffer position.
2202 The match found must not extend before that position.
2203 Optional third argument, if t, means if fail just return nil (no error).
2204 If not nil and not t, position at limit of search and return nil.
2205 Optional fourth argument is repeat count--search for successive occurrences.
2206
2207 Search case-sensitivity is determined by the value of the variable
2208 `case-fold-search', which see.
2209
2210 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2211 (string, bound, noerror, count)
2212 Lisp_Object string, bound, noerror, count;
2213 {
2214 return search_command (string, bound, noerror, count, -1, 0, 0);
2215 }
2216
2217 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2218 doc: /* Search forward from point for STRING.
2219 Set point to the end of the occurrence found, and return point.
2220 An optional second argument bounds the search; it is a buffer position.
2221 The match found must not extend after that position. A value of nil is
2222 equivalent to (point-max).
2223 Optional third argument, if t, means if fail just return nil (no error).
2224 If not nil and not t, move to limit of search and return nil.
2225 Optional fourth argument is repeat count--search for successive occurrences.
2226
2227 Search case-sensitivity is determined by the value of the variable
2228 `case-fold-search', which see.
2229
2230 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2231 (string, bound, noerror, count)
2232 Lisp_Object string, bound, noerror, count;
2233 {
2234 return search_command (string, bound, noerror, count, 1, 0, 0);
2235 }
2236
2237 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2238 "sWord search backward: ",
2239 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2240 Set point to the beginning of the occurrence found, and return point.
2241 An optional second argument bounds the search; it is a buffer position.
2242 The match found must not extend before that position.
2243 Optional third argument, if t, means if fail just return nil (no error).
2244 If not nil and not t, move to limit of search and return nil.
2245 Optional fourth argument is repeat count--search for successive occurrences. */)
2246 (string, bound, noerror, count)
2247 Lisp_Object string, bound, noerror, count;
2248 {
2249 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2250 }
2251
2252 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2253 "sWord search: ",
2254 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2255 Set point to the end of the occurrence found, and return point.
2256 An optional second argument bounds the search; it is a buffer position.
2257 The match found must not extend after that position.
2258 Optional third argument, if t, means if fail just return nil (no error).
2259 If not nil and not t, move to limit of search and return nil.
2260 Optional fourth argument is repeat count--search for successive occurrences. */)
2261 (string, bound, noerror, count)
2262 Lisp_Object string, bound, noerror, count;
2263 {
2264 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2265 }
2266
2267 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2268 "sRE search backward: ",
2269 doc: /* Search backward from point for match for regular expression REGEXP.
2270 Set point to the beginning of the match, and return point.
2271 The match found is the one starting last in the buffer
2272 and yet ending before the origin of the search.
2273 An optional second argument bounds the search; it is a buffer position.
2274 The match found must start at or after that position.
2275 Optional third argument, if t, means if fail just return nil (no error).
2276 If not nil and not t, move to limit of search and return nil.
2277 Optional fourth argument is repeat count--search for successive occurrences.
2278 See also the functions `match-beginning', `match-end', `match-string',
2279 and `replace-match'. */)
2280 (regexp, bound, noerror, count)
2281 Lisp_Object regexp, bound, noerror, count;
2282 {
2283 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2284 }
2285
2286 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2287 "sRE search: ",
2288 doc: /* Search forward from point for regular expression REGEXP.
2289 Set point to the end of the occurrence found, and return point.
2290 An optional second argument bounds the search; it is a buffer position.
2291 The match found must not extend after that position.
2292 Optional third argument, if t, means if fail just return nil (no error).
2293 If not nil and not t, move to limit of search and return nil.
2294 Optional fourth argument is repeat count--search for successive occurrences.
2295 See also the functions `match-beginning', `match-end', `match-string',
2296 and `replace-match'. */)
2297 (regexp, bound, noerror, count)
2298 Lisp_Object regexp, bound, noerror, count;
2299 {
2300 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2301 }
2302
2303 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2304 "sPosix search backward: ",
2305 doc: /* Search backward from point for match for regular expression REGEXP.
2306 Find the longest match in accord with Posix regular expression rules.
2307 Set point to the beginning of the match, and return point.
2308 The match found is the one starting last in the buffer
2309 and yet ending before the origin of the search.
2310 An optional second argument bounds the search; it is a buffer position.
2311 The match found must start at or after that position.
2312 Optional third argument, if t, means if fail just return nil (no error).
2313 If not nil and not t, move to limit of search and return nil.
2314 Optional fourth argument is repeat count--search for successive occurrences.
2315 See also the functions `match-beginning', `match-end', `match-string',
2316 and `replace-match'. */)
2317 (regexp, bound, noerror, count)
2318 Lisp_Object regexp, bound, noerror, count;
2319 {
2320 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2321 }
2322
2323 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2324 "sPosix search: ",
2325 doc: /* Search forward from point for regular expression REGEXP.
2326 Find the longest match in accord with Posix regular expression rules.
2327 Set point to the end of the occurrence found, and return point.
2328 An optional second argument bounds the search; it is a buffer position.
2329 The match found must not extend after that position.
2330 Optional third argument, if t, means if fail just return nil (no error).
2331 If not nil and not t, move to limit of search and return nil.
2332 Optional fourth argument is repeat count--search for successive occurrences.
2333 See also the functions `match-beginning', `match-end', `match-string',
2334 and `replace-match'. */)
2335 (regexp, bound, noerror, count)
2336 Lisp_Object regexp, bound, noerror, count;
2337 {
2338 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2339 }
2340 \f
2341 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2342 doc: /* Replace text matched by last search with NEWTEXT.
2343 Leave point at the end of the replacement text.
2344
2345 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2346 Otherwise maybe capitalize the whole text, or maybe just word initials,
2347 based on the replaced text.
2348 If the replaced text has only capital letters
2349 and has at least one multiletter word, convert NEWTEXT to all caps.
2350 Otherwise if all words are capitalized in the replaced text,
2351 capitalize each word in NEWTEXT.
2352
2353 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2354 Otherwise treat `\\' as special:
2355 `\\&' in NEWTEXT means substitute original matched text.
2356 `\\N' means substitute what matched the Nth `\\(...\\)'.
2357 If Nth parens didn't match, substitute nothing.
2358 `\\\\' means insert one `\\'.
2359 Case conversion does not apply to these substitutions.
2360
2361 FIXEDCASE and LITERAL are optional arguments.
2362
2363 The optional fourth argument STRING can be a string to modify.
2364 This is meaningful when the previous match was done against STRING,
2365 using `string-match'. When used this way, `replace-match'
2366 creates and returns a new string made by copying STRING and replacing
2367 the part of STRING that was matched.
2368
2369 The optional fifth argument SUBEXP specifies a subexpression;
2370 it says to replace just that subexpression with NEWTEXT,
2371 rather than replacing the entire matched text.
2372 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2373 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2374 NEWTEXT in place of subexp N.
2375 This is useful only after a regular expression search or match,
2376 since only regular expressions have distinguished subexpressions. */)
2377 (newtext, fixedcase, literal, string, subexp)
2378 Lisp_Object newtext, fixedcase, literal, string, subexp;
2379 {
2380 enum { nochange, all_caps, cap_initial } case_action;
2381 register int pos, pos_byte;
2382 int some_multiletter_word;
2383 int some_lowercase;
2384 int some_uppercase;
2385 int some_nonuppercase_initial;
2386 register int c, prevc;
2387 int sub;
2388 int opoint, newpoint;
2389
2390 CHECK_STRING (newtext);
2391
2392 if (! NILP (string))
2393 CHECK_STRING (string);
2394
2395 case_action = nochange; /* We tried an initialization */
2396 /* but some C compilers blew it */
2397
2398 if (search_regs.num_regs <= 0)
2399 error ("`replace-match' called before any match found");
2400
2401 if (NILP (subexp))
2402 sub = 0;
2403 else
2404 {
2405 CHECK_NUMBER (subexp);
2406 sub = XINT (subexp);
2407 if (sub < 0 || sub >= search_regs.num_regs)
2408 args_out_of_range (subexp, make_number (search_regs.num_regs));
2409 }
2410
2411 if (NILP (string))
2412 {
2413 if (search_regs.start[sub] < BEGV
2414 || search_regs.start[sub] > search_regs.end[sub]
2415 || search_regs.end[sub] > ZV)
2416 args_out_of_range (make_number (search_regs.start[sub]),
2417 make_number (search_regs.end[sub]));
2418 }
2419 else
2420 {
2421 if (search_regs.start[sub] < 0
2422 || search_regs.start[sub] > search_regs.end[sub]
2423 || search_regs.end[sub] > SCHARS (string))
2424 args_out_of_range (make_number (search_regs.start[sub]),
2425 make_number (search_regs.end[sub]));
2426 }
2427
2428 if (NILP (fixedcase))
2429 {
2430 /* Decide how to casify by examining the matched text. */
2431 int last;
2432
2433 pos = search_regs.start[sub];
2434 last = search_regs.end[sub];
2435
2436 if (NILP (string))
2437 pos_byte = CHAR_TO_BYTE (pos);
2438 else
2439 pos_byte = string_char_to_byte (string, pos);
2440
2441 prevc = '\n';
2442 case_action = all_caps;
2443
2444 /* some_multiletter_word is set nonzero if any original word
2445 is more than one letter long. */
2446 some_multiletter_word = 0;
2447 some_lowercase = 0;
2448 some_nonuppercase_initial = 0;
2449 some_uppercase = 0;
2450
2451 while (pos < last)
2452 {
2453 if (NILP (string))
2454 {
2455 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2456 INC_BOTH (pos, pos_byte);
2457 }
2458 else
2459 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2460
2461 if (LOWERCASEP (c))
2462 {
2463 /* Cannot be all caps if any original char is lower case */
2464
2465 some_lowercase = 1;
2466 if (SYNTAX (prevc) != Sword)
2467 some_nonuppercase_initial = 1;
2468 else
2469 some_multiletter_word = 1;
2470 }
2471 else if (UPPERCASEP (c))
2472 {
2473 some_uppercase = 1;
2474 if (SYNTAX (prevc) != Sword)
2475 ;
2476 else
2477 some_multiletter_word = 1;
2478 }
2479 else
2480 {
2481 /* If the initial is a caseless word constituent,
2482 treat that like a lowercase initial. */
2483 if (SYNTAX (prevc) != Sword)
2484 some_nonuppercase_initial = 1;
2485 }
2486
2487 prevc = c;
2488 }
2489
2490 /* Convert to all caps if the old text is all caps
2491 and has at least one multiletter word. */
2492 if (! some_lowercase && some_multiletter_word)
2493 case_action = all_caps;
2494 /* Capitalize each word, if the old text has all capitalized words. */
2495 else if (!some_nonuppercase_initial && some_multiletter_word)
2496 case_action = cap_initial;
2497 else if (!some_nonuppercase_initial && some_uppercase)
2498 /* Should x -> yz, operating on X, give Yz or YZ?
2499 We'll assume the latter. */
2500 case_action = all_caps;
2501 else
2502 case_action = nochange;
2503 }
2504
2505 /* Do replacement in a string. */
2506 if (!NILP (string))
2507 {
2508 Lisp_Object before, after;
2509
2510 before = Fsubstring (string, make_number (0),
2511 make_number (search_regs.start[sub]));
2512 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2513
2514 /* Substitute parts of the match into NEWTEXT
2515 if desired. */
2516 if (NILP (literal))
2517 {
2518 int lastpos = 0;
2519 int lastpos_byte = 0;
2520 /* We build up the substituted string in ACCUM. */
2521 Lisp_Object accum;
2522 Lisp_Object middle;
2523 int length = SBYTES (newtext);
2524
2525 accum = Qnil;
2526
2527 for (pos_byte = 0, pos = 0; pos_byte < length;)
2528 {
2529 int substart = -1;
2530 int subend = 0;
2531 int delbackslash = 0;
2532
2533 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2534
2535 if (c == '\\')
2536 {
2537 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2538
2539 if (c == '&')
2540 {
2541 substart = search_regs.start[sub];
2542 subend = search_regs.end[sub];
2543 }
2544 else if (c >= '1' && c <= '9')
2545 {
2546 if (search_regs.start[c - '0'] >= 0
2547 && c <= search_regs.num_regs + '0')
2548 {
2549 substart = search_regs.start[c - '0'];
2550 subend = search_regs.end[c - '0'];
2551 }
2552 else
2553 {
2554 /* If that subexp did not match,
2555 replace \\N with nothing. */
2556 substart = 0;
2557 subend = 0;
2558 }
2559 }
2560 else if (c == '\\')
2561 delbackslash = 1;
2562 else
2563 error ("Invalid use of `\\' in replacement text");
2564 }
2565 if (substart >= 0)
2566 {
2567 if (pos - 2 != lastpos)
2568 middle = substring_both (newtext, lastpos,
2569 lastpos_byte,
2570 pos - 2, pos_byte - 2);
2571 else
2572 middle = Qnil;
2573 accum = concat3 (accum, middle,
2574 Fsubstring (string,
2575 make_number (substart),
2576 make_number (subend)));
2577 lastpos = pos;
2578 lastpos_byte = pos_byte;
2579 }
2580 else if (delbackslash)
2581 {
2582 middle = substring_both (newtext, lastpos,
2583 lastpos_byte,
2584 pos - 1, pos_byte - 1);
2585
2586 accum = concat2 (accum, middle);
2587 lastpos = pos;
2588 lastpos_byte = pos_byte;
2589 }
2590 }
2591
2592 if (pos != lastpos)
2593 middle = substring_both (newtext, lastpos,
2594 lastpos_byte,
2595 pos, pos_byte);
2596 else
2597 middle = Qnil;
2598
2599 newtext = concat2 (accum, middle);
2600 }
2601
2602 /* Do case substitution in NEWTEXT if desired. */
2603 if (case_action == all_caps)
2604 newtext = Fupcase (newtext);
2605 else if (case_action == cap_initial)
2606 newtext = Fupcase_initials (newtext);
2607
2608 return concat3 (before, newtext, after);
2609 }
2610
2611 /* Record point, then move (quietly) to the start of the match. */
2612 if (PT >= search_regs.end[sub])
2613 opoint = PT - ZV;
2614 else if (PT > search_regs.start[sub])
2615 opoint = search_regs.end[sub] - ZV;
2616 else
2617 opoint = PT;
2618
2619 /* If we want non-literal replacement,
2620 perform substitution on the replacement string. */
2621 if (NILP (literal))
2622 {
2623 int length = SBYTES (newtext);
2624 unsigned char *substed;
2625 int substed_alloc_size, substed_len;
2626 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2627 int str_multibyte = STRING_MULTIBYTE (newtext);
2628 Lisp_Object rev_tbl;
2629 int really_changed = 0;
2630
2631 rev_tbl = Qnil;
2632
2633 substed_alloc_size = length * 2 + 100;
2634 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2635 substed_len = 0;
2636
2637 /* Go thru NEWTEXT, producing the actual text to insert in
2638 SUBSTED while adjusting multibyteness to that of the current
2639 buffer. */
2640
2641 for (pos_byte = 0, pos = 0; pos_byte < length;)
2642 {
2643 unsigned char str[MAX_MULTIBYTE_LENGTH];
2644 unsigned char *add_stuff = NULL;
2645 int add_len = 0;
2646 int idx = -1;
2647
2648 if (str_multibyte)
2649 {
2650 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2651 if (!buf_multibyte)
2652 c = multibyte_char_to_unibyte (c, rev_tbl);
2653 }
2654 else
2655 {
2656 /* Note that we don't have to increment POS. */
2657 c = SREF (newtext, pos_byte++);
2658 if (buf_multibyte)
2659 c = unibyte_char_to_multibyte (c);
2660 }
2661
2662 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2663 or set IDX to a match index, which means put that part
2664 of the buffer text into SUBSTED. */
2665
2666 if (c == '\\')
2667 {
2668 really_changed = 1;
2669
2670 if (str_multibyte)
2671 {
2672 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2673 pos, pos_byte);
2674 if (!buf_multibyte && !ASCII_CHAR_P (c))
2675 c = multibyte_char_to_unibyte (c, rev_tbl);
2676 }
2677 else
2678 {
2679 c = SREF (newtext, pos_byte++);
2680 if (buf_multibyte)
2681 c = unibyte_char_to_multibyte (c);
2682 }
2683
2684 if (c == '&')
2685 idx = sub;
2686 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2687 {
2688 if (search_regs.start[c - '0'] >= 1)
2689 idx = c - '0';
2690 }
2691 else if (c == '\\')
2692 add_len = 1, add_stuff = "\\";
2693 else
2694 {
2695 xfree (substed);
2696 error ("Invalid use of `\\' in replacement text");
2697 }
2698 }
2699 else
2700 {
2701 add_len = CHAR_STRING (c, str);
2702 add_stuff = str;
2703 }
2704
2705 /* If we want to copy part of a previous match,
2706 set up ADD_STUFF and ADD_LEN to point to it. */
2707 if (idx >= 0)
2708 {
2709 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2710 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2711 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2712 move_gap (search_regs.start[idx]);
2713 add_stuff = BYTE_POS_ADDR (begbyte);
2714 }
2715
2716 /* Now the stuff we want to add to SUBSTED
2717 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2718
2719 /* Make sure SUBSTED is big enough. */
2720 if (substed_len + add_len >= substed_alloc_size)
2721 {
2722 substed_alloc_size = substed_len + add_len + 500;
2723 substed = (unsigned char *) xrealloc (substed,
2724 substed_alloc_size + 1);
2725 }
2726
2727 /* Now add to the end of SUBSTED. */
2728 if (add_stuff)
2729 {
2730 bcopy (add_stuff, substed + substed_len, add_len);
2731 substed_len += add_len;
2732 }
2733 }
2734
2735 if (really_changed)
2736 {
2737 if (buf_multibyte)
2738 {
2739 int nchars = multibyte_chars_in_text (substed, substed_len);
2740
2741 newtext = make_multibyte_string (substed, nchars, substed_len);
2742 }
2743 else
2744 newtext = make_unibyte_string (substed, substed_len);
2745 }
2746 xfree (substed);
2747 }
2748
2749 /* Replace the old text with the new in the cleanest possible way. */
2750 replace_range (search_regs.start[sub], search_regs.end[sub],
2751 newtext, 1, 0, 1);
2752 newpoint = search_regs.start[sub] + SCHARS (newtext);
2753
2754 if (case_action == all_caps)
2755 Fupcase_region (make_number (search_regs.start[sub]),
2756 make_number (newpoint));
2757 else if (case_action == cap_initial)
2758 Fupcase_initials_region (make_number (search_regs.start[sub]),
2759 make_number (newpoint));
2760
2761 /* Adjust search data for this change. */
2762 {
2763 int oldend = search_regs.end[sub];
2764 int oldstart = search_regs.start[sub];
2765 int change = newpoint - search_regs.end[sub];
2766 int i;
2767
2768 for (i = 0; i < search_regs.num_regs; i++)
2769 {
2770 if (search_regs.start[i] >= oldend)
2771 search_regs.start[i] += change;
2772 else if (search_regs.start[i] > oldstart)
2773 search_regs.start[i] = oldstart;
2774 if (search_regs.end[i] >= oldend)
2775 search_regs.end[i] += change;
2776 else if (search_regs.end[i] > oldstart)
2777 search_regs.end[i] = oldstart;
2778 }
2779 }
2780
2781 /* Put point back where it was in the text. */
2782 if (opoint <= 0)
2783 TEMP_SET_PT (opoint + ZV);
2784 else
2785 TEMP_SET_PT (opoint);
2786
2787 /* Now move point "officially" to the start of the inserted replacement. */
2788 move_if_not_intangible (newpoint);
2789
2790 return Qnil;
2791 }
2792 \f
2793 static Lisp_Object
2794 match_limit (num, beginningp)
2795 Lisp_Object num;
2796 int beginningp;
2797 {
2798 register int n;
2799
2800 CHECK_NUMBER (num);
2801 n = XINT (num);
2802 if (n < 0)
2803 args_out_of_range (num, make_number (0));
2804 if (search_regs.num_regs <= 0)
2805 error ("No match data, because no search succeeded");
2806 if (n >= search_regs.num_regs
2807 || search_regs.start[n] < 0)
2808 return Qnil;
2809 return (make_number ((beginningp) ? search_regs.start[n]
2810 : search_regs.end[n]));
2811 }
2812
2813 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2814 doc: /* Return position of start of text matched by last search.
2815 SUBEXP, a number, specifies which parenthesized expression in the last
2816 regexp.
2817 Value is nil if SUBEXPth pair didn't match, or there were less than
2818 SUBEXP pairs.
2819 Zero means the entire text matched by the whole regexp or whole string. */)
2820 (subexp)
2821 Lisp_Object subexp;
2822 {
2823 return match_limit (subexp, 1);
2824 }
2825
2826 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2827 doc: /* Return position of end of text matched by last search.
2828 SUBEXP, a number, specifies which parenthesized expression in the last
2829 regexp.
2830 Value is nil if SUBEXPth pair didn't match, or there were less than
2831 SUBEXP pairs.
2832 Zero means the entire text matched by the whole regexp or whole string. */)
2833 (subexp)
2834 Lisp_Object subexp;
2835 {
2836 return match_limit (subexp, 0);
2837 }
2838
2839 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2840 doc: /* Return a list containing all info on what the last search matched.
2841 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2842 All the elements are markers or nil (nil if the Nth pair didn't match)
2843 if the last match was on a buffer; integers or nil if a string was matched.
2844 Use `store-match-data' to reinstate the data in this list.
2845
2846 If INTEGERS (the optional first argument) is non-nil, always use
2847 integers \(rather than markers) to represent buffer positions. In
2848 this case, and if the last match was in a buffer, the buffer will get
2849 stored as one additional element at the end of the list.
2850
2851 If REUSE is a list, reuse it as part of the value. If REUSE is long
2852 enough to hold all the values, and if INTEGERS is non-nil, no consing
2853 is done.
2854
2855 If optional third arg RESEAT is non-nil, any previous markers on the
2856 REUSE list will be modified to point to nowhere.
2857
2858 Return value is undefined if the last search failed. */)
2859 (integers, reuse, reseat)
2860 Lisp_Object integers, reuse, reseat;
2861 {
2862 Lisp_Object tail, prev;
2863 Lisp_Object *data;
2864 int i, len;
2865
2866 if (!NILP (reseat))
2867 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2868 if (MARKERP (XCAR (tail)))
2869 {
2870 unchain_marker (XMARKER (XCAR (tail)));
2871 XSETCAR (tail, Qnil);
2872 }
2873
2874 if (NILP (last_thing_searched))
2875 return Qnil;
2876
2877 prev = Qnil;
2878
2879 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2880 * sizeof (Lisp_Object));
2881
2882 len = 0;
2883 for (i = 0; i < search_regs.num_regs; i++)
2884 {
2885 int start = search_regs.start[i];
2886 if (start >= 0)
2887 {
2888 if (EQ (last_thing_searched, Qt)
2889 || ! NILP (integers))
2890 {
2891 XSETFASTINT (data[2 * i], start);
2892 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2893 }
2894 else if (BUFFERP (last_thing_searched))
2895 {
2896 data[2 * i] = Fmake_marker ();
2897 Fset_marker (data[2 * i],
2898 make_number (start),
2899 last_thing_searched);
2900 data[2 * i + 1] = Fmake_marker ();
2901 Fset_marker (data[2 * i + 1],
2902 make_number (search_regs.end[i]),
2903 last_thing_searched);
2904 }
2905 else
2906 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2907 abort ();
2908
2909 len = 2 * i + 2;
2910 }
2911 else
2912 data[2 * i] = data[2 * i + 1] = Qnil;
2913 }
2914
2915 if (BUFFERP (last_thing_searched) && !NILP (integers))
2916 {
2917 data[len] = last_thing_searched;
2918 len++;
2919 }
2920
2921 /* If REUSE is not usable, cons up the values and return them. */
2922 if (! CONSP (reuse))
2923 return Flist (len, data);
2924
2925 /* If REUSE is a list, store as many value elements as will fit
2926 into the elements of REUSE. */
2927 for (i = 0, tail = reuse; CONSP (tail);
2928 i++, tail = XCDR (tail))
2929 {
2930 if (i < len)
2931 XSETCAR (tail, data[i]);
2932 else
2933 XSETCAR (tail, Qnil);
2934 prev = tail;
2935 }
2936
2937 /* If we couldn't fit all value elements into REUSE,
2938 cons up the rest of them and add them to the end of REUSE. */
2939 if (i < len)
2940 XSETCDR (prev, Flist (len - i, data + i));
2941
2942 return reuse;
2943 }
2944
2945 /* We used to have an internal use variant of `reseat' described as:
2946
2947 If RESEAT is `evaporate', put the markers back on the free list
2948 immediately. No other references to the markers must exist in this
2949 case, so it is used only internally on the unwind stack and
2950 save-match-data from Lisp.
2951
2952 But it was ill-conceived: those supposedly-internal markers get exposed via
2953 the undo-list, so freeing them here is unsafe. */
2954
2955 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2956 doc: /* Set internal data on last search match from elements of LIST.
2957 LIST should have been created by calling `match-data' previously.
2958
2959 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2960 (list, reseat)
2961 register Lisp_Object list, reseat;
2962 {
2963 register int i;
2964 register Lisp_Object marker;
2965
2966 if (running_asynch_code)
2967 save_search_regs ();
2968
2969 CHECK_LIST (list);
2970
2971 /* Unless we find a marker with a buffer or an explicit buffer
2972 in LIST, assume that this match data came from a string. */
2973 last_thing_searched = Qt;
2974
2975 /* Allocate registers if they don't already exist. */
2976 {
2977 int length = XFASTINT (Flength (list)) / 2;
2978
2979 if (length > search_regs.num_regs)
2980 {
2981 if (search_regs.num_regs == 0)
2982 {
2983 search_regs.start
2984 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2985 search_regs.end
2986 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2987 }
2988 else
2989 {
2990 search_regs.start
2991 = (regoff_t *) xrealloc (search_regs.start,
2992 length * sizeof (regoff_t));
2993 search_regs.end
2994 = (regoff_t *) xrealloc (search_regs.end,
2995 length * sizeof (regoff_t));
2996 }
2997
2998 for (i = search_regs.num_regs; i < length; i++)
2999 search_regs.start[i] = -1;
3000
3001 search_regs.num_regs = length;
3002 }
3003
3004 for (i = 0; CONSP (list); i++)
3005 {
3006 marker = XCAR (list);
3007 if (BUFFERP (marker))
3008 {
3009 last_thing_searched = marker;
3010 break;
3011 }
3012 if (i >= length)
3013 break;
3014 if (NILP (marker))
3015 {
3016 search_regs.start[i] = -1;
3017 list = XCDR (list);
3018 }
3019 else
3020 {
3021 int from;
3022 Lisp_Object m;
3023
3024 m = marker;
3025 if (MARKERP (marker))
3026 {
3027 if (XMARKER (marker)->buffer == 0)
3028 XSETFASTINT (marker, 0);
3029 else
3030 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3031 }
3032
3033 CHECK_NUMBER_COERCE_MARKER (marker);
3034 from = XINT (marker);
3035
3036 if (!NILP (reseat) && MARKERP (m))
3037 {
3038 unchain_marker (XMARKER (m));
3039 XSETCAR (list, Qnil);
3040 }
3041
3042 if ((list = XCDR (list), !CONSP (list)))
3043 break;
3044
3045 m = marker = XCAR (list);
3046
3047 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3048 XSETFASTINT (marker, 0);
3049
3050 CHECK_NUMBER_COERCE_MARKER (marker);
3051 search_regs.start[i] = from;
3052 search_regs.end[i] = XINT (marker);
3053
3054 if (!NILP (reseat) && MARKERP (m))
3055 {
3056 unchain_marker (XMARKER (m));
3057 XSETCAR (list, Qnil);
3058 }
3059 }
3060 list = XCDR (list);
3061 }
3062
3063 for (; i < search_regs.num_regs; i++)
3064 search_regs.start[i] = -1;
3065 }
3066
3067 return Qnil;
3068 }
3069
3070 /* If non-zero the match data have been saved in saved_search_regs
3071 during the execution of a sentinel or filter. */
3072 static int search_regs_saved;
3073 static struct re_registers saved_search_regs;
3074 static Lisp_Object saved_last_thing_searched;
3075
3076 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3077 if asynchronous code (filter or sentinel) is running. */
3078 static void
3079 save_search_regs ()
3080 {
3081 if (!search_regs_saved)
3082 {
3083 saved_search_regs.num_regs = search_regs.num_regs;
3084 saved_search_regs.start = search_regs.start;
3085 saved_search_regs.end = search_regs.end;
3086 saved_last_thing_searched = last_thing_searched;
3087 last_thing_searched = Qnil;
3088 search_regs.num_regs = 0;
3089 search_regs.start = 0;
3090 search_regs.end = 0;
3091
3092 search_regs_saved = 1;
3093 }
3094 }
3095
3096 /* Called upon exit from filters and sentinels. */
3097 void
3098 restore_search_regs ()
3099 {
3100 if (search_regs_saved)
3101 {
3102 if (search_regs.num_regs > 0)
3103 {
3104 xfree (search_regs.start);
3105 xfree (search_regs.end);
3106 }
3107 search_regs.num_regs = saved_search_regs.num_regs;
3108 search_regs.start = saved_search_regs.start;
3109 search_regs.end = saved_search_regs.end;
3110 last_thing_searched = saved_last_thing_searched;
3111 saved_last_thing_searched = Qnil;
3112 search_regs_saved = 0;
3113 }
3114 }
3115
3116 static Lisp_Object
3117 unwind_set_match_data (list)
3118 Lisp_Object list;
3119 {
3120 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3121 return Fset_match_data (list, Qt);
3122 }
3123
3124 /* Called to unwind protect the match data. */
3125 void
3126 record_unwind_save_match_data ()
3127 {
3128 record_unwind_protect (unwind_set_match_data,
3129 Fmatch_data (Qnil, Qnil, Qnil));
3130 }
3131
3132 /* Quote a string to inactivate reg-expr chars */
3133
3134 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3135 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3136 (string)
3137 Lisp_Object string;
3138 {
3139 register unsigned char *in, *out, *end;
3140 register unsigned char *temp;
3141 int backslashes_added = 0;
3142
3143 CHECK_STRING (string);
3144
3145 temp = (unsigned char *) alloca (SBYTES (string) * 2);
3146
3147 /* Now copy the data into the new string, inserting escapes. */
3148
3149 in = SDATA (string);
3150 end = in + SBYTES (string);
3151 out = temp;
3152
3153 for (; in != end; in++)
3154 {
3155 if (*in == '['
3156 || *in == '*' || *in == '.' || *in == '\\'
3157 || *in == '?' || *in == '+'
3158 || *in == '^' || *in == '$')
3159 *out++ = '\\', backslashes_added++;
3160 *out++ = *in;
3161 }
3162
3163 return make_specified_string (temp,
3164 SCHARS (string) + backslashes_added,
3165 out - temp,
3166 STRING_MULTIBYTE (string));
3167 }
3168 \f
3169 void
3170 syms_of_search ()
3171 {
3172 register int i;
3173
3174 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3175 {
3176 searchbufs[i].buf.allocated = 100;
3177 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3178 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3179 searchbufs[i].regexp = Qnil;
3180 searchbufs[i].whitespace_regexp = Qnil;
3181 searchbufs[i].syntax_table = Qnil;
3182 staticpro (&searchbufs[i].regexp);
3183 staticpro (&searchbufs[i].whitespace_regexp);
3184 staticpro (&searchbufs[i].syntax_table);
3185 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3186 }
3187 searchbuf_head = &searchbufs[0];
3188
3189 Qsearch_failed = intern ("search-failed");
3190 staticpro (&Qsearch_failed);
3191 Qinvalid_regexp = intern ("invalid-regexp");
3192 staticpro (&Qinvalid_regexp);
3193
3194 Fput (Qsearch_failed, Qerror_conditions,
3195 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3196 Fput (Qsearch_failed, Qerror_message,
3197 build_string ("Search failed"));
3198
3199 Fput (Qinvalid_regexp, Qerror_conditions,
3200 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3201 Fput (Qinvalid_regexp, Qerror_message,
3202 build_string ("Invalid regexp"));
3203
3204 last_thing_searched = Qnil;
3205 staticpro (&last_thing_searched);
3206
3207 saved_last_thing_searched = Qnil;
3208 staticpro (&saved_last_thing_searched);
3209
3210 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3211 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3212 Some commands use this for user-specified regexps.
3213 Spaces that occur inside character classes or repetition operators
3214 or other such regexp constructs are not replaced with this.
3215 A value of nil (which is the normal value) means treat spaces literally. */);
3216 Vsearch_spaces_regexp = Qnil;
3217
3218 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3219 doc: /* Internal use only.
3220 If non-nil, the primitive searching and matching functions
3221 such as `looking-at', `string-match', `re-search-forward', etc.,
3222 do not set the match data. The proper way to use this variable
3223 is to bind it with `let' around a small expression. */);
3224 Vinhibit_changing_match_data = Qnil;
3225
3226 defsubr (&Slooking_at);
3227 defsubr (&Sposix_looking_at);
3228 defsubr (&Sstring_match);
3229 defsubr (&Sposix_string_match);
3230 defsubr (&Ssearch_forward);
3231 defsubr (&Ssearch_backward);
3232 defsubr (&Sword_search_forward);
3233 defsubr (&Sword_search_backward);
3234 defsubr (&Sre_search_forward);
3235 defsubr (&Sre_search_backward);
3236 defsubr (&Sposix_search_forward);
3237 defsubr (&Sposix_search_backward);
3238 defsubr (&Sreplace_match);
3239 defsubr (&Smatch_beginning);
3240 defsubr (&Smatch_end);
3241 defsubr (&Smatch_data);
3242 defsubr (&Sset_match_data);
3243 defsubr (&Sregexp_quote);
3244 }
3245
3246 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3247 (do not change this comment) */