]> code.delx.au - gnu-emacs/blob - src/search.c
Merge from emacs--devo--0
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22
23 #include <config.h>
24 #include "lisp.h"
25 #include "syntax.h"
26 #include "category.h"
27 #include "buffer.h"
28 #include "character.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
34
35 #include <sys/types.h>
36 #include "regex.h"
37
38 #define REGEXP_CACHE_SIZE 20
39
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
43 {
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
54 };
55
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* error condition signaled when regexp compile_pattern fails */
88
89 Lisp_Object Qinvalid_regexp;
90
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed;
93
94 Lisp_Object Vsearch_spaces_regexp;
95
96 static void set_search_regs ();
97 static void save_search_regs ();
98 static int simple_search ();
99 static int boyer_moore ();
100 static int search_buffer ();
101 static void matcher_overflow () NO_RETURN;
102
103 static void
104 matcher_overflow ()
105 {
106 error ("Stack overflow in regexp matcher");
107 }
108
109 /* Compile a regexp and signal a Lisp error if anything goes wrong.
110 PATTERN is the pattern to compile.
111 CP is the place to put the result.
112 TRANSLATE is a translation table for ignoring case, or nil for none.
113 REGP is the structure that says where to store the "register"
114 values that will result from matching this pattern.
115 If it is 0, we should compile the pattern not to record any
116 subexpression bounds.
117 POSIX is nonzero if we want full backtracking (POSIX style)
118 for this pattern. 0 means backtrack only enough to get a valid match.
119
120 The behavior also depends on Vsearch_spaces_regexp. */
121
122 static void
123 compile_pattern_1 (cp, pattern, translate, regp, posix)
124 struct regexp_cache *cp;
125 Lisp_Object pattern;
126 Lisp_Object translate;
127 struct re_registers *regp;
128 int posix;
129 {
130 char *val;
131 reg_syntax_t old;
132
133 cp->regexp = Qnil;
134 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
135 cp->posix = posix;
136 cp->buf.multibyte = STRING_MULTIBYTE (pattern);
137 cp->buf.charset_unibyte = charset_unibyte;
138 cp->whitespace_regexp = Vsearch_spaces_regexp;
139 /* rms: I think BLOCK_INPUT is not needed here any more,
140 because regex.c defines malloc to call xmalloc.
141 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
142 So let's turn it off. */
143 /* BLOCK_INPUT; */
144 old = re_set_syntax (RE_SYNTAX_EMACS
145 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
146 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
147 : SDATA (Vsearch_spaces_regexp));
148
149 val = (char *) re_compile_pattern ((char *) SDATA (pattern),
150 SBYTES (pattern), &cp->buf);
151
152 /* If the compiled pattern hard codes some of the contents of the
153 syntax-table, it can only be reused with *this* syntax table. */
154 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
155
156 re_set_whitespace_regexp (NULL);
157
158 re_set_syntax (old);
159 /* UNBLOCK_INPUT; */
160 if (val)
161 xsignal1 (Qinvalid_regexp, build_string (val));
162
163 cp->regexp = Fcopy_sequence (pattern);
164 }
165
166 /* Shrink each compiled regexp buffer in the cache
167 to the size actually used right now.
168 This is called from garbage collection. */
169
170 void
171 shrink_regexp_cache ()
172 {
173 struct regexp_cache *cp;
174
175 for (cp = searchbuf_head; cp != 0; cp = cp->next)
176 {
177 cp->buf.allocated = cp->buf.used;
178 cp->buf.buffer
179 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
180 }
181 }
182
183 /* Clear the regexp cache w.r.t. a particular syntax table,
184 because it was changed.
185 There is no danger of memory leak here because re_compile_pattern
186 automagically manages the memory in each re_pattern_buffer struct,
187 based on its `allocated' and `buffer' values. */
188 void
189 clear_regexp_cache ()
190 {
191 int i;
192
193 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
194 /* It's tempting to compare with the syntax-table we've actually changd,
195 but it's not sufficient because char-table inheritance mewans that
196 modifying one syntax-table can change others at the same time. */
197 if (!EQ (searchbufs[i].syntax_table, Qt))
198 searchbufs[i].regexp = Qnil;
199 }
200
201 /* Compile a regexp if necessary, but first check to see if there's one in
202 the cache.
203 PATTERN is the pattern to compile.
204 TRANSLATE is a translation table for ignoring case, or nil for none.
205 REGP is the structure that says where to store the "register"
206 values that will result from matching this pattern.
207 If it is 0, we should compile the pattern not to record any
208 subexpression bounds.
209 POSIX is nonzero if we want full backtracking (POSIX style)
210 for this pattern. 0 means backtrack only enough to get a valid match. */
211
212 struct re_pattern_buffer *
213 compile_pattern (pattern, regp, translate, posix, multibyte)
214 Lisp_Object pattern;
215 struct re_registers *regp;
216 Lisp_Object translate;
217 int posix, multibyte;
218 {
219 struct regexp_cache *cp, **cpp;
220
221 for (cpp = &searchbuf_head; ; cpp = &cp->next)
222 {
223 cp = *cpp;
224 /* Entries are initialized to nil, and may be set to nil by
225 compile_pattern_1 if the pattern isn't valid. Don't apply
226 string accessors in those cases. However, compile_pattern_1
227 is only applied to the cache entry we pick here to reuse. So
228 nil should never appear before a non-nil entry. */
229 if (NILP (cp->regexp))
230 goto compile_it;
231 if (SCHARS (cp->regexp) == SCHARS (pattern)
232 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
233 && !NILP (Fstring_equal (cp->regexp, pattern))
234 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
235 && cp->posix == posix
236 && (EQ (cp->syntax_table, Qt)
237 || EQ (cp->syntax_table, current_buffer->syntax_table))
238 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp))
239 && cp->buf.charset_unibyte == charset_unibyte)
240 break;
241
242 /* If we're at the end of the cache, compile into the nil cell
243 we found, or the last (least recently used) cell with a
244 string value. */
245 if (cp->next == 0)
246 {
247 compile_it:
248 compile_pattern_1 (cp, pattern, translate, regp, posix);
249 break;
250 }
251 }
252
253 /* When we get here, cp (aka *cpp) contains the compiled pattern,
254 either because we found it in the cache or because we just compiled it.
255 Move it to the front of the queue to mark it as most recently used. */
256 *cpp = cp->next;
257 cp->next = searchbuf_head;
258 searchbuf_head = cp;
259
260 /* Advise the searching functions about the space we have allocated
261 for register data. */
262 if (regp)
263 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
264
265 /* The compiled pattern can be used both for mulitbyte and unibyte
266 target. But, we have to tell which the pattern is used for. */
267 cp->buf.target_multibyte = multibyte;
268
269 return &cp->buf;
270 }
271
272 \f
273 static Lisp_Object
274 looking_at_1 (string, posix)
275 Lisp_Object string;
276 int posix;
277 {
278 Lisp_Object val;
279 unsigned char *p1, *p2;
280 int s1, s2;
281 register int i;
282 struct re_pattern_buffer *bufp;
283
284 if (running_asynch_code)
285 save_search_regs ();
286
287 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
288 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
289 = current_buffer->case_eqv_table;
290
291 CHECK_STRING (string);
292 bufp = compile_pattern (string, &search_regs,
293 (!NILP (current_buffer->case_fold_search)
294 ? current_buffer->case_canon_table : Qnil),
295 posix,
296 !NILP (current_buffer->enable_multibyte_characters));
297
298 immediate_quit = 1;
299 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
300
301 /* Get pointers and sizes of the two strings
302 that make up the visible portion of the buffer. */
303
304 p1 = BEGV_ADDR;
305 s1 = GPT_BYTE - BEGV_BYTE;
306 p2 = GAP_END_ADDR;
307 s2 = ZV_BYTE - GPT_BYTE;
308 if (s1 < 0)
309 {
310 p2 = p1;
311 s2 = ZV_BYTE - BEGV_BYTE;
312 s1 = 0;
313 }
314 if (s2 < 0)
315 {
316 s1 = ZV_BYTE - BEGV_BYTE;
317 s2 = 0;
318 }
319
320 re_match_object = Qnil;
321
322 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
323 PT_BYTE - BEGV_BYTE, &search_regs,
324 ZV_BYTE - BEGV_BYTE);
325 immediate_quit = 0;
326
327 if (i == -2)
328 matcher_overflow ();
329
330 val = (0 <= i ? Qt : Qnil);
331 if (i >= 0)
332 for (i = 0; i < search_regs.num_regs; i++)
333 if (search_regs.start[i] >= 0)
334 {
335 search_regs.start[i]
336 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
337 search_regs.end[i]
338 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
339 }
340 XSETBUFFER (last_thing_searched, current_buffer);
341 return val;
342 }
343
344 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
345 doc: /* Return t if text after point matches regular expression REGEXP.
346 This function modifies the match data that `match-beginning',
347 `match-end' and `match-data' access; save and restore the match
348 data if you want to preserve them. */)
349 (regexp)
350 Lisp_Object regexp;
351 {
352 return looking_at_1 (regexp, 0);
353 }
354
355 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
356 doc: /* Return t if text after point matches regular expression REGEXP.
357 Find the longest match, in accord with Posix regular expression rules.
358 This function modifies the match data that `match-beginning',
359 `match-end' and `match-data' access; save and restore the match
360 data if you want to preserve them. */)
361 (regexp)
362 Lisp_Object regexp;
363 {
364 return looking_at_1 (regexp, 1);
365 }
366 \f
367 static Lisp_Object
368 string_match_1 (regexp, string, start, posix)
369 Lisp_Object regexp, string, start;
370 int posix;
371 {
372 int val;
373 struct re_pattern_buffer *bufp;
374 int pos, pos_byte;
375 int i;
376
377 if (running_asynch_code)
378 save_search_regs ();
379
380 CHECK_STRING (regexp);
381 CHECK_STRING (string);
382
383 if (NILP (start))
384 pos = 0, pos_byte = 0;
385 else
386 {
387 int len = SCHARS (string);
388
389 CHECK_NUMBER (start);
390 pos = XINT (start);
391 if (pos < 0 && -pos <= len)
392 pos = len + pos;
393 else if (0 > pos || pos > len)
394 args_out_of_range (string, start);
395 pos_byte = string_char_to_byte (string, pos);
396 }
397
398 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
399 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
400 = current_buffer->case_eqv_table;
401
402 bufp = compile_pattern (regexp, &search_regs,
403 (!NILP (current_buffer->case_fold_search)
404 ? current_buffer->case_canon_table : Qnil),
405 posix,
406 STRING_MULTIBYTE (string));
407 immediate_quit = 1;
408 re_match_object = string;
409
410 val = re_search (bufp, (char *) SDATA (string),
411 SBYTES (string), pos_byte,
412 SBYTES (string) - pos_byte,
413 &search_regs);
414 immediate_quit = 0;
415 last_thing_searched = Qt;
416 if (val == -2)
417 matcher_overflow ();
418 if (val < 0) return Qnil;
419
420 for (i = 0; i < search_regs.num_regs; i++)
421 if (search_regs.start[i] >= 0)
422 {
423 search_regs.start[i]
424 = string_byte_to_char (string, search_regs.start[i]);
425 search_regs.end[i]
426 = string_byte_to_char (string, search_regs.end[i]);
427 }
428
429 return make_number (string_byte_to_char (string, val));
430 }
431
432 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
433 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
434 Matching ignores case if `case-fold-search' is non-nil.
435 If third arg START is non-nil, start search at that index in STRING.
436 For index of first char beyond the match, do (match-end 0).
437 `match-end' and `match-beginning' also give indices of substrings
438 matched by parenthesis constructs in the pattern.
439
440 You can use the function `match-string' to extract the substrings
441 matched by the parenthesis constructions in REGEXP. */)
442 (regexp, string, start)
443 Lisp_Object regexp, string, start;
444 {
445 return string_match_1 (regexp, string, start, 0);
446 }
447
448 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
449 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
450 Find the longest match, in accord with Posix regular expression rules.
451 Case is ignored if `case-fold-search' is non-nil in the current buffer.
452 If third arg START is non-nil, start search at that index in STRING.
453 For index of first char beyond the match, do (match-end 0).
454 `match-end' and `match-beginning' also give indices of substrings
455 matched by parenthesis constructs in the pattern. */)
456 (regexp, string, start)
457 Lisp_Object regexp, string, start;
458 {
459 return string_match_1 (regexp, string, start, 1);
460 }
461
462 /* Match REGEXP against STRING, searching all of STRING,
463 and return the index of the match, or negative on failure.
464 This does not clobber the match data. */
465
466 int
467 fast_string_match (regexp, string)
468 Lisp_Object regexp, string;
469 {
470 int val;
471 struct re_pattern_buffer *bufp;
472
473 bufp = compile_pattern (regexp, 0, Qnil,
474 0, STRING_MULTIBYTE (string));
475 immediate_quit = 1;
476 re_match_object = string;
477
478 val = re_search (bufp, (char *) SDATA (string),
479 SBYTES (string), 0,
480 SBYTES (string), 0);
481 immediate_quit = 0;
482 return val;
483 }
484
485 /* Match REGEXP against STRING, searching all of STRING ignoring case,
486 and return the index of the match, or negative on failure.
487 This does not clobber the match data.
488 We assume that STRING contains single-byte characters. */
489
490 extern Lisp_Object Vascii_downcase_table;
491
492 int
493 fast_c_string_match_ignore_case (regexp, string)
494 Lisp_Object regexp;
495 const char *string;
496 {
497 int val;
498 struct re_pattern_buffer *bufp;
499 int len = strlen (string);
500
501 regexp = string_make_unibyte (regexp);
502 re_match_object = Qt;
503 bufp = compile_pattern (regexp, 0,
504 Vascii_canon_table, 0,
505 0);
506 immediate_quit = 1;
507 val = re_search (bufp, string, len, 0, len, 0);
508 immediate_quit = 0;
509 return val;
510 }
511
512 /* Like fast_string_match but ignore case. */
513
514 int
515 fast_string_match_ignore_case (regexp, string)
516 Lisp_Object regexp, string;
517 {
518 int val;
519 struct re_pattern_buffer *bufp;
520
521 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
522 0, STRING_MULTIBYTE (string));
523 immediate_quit = 1;
524 re_match_object = string;
525
526 val = re_search (bufp, (char *) SDATA (string),
527 SBYTES (string), 0,
528 SBYTES (string), 0);
529 immediate_quit = 0;
530 return val;
531 }
532 \f
533 /* The newline cache: remembering which sections of text have no newlines. */
534
535 /* If the user has requested newline caching, make sure it's on.
536 Otherwise, make sure it's off.
537 This is our cheezy way of associating an action with the change of
538 state of a buffer-local variable. */
539 static void
540 newline_cache_on_off (buf)
541 struct buffer *buf;
542 {
543 if (NILP (buf->cache_long_line_scans))
544 {
545 /* It should be off. */
546 if (buf->newline_cache)
547 {
548 free_region_cache (buf->newline_cache);
549 buf->newline_cache = 0;
550 }
551 }
552 else
553 {
554 /* It should be on. */
555 if (buf->newline_cache == 0)
556 buf->newline_cache = new_region_cache ();
557 }
558 }
559
560 \f
561 /* Search for COUNT instances of the character TARGET between START and END.
562
563 If COUNT is positive, search forwards; END must be >= START.
564 If COUNT is negative, search backwards for the -COUNTth instance;
565 END must be <= START.
566 If COUNT is zero, do anything you please; run rogue, for all I care.
567
568 If END is zero, use BEGV or ZV instead, as appropriate for the
569 direction indicated by COUNT.
570
571 If we find COUNT instances, set *SHORTAGE to zero, and return the
572 position past the COUNTth match. Note that for reverse motion
573 this is not the same as the usual convention for Emacs motion commands.
574
575 If we don't find COUNT instances before reaching END, set *SHORTAGE
576 to the number of TARGETs left unfound, and return END.
577
578 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
579 except when inside redisplay. */
580
581 int
582 scan_buffer (target, start, end, count, shortage, allow_quit)
583 register int target;
584 int start, end;
585 int count;
586 int *shortage;
587 int allow_quit;
588 {
589 struct region_cache *newline_cache;
590 int direction;
591
592 if (count > 0)
593 {
594 direction = 1;
595 if (! end) end = ZV;
596 }
597 else
598 {
599 direction = -1;
600 if (! end) end = BEGV;
601 }
602
603 newline_cache_on_off (current_buffer);
604 newline_cache = current_buffer->newline_cache;
605
606 if (shortage != 0)
607 *shortage = 0;
608
609 immediate_quit = allow_quit;
610
611 if (count > 0)
612 while (start != end)
613 {
614 /* Our innermost scanning loop is very simple; it doesn't know
615 about gaps, buffer ends, or the newline cache. ceiling is
616 the position of the last character before the next such
617 obstacle --- the last character the dumb search loop should
618 examine. */
619 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
620 int start_byte = CHAR_TO_BYTE (start);
621 int tem;
622
623 /* If we're looking for a newline, consult the newline cache
624 to see where we can avoid some scanning. */
625 if (target == '\n' && newline_cache)
626 {
627 int next_change;
628 immediate_quit = 0;
629 while (region_cache_forward
630 (current_buffer, newline_cache, start_byte, &next_change))
631 start_byte = next_change;
632 immediate_quit = allow_quit;
633
634 /* START should never be after END. */
635 if (start_byte > ceiling_byte)
636 start_byte = ceiling_byte;
637
638 /* Now the text after start is an unknown region, and
639 next_change is the position of the next known region. */
640 ceiling_byte = min (next_change - 1, ceiling_byte);
641 }
642
643 /* The dumb loop can only scan text stored in contiguous
644 bytes. BUFFER_CEILING_OF returns the last character
645 position that is contiguous, so the ceiling is the
646 position after that. */
647 tem = BUFFER_CEILING_OF (start_byte);
648 ceiling_byte = min (tem, ceiling_byte);
649
650 {
651 /* The termination address of the dumb loop. */
652 register unsigned char *ceiling_addr
653 = BYTE_POS_ADDR (ceiling_byte) + 1;
654 register unsigned char *cursor
655 = BYTE_POS_ADDR (start_byte);
656 unsigned char *base = cursor;
657
658 while (cursor < ceiling_addr)
659 {
660 unsigned char *scan_start = cursor;
661
662 /* The dumb loop. */
663 while (*cursor != target && ++cursor < ceiling_addr)
664 ;
665
666 /* If we're looking for newlines, cache the fact that
667 the region from start to cursor is free of them. */
668 if (target == '\n' && newline_cache)
669 know_region_cache (current_buffer, newline_cache,
670 start_byte + scan_start - base,
671 start_byte + cursor - base);
672
673 /* Did we find the target character? */
674 if (cursor < ceiling_addr)
675 {
676 if (--count == 0)
677 {
678 immediate_quit = 0;
679 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
680 }
681 cursor++;
682 }
683 }
684
685 start = BYTE_TO_CHAR (start_byte + cursor - base);
686 }
687 }
688 else
689 while (start > end)
690 {
691 /* The last character to check before the next obstacle. */
692 int ceiling_byte = CHAR_TO_BYTE (end);
693 int start_byte = CHAR_TO_BYTE (start);
694 int tem;
695
696 /* Consult the newline cache, if appropriate. */
697 if (target == '\n' && newline_cache)
698 {
699 int next_change;
700 immediate_quit = 0;
701 while (region_cache_backward
702 (current_buffer, newline_cache, start_byte, &next_change))
703 start_byte = next_change;
704 immediate_quit = allow_quit;
705
706 /* Start should never be at or before end. */
707 if (start_byte <= ceiling_byte)
708 start_byte = ceiling_byte + 1;
709
710 /* Now the text before start is an unknown region, and
711 next_change is the position of the next known region. */
712 ceiling_byte = max (next_change, ceiling_byte);
713 }
714
715 /* Stop scanning before the gap. */
716 tem = BUFFER_FLOOR_OF (start_byte - 1);
717 ceiling_byte = max (tem, ceiling_byte);
718
719 {
720 /* The termination address of the dumb loop. */
721 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
722 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
723 unsigned char *base = cursor;
724
725 while (cursor >= ceiling_addr)
726 {
727 unsigned char *scan_start = cursor;
728
729 while (*cursor != target && --cursor >= ceiling_addr)
730 ;
731
732 /* If we're looking for newlines, cache the fact that
733 the region from after the cursor to start is free of them. */
734 if (target == '\n' && newline_cache)
735 know_region_cache (current_buffer, newline_cache,
736 start_byte + cursor - base,
737 start_byte + scan_start - base);
738
739 /* Did we find the target character? */
740 if (cursor >= ceiling_addr)
741 {
742 if (++count >= 0)
743 {
744 immediate_quit = 0;
745 return BYTE_TO_CHAR (start_byte + cursor - base);
746 }
747 cursor--;
748 }
749 }
750
751 start = BYTE_TO_CHAR (start_byte + cursor - base);
752 }
753 }
754
755 immediate_quit = 0;
756 if (shortage != 0)
757 *shortage = count * direction;
758 return start;
759 }
760 \f
761 /* Search for COUNT instances of a line boundary, which means either a
762 newline or (if selective display enabled) a carriage return.
763 Start at START. If COUNT is negative, search backwards.
764
765 We report the resulting position by calling TEMP_SET_PT_BOTH.
766
767 If we find COUNT instances. we position after (always after,
768 even if scanning backwards) the COUNTth match, and return 0.
769
770 If we don't find COUNT instances before reaching the end of the
771 buffer (or the beginning, if scanning backwards), we return
772 the number of line boundaries left unfound, and position at
773 the limit we bumped up against.
774
775 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
776 except in special cases. */
777
778 int
779 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
780 int start, start_byte;
781 int limit, limit_byte;
782 register int count;
783 int allow_quit;
784 {
785 int direction = ((count > 0) ? 1 : -1);
786
787 register unsigned char *cursor;
788 unsigned char *base;
789
790 register int ceiling;
791 register unsigned char *ceiling_addr;
792
793 int old_immediate_quit = immediate_quit;
794
795 /* The code that follows is like scan_buffer
796 but checks for either newline or carriage return. */
797
798 if (allow_quit)
799 immediate_quit++;
800
801 start_byte = CHAR_TO_BYTE (start);
802
803 if (count > 0)
804 {
805 while (start_byte < limit_byte)
806 {
807 ceiling = BUFFER_CEILING_OF (start_byte);
808 ceiling = min (limit_byte - 1, ceiling);
809 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
810 base = (cursor = BYTE_POS_ADDR (start_byte));
811 while (1)
812 {
813 while (*cursor != '\n' && ++cursor != ceiling_addr)
814 ;
815
816 if (cursor != ceiling_addr)
817 {
818 if (--count == 0)
819 {
820 immediate_quit = old_immediate_quit;
821 start_byte = start_byte + cursor - base + 1;
822 start = BYTE_TO_CHAR (start_byte);
823 TEMP_SET_PT_BOTH (start, start_byte);
824 return 0;
825 }
826 else
827 if (++cursor == ceiling_addr)
828 break;
829 }
830 else
831 break;
832 }
833 start_byte += cursor - base;
834 }
835 }
836 else
837 {
838 while (start_byte > limit_byte)
839 {
840 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
841 ceiling = max (limit_byte, ceiling);
842 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
843 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
844 while (1)
845 {
846 while (--cursor != ceiling_addr && *cursor != '\n')
847 ;
848
849 if (cursor != ceiling_addr)
850 {
851 if (++count == 0)
852 {
853 immediate_quit = old_immediate_quit;
854 /* Return the position AFTER the match we found. */
855 start_byte = start_byte + cursor - base + 1;
856 start = BYTE_TO_CHAR (start_byte);
857 TEMP_SET_PT_BOTH (start, start_byte);
858 return 0;
859 }
860 }
861 else
862 break;
863 }
864 /* Here we add 1 to compensate for the last decrement
865 of CURSOR, which took it past the valid range. */
866 start_byte += cursor - base + 1;
867 }
868 }
869
870 TEMP_SET_PT_BOTH (limit, limit_byte);
871 immediate_quit = old_immediate_quit;
872
873 return count * direction;
874 }
875
876 int
877 find_next_newline_no_quit (from, cnt)
878 register int from, cnt;
879 {
880 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
881 }
882
883 /* Like find_next_newline, but returns position before the newline,
884 not after, and only search up to TO. This isn't just
885 find_next_newline (...)-1, because you might hit TO. */
886
887 int
888 find_before_next_newline (from, to, cnt)
889 int from, to, cnt;
890 {
891 int shortage;
892 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
893
894 if (shortage == 0)
895 pos--;
896
897 return pos;
898 }
899 \f
900 /* Subroutines of Lisp buffer search functions. */
901
902 static Lisp_Object
903 search_command (string, bound, noerror, count, direction, RE, posix)
904 Lisp_Object string, bound, noerror, count;
905 int direction;
906 int RE;
907 int posix;
908 {
909 register int np;
910 int lim, lim_byte;
911 int n = direction;
912
913 if (!NILP (count))
914 {
915 CHECK_NUMBER (count);
916 n *= XINT (count);
917 }
918
919 CHECK_STRING (string);
920 if (NILP (bound))
921 {
922 if (n > 0)
923 lim = ZV, lim_byte = ZV_BYTE;
924 else
925 lim = BEGV, lim_byte = BEGV_BYTE;
926 }
927 else
928 {
929 CHECK_NUMBER_COERCE_MARKER (bound);
930 lim = XINT (bound);
931 if (n > 0 ? lim < PT : lim > PT)
932 error ("Invalid search bound (wrong side of point)");
933 if (lim > ZV)
934 lim = ZV, lim_byte = ZV_BYTE;
935 else if (lim < BEGV)
936 lim = BEGV, lim_byte = BEGV_BYTE;
937 else
938 lim_byte = CHAR_TO_BYTE (lim);
939 }
940
941 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
942 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
943 = current_buffer->case_eqv_table;
944
945 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
946 (!NILP (current_buffer->case_fold_search)
947 ? current_buffer->case_canon_table
948 : Qnil),
949 (!NILP (current_buffer->case_fold_search)
950 ? current_buffer->case_eqv_table
951 : Qnil),
952 posix);
953 if (np <= 0)
954 {
955 if (NILP (noerror))
956 xsignal1 (Qsearch_failed, string);
957
958 if (!EQ (noerror, Qt))
959 {
960 if (lim < BEGV || lim > ZV)
961 abort ();
962 SET_PT_BOTH (lim, lim_byte);
963 return Qnil;
964 #if 0 /* This would be clean, but maybe programs depend on
965 a value of nil here. */
966 np = lim;
967 #endif
968 }
969 else
970 return Qnil;
971 }
972
973 if (np < BEGV || np > ZV)
974 abort ();
975
976 SET_PT (np);
977
978 return make_number (np);
979 }
980 \f
981 /* Return 1 if REGEXP it matches just one constant string. */
982
983 static int
984 trivial_regexp_p (regexp)
985 Lisp_Object regexp;
986 {
987 int len = SBYTES (regexp);
988 unsigned char *s = SDATA (regexp);
989 while (--len >= 0)
990 {
991 switch (*s++)
992 {
993 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
994 return 0;
995 case '\\':
996 if (--len < 0)
997 return 0;
998 switch (*s++)
999 {
1000 case '|': case '(': case ')': case '`': case '\'': case 'b':
1001 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1002 case 'S': case '=': case '{': case '}': case '_':
1003 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1004 case '1': case '2': case '3': case '4': case '5':
1005 case '6': case '7': case '8': case '9':
1006 return 0;
1007 }
1008 }
1009 }
1010 return 1;
1011 }
1012
1013 /* Search for the n'th occurrence of STRING in the current buffer,
1014 starting at position POS and stopping at position LIM,
1015 treating STRING as a literal string if RE is false or as
1016 a regular expression if RE is true.
1017
1018 If N is positive, searching is forward and LIM must be greater than POS.
1019 If N is negative, searching is backward and LIM must be less than POS.
1020
1021 Returns -x if x occurrences remain to be found (x > 0),
1022 or else the position at the beginning of the Nth occurrence
1023 (if searching backward) or the end (if searching forward).
1024
1025 POSIX is nonzero if we want full backtracking (POSIX style)
1026 for this pattern. 0 means backtrack only enough to get a valid match. */
1027
1028 #define TRANSLATE(out, trt, d) \
1029 do \
1030 { \
1031 if (! NILP (trt)) \
1032 { \
1033 Lisp_Object temp; \
1034 temp = Faref (trt, make_number (d)); \
1035 if (INTEGERP (temp)) \
1036 out = XINT (temp); \
1037 else \
1038 out = d; \
1039 } \
1040 else \
1041 out = d; \
1042 } \
1043 while (0)
1044
1045 static int
1046 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1047 RE, trt, inverse_trt, posix)
1048 Lisp_Object string;
1049 int pos;
1050 int pos_byte;
1051 int lim;
1052 int lim_byte;
1053 int n;
1054 int RE;
1055 Lisp_Object trt;
1056 Lisp_Object inverse_trt;
1057 int posix;
1058 {
1059 int len = SCHARS (string);
1060 int len_byte = SBYTES (string);
1061 register int i;
1062
1063 if (running_asynch_code)
1064 save_search_regs ();
1065
1066 /* Searching 0 times means don't move. */
1067 /* Null string is found at starting position. */
1068 if (len == 0 || n == 0)
1069 {
1070 set_search_regs (pos_byte, 0);
1071 return pos;
1072 }
1073
1074 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1075 {
1076 unsigned char *p1, *p2;
1077 int s1, s2;
1078 struct re_pattern_buffer *bufp;
1079
1080 bufp = compile_pattern (string, &search_regs, trt, posix,
1081 !NILP (current_buffer->enable_multibyte_characters));
1082
1083 immediate_quit = 1; /* Quit immediately if user types ^G,
1084 because letting this function finish
1085 can take too long. */
1086 QUIT; /* Do a pending quit right away,
1087 to avoid paradoxical behavior */
1088 /* Get pointers and sizes of the two strings
1089 that make up the visible portion of the buffer. */
1090
1091 p1 = BEGV_ADDR;
1092 s1 = GPT_BYTE - BEGV_BYTE;
1093 p2 = GAP_END_ADDR;
1094 s2 = ZV_BYTE - GPT_BYTE;
1095 if (s1 < 0)
1096 {
1097 p2 = p1;
1098 s2 = ZV_BYTE - BEGV_BYTE;
1099 s1 = 0;
1100 }
1101 if (s2 < 0)
1102 {
1103 s1 = ZV_BYTE - BEGV_BYTE;
1104 s2 = 0;
1105 }
1106 re_match_object = Qnil;
1107
1108 while (n < 0)
1109 {
1110 int val;
1111 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1112 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1113 &search_regs,
1114 /* Don't allow match past current point */
1115 pos_byte - BEGV_BYTE);
1116 if (val == -2)
1117 {
1118 matcher_overflow ();
1119 }
1120 if (val >= 0)
1121 {
1122 pos_byte = search_regs.start[0] + BEGV_BYTE;
1123 for (i = 0; i < search_regs.num_regs; i++)
1124 if (search_regs.start[i] >= 0)
1125 {
1126 search_regs.start[i]
1127 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1128 search_regs.end[i]
1129 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1130 }
1131 XSETBUFFER (last_thing_searched, current_buffer);
1132 /* Set pos to the new position. */
1133 pos = search_regs.start[0];
1134 }
1135 else
1136 {
1137 immediate_quit = 0;
1138 return (n);
1139 }
1140 n++;
1141 }
1142 while (n > 0)
1143 {
1144 int val;
1145 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1146 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1147 &search_regs,
1148 lim_byte - BEGV_BYTE);
1149 if (val == -2)
1150 {
1151 matcher_overflow ();
1152 }
1153 if (val >= 0)
1154 {
1155 pos_byte = search_regs.end[0] + BEGV_BYTE;
1156 for (i = 0; i < search_regs.num_regs; i++)
1157 if (search_regs.start[i] >= 0)
1158 {
1159 search_regs.start[i]
1160 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1161 search_regs.end[i]
1162 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1163 }
1164 XSETBUFFER (last_thing_searched, current_buffer);
1165 pos = search_regs.end[0];
1166 }
1167 else
1168 {
1169 immediate_quit = 0;
1170 return (0 - n);
1171 }
1172 n--;
1173 }
1174 immediate_quit = 0;
1175 return (pos);
1176 }
1177 else /* non-RE case */
1178 {
1179 unsigned char *raw_pattern, *pat;
1180 int raw_pattern_size;
1181 int raw_pattern_size_byte;
1182 unsigned char *patbuf;
1183 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1184 unsigned char *base_pat;
1185 /* Set to positive if we find a non-ASCII char that need
1186 translation. Otherwise set to zero later. */
1187 int char_base = -1;
1188 int boyer_moore_ok = 1;
1189
1190 /* MULTIBYTE says whether the text to be searched is multibyte.
1191 We must convert PATTERN to match that, or we will not really
1192 find things right. */
1193
1194 if (multibyte == STRING_MULTIBYTE (string))
1195 {
1196 raw_pattern = (unsigned char *) SDATA (string);
1197 raw_pattern_size = SCHARS (string);
1198 raw_pattern_size_byte = SBYTES (string);
1199 }
1200 else if (multibyte)
1201 {
1202 raw_pattern_size = SCHARS (string);
1203 raw_pattern_size_byte
1204 = count_size_as_multibyte (SDATA (string),
1205 raw_pattern_size);
1206 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1207 copy_text (SDATA (string), raw_pattern,
1208 SCHARS (string), 0, 1);
1209 }
1210 else
1211 {
1212 /* Converting multibyte to single-byte.
1213
1214 ??? Perhaps this conversion should be done in a special way
1215 by subtracting nonascii-insert-offset from each non-ASCII char,
1216 so that only the multibyte chars which really correspond to
1217 the chosen single-byte character set can possibly match. */
1218 raw_pattern_size = SCHARS (string);
1219 raw_pattern_size_byte = SCHARS (string);
1220 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1221 copy_text (SDATA (string), raw_pattern,
1222 SBYTES (string), 1, 0);
1223 }
1224
1225 /* Copy and optionally translate the pattern. */
1226 len = raw_pattern_size;
1227 len_byte = raw_pattern_size_byte;
1228 patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
1229 pat = patbuf;
1230 base_pat = raw_pattern;
1231 if (multibyte)
1232 {
1233 /* Fill patbuf by translated characters in STRING while
1234 checking if we can use boyer-moore search. If TRT is
1235 non-nil, we can use boyer-moore search only if TRT can be
1236 represented by the byte array of 256 elements. For that,
1237 all non-ASCII case-equivalents of all case-senstive
1238 characters in STRING must belong to the same charset and
1239 row. */
1240
1241 while (--len >= 0)
1242 {
1243 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1244 int c, translated, inverse;
1245 int in_charlen, charlen;
1246
1247 /* If we got here and the RE flag is set, it's because we're
1248 dealing with a regexp known to be trivial, so the backslash
1249 just quotes the next character. */
1250 if (RE && *base_pat == '\\')
1251 {
1252 len--;
1253 raw_pattern_size--;
1254 len_byte--;
1255 base_pat++;
1256 }
1257
1258 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1259
1260 if (NILP (trt))
1261 {
1262 str = base_pat;
1263 charlen = in_charlen;
1264 }
1265 else
1266 {
1267 /* Translate the character. */
1268 TRANSLATE (translated, trt, c);
1269 charlen = CHAR_STRING (translated, str_base);
1270 str = str_base;
1271
1272 /* Check if C has any other case-equivalents. */
1273 TRANSLATE (inverse, inverse_trt, c);
1274 /* If so, check if we can use boyer-moore. */
1275 if (c != inverse && boyer_moore_ok)
1276 {
1277 /* Check if all equivalents belong to the same
1278 group of characters. Note that the check of C
1279 itself is done by the last iteration. */
1280 int this_char_base = -1;
1281
1282 while (boyer_moore_ok)
1283 {
1284 if (ASCII_BYTE_P (inverse))
1285 {
1286 if (this_char_base > 0)
1287 boyer_moore_ok = 0;
1288 else
1289 {
1290 this_char_base = 0;
1291 if (char_base < 0)
1292 char_base = this_char_base;
1293 }
1294 }
1295 else if (CHAR_BYTE8_P (inverse))
1296 /* Boyer-moore search can't handle a
1297 translation of an eight-bit
1298 character. */
1299 boyer_moore_ok = 0;
1300 else if (this_char_base < 0)
1301 {
1302 this_char_base = inverse & ~0x3F;
1303 if (char_base < 0)
1304 char_base = this_char_base;
1305 else if (char_base > 0
1306 && this_char_base != char_base)
1307 boyer_moore_ok = 0;
1308 }
1309 else if ((inverse & ~0x3F) != this_char_base)
1310 boyer_moore_ok = 0;
1311 if (c == inverse)
1312 break;
1313 TRANSLATE (inverse, inverse_trt, inverse);
1314 }
1315 }
1316 }
1317 if (char_base < 0)
1318 char_base = 0;
1319
1320 /* Store this character into the translated pattern. */
1321 bcopy (str, pat, charlen);
1322 pat += charlen;
1323 base_pat += in_charlen;
1324 len_byte -= in_charlen;
1325 }
1326 }
1327 else
1328 {
1329 /* Unibyte buffer. */
1330 char_base = 0;
1331 while (--len >= 0)
1332 {
1333 int c, translated;
1334
1335 /* If we got here and the RE flag is set, it's because we're
1336 dealing with a regexp known to be trivial, so the backslash
1337 just quotes the next character. */
1338 if (RE && *base_pat == '\\')
1339 {
1340 len--;
1341 raw_pattern_size--;
1342 base_pat++;
1343 }
1344 c = *base_pat++;
1345 TRANSLATE (translated, trt, c);
1346 *pat++ = translated;
1347 }
1348 }
1349
1350 len_byte = pat - patbuf;
1351 len = raw_pattern_size;
1352 pat = base_pat = patbuf;
1353
1354 if (boyer_moore_ok)
1355 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1356 pos, pos_byte, lim, lim_byte,
1357 char_base);
1358 else
1359 return simple_search (n, pat, len, len_byte, trt,
1360 pos, pos_byte, lim, lim_byte);
1361 }
1362 }
1363 \f
1364 /* Do a simple string search N times for the string PAT,
1365 whose length is LEN/LEN_BYTE,
1366 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1367 TRT is the translation table.
1368
1369 Return the character position where the match is found.
1370 Otherwise, if M matches remained to be found, return -M.
1371
1372 This kind of search works regardless of what is in PAT and
1373 regardless of what is in TRT. It is used in cases where
1374 boyer_moore cannot work. */
1375
1376 static int
1377 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1378 int n;
1379 unsigned char *pat;
1380 int len, len_byte;
1381 Lisp_Object trt;
1382 int pos, pos_byte;
1383 int lim, lim_byte;
1384 {
1385 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1386 int forward = n > 0;
1387 /* Number of buffer bytes matched. Note that this may be different
1388 from len_byte in a multibyte buffer. */
1389 int match_byte;
1390
1391 if (lim > pos && multibyte)
1392 while (n > 0)
1393 {
1394 while (1)
1395 {
1396 /* Try matching at position POS. */
1397 int this_pos = pos;
1398 int this_pos_byte = pos_byte;
1399 int this_len = len;
1400 int this_len_byte = len_byte;
1401 unsigned char *p = pat;
1402 if (pos + len > lim || pos_byte + len_byte > lim_byte)
1403 goto stop;
1404
1405 while (this_len > 0)
1406 {
1407 int charlen, buf_charlen;
1408 int pat_ch, buf_ch;
1409
1410 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1411 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1412 ZV_BYTE - this_pos_byte,
1413 buf_charlen);
1414 TRANSLATE (buf_ch, trt, buf_ch);
1415
1416 if (buf_ch != pat_ch)
1417 break;
1418
1419 this_len_byte -= charlen;
1420 this_len--;
1421 p += charlen;
1422
1423 this_pos_byte += buf_charlen;
1424 this_pos++;
1425 }
1426
1427 if (this_len == 0)
1428 {
1429 match_byte = this_pos_byte - pos_byte;
1430 pos += len;
1431 pos_byte += match_byte;
1432 break;
1433 }
1434
1435 INC_BOTH (pos, pos_byte);
1436 }
1437
1438 n--;
1439 }
1440 else if (lim > pos)
1441 while (n > 0)
1442 {
1443 while (1)
1444 {
1445 /* Try matching at position POS. */
1446 int this_pos = pos;
1447 int this_len = len;
1448 unsigned char *p = pat;
1449
1450 if (pos + len > lim)
1451 goto stop;
1452
1453 while (this_len > 0)
1454 {
1455 int pat_ch = *p++;
1456 int buf_ch = FETCH_BYTE (this_pos);
1457 TRANSLATE (buf_ch, trt, buf_ch);
1458
1459 if (buf_ch != pat_ch)
1460 break;
1461
1462 this_len--;
1463 this_pos++;
1464 }
1465
1466 if (this_len == 0)
1467 {
1468 match_byte = len;
1469 pos += len;
1470 break;
1471 }
1472
1473 pos++;
1474 }
1475
1476 n--;
1477 }
1478 /* Backwards search. */
1479 else if (lim < pos && multibyte)
1480 while (n < 0)
1481 {
1482 while (1)
1483 {
1484 /* Try matching at position POS. */
1485 int this_pos = pos - len;
1486 int this_pos_byte;
1487 int this_len = len;
1488 int this_len_byte = len_byte;
1489 unsigned char *p = pat;
1490
1491 if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
1492 goto stop;
1493 this_pos_byte = CHAR_TO_BYTE (this_pos);
1494 match_byte = pos_byte - this_pos_byte;
1495
1496 while (this_len > 0)
1497 {
1498 int charlen, buf_charlen;
1499 int pat_ch, buf_ch;
1500
1501 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1502 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1503 ZV_BYTE - this_pos_byte,
1504 buf_charlen);
1505 TRANSLATE (buf_ch, trt, buf_ch);
1506
1507 if (buf_ch != pat_ch)
1508 break;
1509
1510 this_len_byte -= charlen;
1511 this_len--;
1512 p += charlen;
1513 this_pos_byte += buf_charlen;
1514 this_pos++;
1515 }
1516
1517 if (this_len == 0)
1518 {
1519 pos -= len;
1520 pos_byte -= match_byte;
1521 break;
1522 }
1523
1524 DEC_BOTH (pos, pos_byte);
1525 }
1526
1527 n++;
1528 }
1529 else if (lim < pos)
1530 while (n < 0)
1531 {
1532 while (1)
1533 {
1534 /* Try matching at position POS. */
1535 int this_pos = pos - len;
1536 int this_len = len;
1537 unsigned char *p = pat;
1538
1539 if (this_pos < lim)
1540 goto stop;
1541
1542 while (this_len > 0)
1543 {
1544 int pat_ch = *p++;
1545 int buf_ch = FETCH_BYTE (this_pos);
1546 TRANSLATE (buf_ch, trt, buf_ch);
1547
1548 if (buf_ch != pat_ch)
1549 break;
1550 this_len--;
1551 this_pos++;
1552 }
1553
1554 if (this_len == 0)
1555 {
1556 match_byte = len;
1557 pos -= len;
1558 break;
1559 }
1560
1561 pos--;
1562 }
1563
1564 n++;
1565 }
1566
1567 stop:
1568 if (n == 0)
1569 {
1570 if (forward)
1571 set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
1572 else
1573 set_search_regs (multibyte ? pos_byte : pos, match_byte);
1574
1575 return pos;
1576 }
1577 else if (n > 0)
1578 return -n;
1579 else
1580 return n;
1581 }
1582 \f
1583 /* Do Boyer-Moore search N times for the string BASE_PAT,
1584 whose length is LEN/LEN_BYTE,
1585 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1586 DIRECTION says which direction we search in.
1587 TRT and INVERSE_TRT are translation tables.
1588 Characters in PAT are already translated by TRT.
1589
1590 This kind of search works if all the characters in BASE_PAT that
1591 have nontrivial translation are the same aside from the last byte.
1592 This makes it possible to translate just the last byte of a
1593 character, and do so after just a simple test of the context.
1594 CHAR_BASE is nonzero iff there is such a non-ASCII character.
1595
1596 If that criterion is not satisfied, do not call this function. */
1597
1598 static int
1599 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1600 pos, pos_byte, lim, lim_byte, char_base)
1601 int n;
1602 unsigned char *base_pat;
1603 int len, len_byte;
1604 Lisp_Object trt;
1605 Lisp_Object inverse_trt;
1606 int pos, pos_byte;
1607 int lim, lim_byte;
1608 int char_base;
1609 {
1610 int direction = ((n > 0) ? 1 : -1);
1611 register int dirlen;
1612 int infinity, limit, stride_for_teases = 0;
1613 register int *BM_tab;
1614 int *BM_tab_base;
1615 register unsigned char *cursor, *p_limit;
1616 register int i, j;
1617 unsigned char *pat, *pat_end;
1618 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1619
1620 unsigned char simple_translate[0400];
1621 /* These are set to the preceding bytes of a byte to be translated
1622 if char_base is nonzero. As the maximum byte length of a
1623 multibyte character is 5, we have to check at most four previous
1624 bytes. */
1625 int translate_prev_byte1 = 0;
1626 int translate_prev_byte2 = 0;
1627 int translate_prev_byte3 = 0;
1628 int translate_prev_byte4 = 0;
1629
1630 #ifdef C_ALLOCA
1631 int BM_tab_space[0400];
1632 BM_tab = &BM_tab_space[0];
1633 #else
1634 BM_tab = (int *) alloca (0400 * sizeof (int));
1635 #endif
1636 /* The general approach is that we are going to maintain that we know */
1637 /* the first (closest to the present position, in whatever direction */
1638 /* we're searching) character that could possibly be the last */
1639 /* (furthest from present position) character of a valid match. We */
1640 /* advance the state of our knowledge by looking at that character */
1641 /* and seeing whether it indeed matches the last character of the */
1642 /* pattern. If it does, we take a closer look. If it does not, we */
1643 /* move our pointer (to putative last characters) as far as is */
1644 /* logically possible. This amount of movement, which I call a */
1645 /* stride, will be the length of the pattern if the actual character */
1646 /* appears nowhere in the pattern, otherwise it will be the distance */
1647 /* from the last occurrence of that character to the end of the */
1648 /* pattern. */
1649 /* As a coding trick, an enormous stride is coded into the table for */
1650 /* characters that match the last character. This allows use of only */
1651 /* a single test, a test for having gone past the end of the */
1652 /* permissible match region, to test for both possible matches (when */
1653 /* the stride goes past the end immediately) and failure to */
1654 /* match (where you get nudged past the end one stride at a time). */
1655
1656 /* Here we make a "mickey mouse" BM table. The stride of the search */
1657 /* is determined only by the last character of the putative match. */
1658 /* If that character does not match, we will stride the proper */
1659 /* distance to propose a match that superimposes it on the last */
1660 /* instance of a character that matches it (per trt), or misses */
1661 /* it entirely if there is none. */
1662
1663 dirlen = len_byte * direction;
1664 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1665
1666 /* Record position after the end of the pattern. */
1667 pat_end = base_pat + len_byte;
1668 /* BASE_PAT points to a character that we start scanning from.
1669 It is the first character in a forward search,
1670 the last character in a backward search. */
1671 if (direction < 0)
1672 base_pat = pat_end - 1;
1673
1674 BM_tab_base = BM_tab;
1675 BM_tab += 0400;
1676 j = dirlen; /* to get it in a register */
1677 /* A character that does not appear in the pattern induces a */
1678 /* stride equal to the pattern length. */
1679 while (BM_tab_base != BM_tab)
1680 {
1681 *--BM_tab = j;
1682 *--BM_tab = j;
1683 *--BM_tab = j;
1684 *--BM_tab = j;
1685 }
1686
1687 /* We use this for translation, instead of TRT itself.
1688 We fill this in to handle the characters that actually
1689 occur in the pattern. Others don't matter anyway! */
1690 bzero (simple_translate, sizeof simple_translate);
1691 for (i = 0; i < 0400; i++)
1692 simple_translate[i] = i;
1693
1694 if (char_base)
1695 {
1696 /* Setup translate_prev_byte1/2/3/4 from CHAR_BASE. Only a
1697 byte following them are the target of translation. */
1698 unsigned char str[MAX_MULTIBYTE_LENGTH];
1699 int len = CHAR_STRING (char_base, str);
1700
1701 translate_prev_byte1 = str[len - 2];
1702 if (len > 2)
1703 {
1704 translate_prev_byte2 = str[len - 3];
1705 if (len > 3)
1706 {
1707 translate_prev_byte3 = str[len - 4];
1708 if (len > 4)
1709 translate_prev_byte4 = str[len - 5];
1710 }
1711 }
1712 }
1713
1714 i = 0;
1715 while (i != infinity)
1716 {
1717 unsigned char *ptr = base_pat + i;
1718 i += direction;
1719 if (i == dirlen)
1720 i = infinity;
1721 if (! NILP (trt))
1722 {
1723 /* If the byte currently looking at is the last of a
1724 character to check case-equivalents, set CH to that
1725 character. An ASCII character and a non-ASCII character
1726 matching with CHAR_BASE are to be checked. */
1727 int ch = -1;
1728
1729 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1730 ch = *ptr;
1731 else if (char_base
1732 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1733 {
1734 unsigned char *charstart = ptr - 1;
1735
1736 while (! (CHAR_HEAD_P (*charstart)))
1737 charstart--;
1738 ch = STRING_CHAR (charstart, ptr - charstart + 1);
1739 if (char_base != (ch & ~0x3F))
1740 ch = -1;
1741 }
1742
1743 if (ch >= 0400)
1744 j = (ch & 0x3F) | 0200;
1745 else
1746 j = *ptr;
1747
1748 if (i == infinity)
1749 stride_for_teases = BM_tab[j];
1750
1751 BM_tab[j] = dirlen - i;
1752 /* A translation table is accompanied by its inverse -- see */
1753 /* comment following downcase_table for details */
1754 if (ch >= 0)
1755 {
1756 int starting_ch = ch;
1757 int starting_j = j;
1758
1759 while (1)
1760 {
1761 TRANSLATE (ch, inverse_trt, ch);
1762 if (ch >= 0400)
1763 j = (ch & 0x3F) | 0200;
1764 else
1765 j = ch;
1766
1767 /* For all the characters that map into CH,
1768 set up simple_translate to map the last byte
1769 into STARTING_J. */
1770 simple_translate[j] = starting_j;
1771 if (ch == starting_ch)
1772 break;
1773 BM_tab[j] = dirlen - i;
1774 }
1775 }
1776 }
1777 else
1778 {
1779 j = *ptr;
1780
1781 if (i == infinity)
1782 stride_for_teases = BM_tab[j];
1783 BM_tab[j] = dirlen - i;
1784 }
1785 /* stride_for_teases tells how much to stride if we get a */
1786 /* match on the far character but are subsequently */
1787 /* disappointed, by recording what the stride would have been */
1788 /* for that character if the last character had been */
1789 /* different. */
1790 }
1791 infinity = dirlen - infinity;
1792 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1793 /* loop invariant - POS_BYTE points at where last char (first
1794 char if reverse) of pattern would align in a possible match. */
1795 while (n != 0)
1796 {
1797 int tail_end;
1798 unsigned char *tail_end_ptr;
1799
1800 /* It's been reported that some (broken) compiler thinks that
1801 Boolean expressions in an arithmetic context are unsigned.
1802 Using an explicit ?1:0 prevents this. */
1803 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1804 < 0)
1805 return (n * (0 - direction));
1806 /* First we do the part we can by pointers (maybe nothing) */
1807 QUIT;
1808 pat = base_pat;
1809 limit = pos_byte - dirlen + direction;
1810 if (direction > 0)
1811 {
1812 limit = BUFFER_CEILING_OF (limit);
1813 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1814 can take on without hitting edge of buffer or the gap. */
1815 limit = min (limit, pos_byte + 20000);
1816 limit = min (limit, lim_byte - 1);
1817 }
1818 else
1819 {
1820 limit = BUFFER_FLOOR_OF (limit);
1821 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1822 can take on without hitting edge of buffer or the gap. */
1823 limit = max (limit, pos_byte - 20000);
1824 limit = max (limit, lim_byte);
1825 }
1826 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1827 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1828
1829 if ((limit - pos_byte) * direction > 20)
1830 {
1831 unsigned char *p2;
1832
1833 p_limit = BYTE_POS_ADDR (limit);
1834 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1835 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1836 while (1) /* use one cursor setting as long as i can */
1837 {
1838 if (direction > 0) /* worth duplicating */
1839 {
1840 /* Use signed comparison if appropriate
1841 to make cursor+infinity sure to be > p_limit.
1842 Assuming that the buffer lies in a range of addresses
1843 that are all "positive" (as ints) or all "negative",
1844 either kind of comparison will work as long
1845 as we don't step by infinity. So pick the kind
1846 that works when we do step by infinity. */
1847 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1848 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1849 cursor += BM_tab[*cursor];
1850 else
1851 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1852 cursor += BM_tab[*cursor];
1853 }
1854 else
1855 {
1856 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1857 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1858 cursor += BM_tab[*cursor];
1859 else
1860 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1861 cursor += BM_tab[*cursor];
1862 }
1863 /* If you are here, cursor is beyond the end of the searched region. */
1864 /* This can happen if you match on the far character of the pattern, */
1865 /* because the "stride" of that character is infinity, a number able */
1866 /* to throw you well beyond the end of the search. It can also */
1867 /* happen if you fail to match within the permitted region and would */
1868 /* otherwise try a character beyond that region */
1869 if ((cursor - p_limit) * direction <= len_byte)
1870 break; /* a small overrun is genuine */
1871 cursor -= infinity; /* large overrun = hit */
1872 i = dirlen - direction;
1873 if (! NILP (trt))
1874 {
1875 while ((i -= direction) + direction != 0)
1876 {
1877 int ch;
1878 cursor -= direction;
1879 /* Translate only the last byte of a character. */
1880 if (! multibyte
1881 || ((cursor == tail_end_ptr
1882 || CHAR_HEAD_P (cursor[1]))
1883 && (CHAR_HEAD_P (cursor[0])
1884 /* Check if this is the last byte of
1885 a translable character. */
1886 || (translate_prev_byte1 == cursor[-1]
1887 && (CHAR_HEAD_P (translate_prev_byte1)
1888 || (translate_prev_byte2 == cursor[-2]
1889 && (CHAR_HEAD_P (translate_prev_byte2)
1890 || (translate_prev_byte3 == cursor[-3]))))))))
1891 ch = simple_translate[*cursor];
1892 else
1893 ch = *cursor;
1894 if (pat[i] != ch)
1895 break;
1896 }
1897 }
1898 else
1899 {
1900 while ((i -= direction) + direction != 0)
1901 {
1902 cursor -= direction;
1903 if (pat[i] != *cursor)
1904 break;
1905 }
1906 }
1907 cursor += dirlen - i - direction; /* fix cursor */
1908 if (i + direction == 0)
1909 {
1910 int position;
1911
1912 cursor -= direction;
1913
1914 position = pos_byte + cursor - p2 + ((direction > 0)
1915 ? 1 - len_byte : 0);
1916 set_search_regs (position, len_byte);
1917
1918 if ((n -= direction) != 0)
1919 cursor += dirlen; /* to resume search */
1920 else
1921 return ((direction > 0)
1922 ? search_regs.end[0] : search_regs.start[0]);
1923 }
1924 else
1925 cursor += stride_for_teases; /* <sigh> we lose - */
1926 }
1927 pos_byte += cursor - p2;
1928 }
1929 else
1930 /* Now we'll pick up a clump that has to be done the hard */
1931 /* way because it covers a discontinuity */
1932 {
1933 limit = ((direction > 0)
1934 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1935 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1936 limit = ((direction > 0)
1937 ? min (limit + len_byte, lim_byte - 1)
1938 : max (limit - len_byte, lim_byte));
1939 /* LIMIT is now the last value POS_BYTE can have
1940 and still be valid for a possible match. */
1941 while (1)
1942 {
1943 /* This loop can be coded for space rather than */
1944 /* speed because it will usually run only once. */
1945 /* (the reach is at most len + 21, and typically */
1946 /* does not exceed len) */
1947 while ((limit - pos_byte) * direction >= 0)
1948 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1949 /* now run the same tests to distinguish going off the */
1950 /* end, a match or a phony match. */
1951 if ((pos_byte - limit) * direction <= len_byte)
1952 break; /* ran off the end */
1953 /* Found what might be a match.
1954 Set POS_BYTE back to last (first if reverse) pos. */
1955 pos_byte -= infinity;
1956 i = dirlen - direction;
1957 while ((i -= direction) + direction != 0)
1958 {
1959 int ch;
1960 unsigned char *ptr;
1961 pos_byte -= direction;
1962 ptr = BYTE_POS_ADDR (pos_byte);
1963 /* Translate only the last byte of a character. */
1964 if (! multibyte
1965 || ((ptr == tail_end_ptr
1966 || CHAR_HEAD_P (ptr[1]))
1967 && (CHAR_HEAD_P (ptr[0])
1968 /* Check if this is the last byte of a
1969 translable character. */
1970 || (translate_prev_byte1 == ptr[-1]
1971 && (CHAR_HEAD_P (translate_prev_byte1)
1972 || (translate_prev_byte2 == ptr[-2]
1973 && (CHAR_HEAD_P (translate_prev_byte2)
1974 || translate_prev_byte3 == ptr[-3])))))))
1975 ch = simple_translate[*ptr];
1976 else
1977 ch = *ptr;
1978 if (pat[i] != ch)
1979 break;
1980 }
1981 /* Above loop has moved POS_BYTE part or all the way
1982 back to the first pos (last pos if reverse).
1983 Set it once again at the last (first if reverse) char. */
1984 pos_byte += dirlen - i- direction;
1985 if (i + direction == 0)
1986 {
1987 int position;
1988 pos_byte -= direction;
1989
1990 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
1991
1992 set_search_regs (position, len_byte);
1993
1994 if ((n -= direction) != 0)
1995 pos_byte += dirlen; /* to resume search */
1996 else
1997 return ((direction > 0)
1998 ? search_regs.end[0] : search_regs.start[0]);
1999 }
2000 else
2001 pos_byte += stride_for_teases;
2002 }
2003 }
2004 /* We have done one clump. Can we continue? */
2005 if ((lim_byte - pos_byte) * direction < 0)
2006 return ((0 - n) * direction);
2007 }
2008 return BYTE_TO_CHAR (pos_byte);
2009 }
2010
2011 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2012 for the overall match just found in the current buffer.
2013 Also clear out the match data for registers 1 and up. */
2014
2015 static void
2016 set_search_regs (beg_byte, nbytes)
2017 int beg_byte, nbytes;
2018 {
2019 int i;
2020
2021 /* Make sure we have registers in which to store
2022 the match position. */
2023 if (search_regs.num_regs == 0)
2024 {
2025 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2026 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2027 search_regs.num_regs = 2;
2028 }
2029
2030 /* Clear out the other registers. */
2031 for (i = 1; i < search_regs.num_regs; i++)
2032 {
2033 search_regs.start[i] = -1;
2034 search_regs.end[i] = -1;
2035 }
2036
2037 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2038 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2039 XSETBUFFER (last_thing_searched, current_buffer);
2040 }
2041 \f
2042 /* Given a string of words separated by word delimiters,
2043 compute a regexp that matches those exact words
2044 separated by arbitrary punctuation. */
2045
2046 static Lisp_Object
2047 wordify (string)
2048 Lisp_Object string;
2049 {
2050 register unsigned char *p, *o;
2051 register int i, i_byte, len, punct_count = 0, word_count = 0;
2052 Lisp_Object val;
2053 int prev_c = 0;
2054 int adjust;
2055
2056 CHECK_STRING (string);
2057 p = SDATA (string);
2058 len = SCHARS (string);
2059
2060 for (i = 0, i_byte = 0; i < len; )
2061 {
2062 int c;
2063
2064 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2065
2066 if (SYNTAX (c) != Sword)
2067 {
2068 punct_count++;
2069 if (i > 0 && SYNTAX (prev_c) == Sword)
2070 word_count++;
2071 }
2072
2073 prev_c = c;
2074 }
2075
2076 if (SYNTAX (prev_c) == Sword)
2077 word_count++;
2078 if (!word_count)
2079 return empty_string;
2080
2081 adjust = - punct_count + 5 * (word_count - 1) + 4;
2082 if (STRING_MULTIBYTE (string))
2083 val = make_uninit_multibyte_string (len + adjust,
2084 SBYTES (string)
2085 + adjust);
2086 else
2087 val = make_uninit_string (len + adjust);
2088
2089 o = SDATA (val);
2090 *o++ = '\\';
2091 *o++ = 'b';
2092 prev_c = 0;
2093
2094 for (i = 0, i_byte = 0; i < len; )
2095 {
2096 int c;
2097 int i_byte_orig = i_byte;
2098
2099 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
2100
2101 if (SYNTAX (c) == Sword)
2102 {
2103 bcopy (SDATA (string) + i_byte_orig, o,
2104 i_byte - i_byte_orig);
2105 o += i_byte - i_byte_orig;
2106 }
2107 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2108 {
2109 *o++ = '\\';
2110 *o++ = 'W';
2111 *o++ = '\\';
2112 *o++ = 'W';
2113 *o++ = '*';
2114 }
2115
2116 prev_c = c;
2117 }
2118
2119 *o++ = '\\';
2120 *o++ = 'b';
2121
2122 return val;
2123 }
2124 \f
2125 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2126 "MSearch backward: ",
2127 doc: /* Search backward from point for STRING.
2128 Set point to the beginning of the occurrence found, and return point.
2129 An optional second argument bounds the search; it is a buffer position.
2130 The match found must not extend before that position.
2131 Optional third argument, if t, means if fail just return nil (no error).
2132 If not nil and not t, position at limit of search and return nil.
2133 Optional fourth argument is repeat count--search for successive occurrences.
2134
2135 Search case-sensitivity is determined by the value of the variable
2136 `case-fold-search', which see.
2137
2138 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2139 (string, bound, noerror, count)
2140 Lisp_Object string, bound, noerror, count;
2141 {
2142 return search_command (string, bound, noerror, count, -1, 0, 0);
2143 }
2144
2145 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2146 doc: /* Search forward from point for STRING.
2147 Set point to the end of the occurrence found, and return point.
2148 An optional second argument bounds the search; it is a buffer position.
2149 The match found must not extend after that position. A value of nil is
2150 equivalent to (point-max).
2151 Optional third argument, if t, means if fail just return nil (no error).
2152 If not nil and not t, move to limit of search and return nil.
2153 Optional fourth argument is repeat count--search for successive occurrences.
2154
2155 Search case-sensitivity is determined by the value of the variable
2156 `case-fold-search', which see.
2157
2158 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2159 (string, bound, noerror, count)
2160 Lisp_Object string, bound, noerror, count;
2161 {
2162 return search_command (string, bound, noerror, count, 1, 0, 0);
2163 }
2164
2165 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2166 "sWord search backward: ",
2167 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2168 Set point to the beginning of the occurrence found, and return point.
2169 An optional second argument bounds the search; it is a buffer position.
2170 The match found must not extend before that position.
2171 Optional third argument, if t, means if fail just return nil (no error).
2172 If not nil and not t, move to limit of search and return nil.
2173 Optional fourth argument is repeat count--search for successive occurrences. */)
2174 (string, bound, noerror, count)
2175 Lisp_Object string, bound, noerror, count;
2176 {
2177 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2178 }
2179
2180 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2181 "sWord search: ",
2182 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2183 Set point to the end of the occurrence found, and return point.
2184 An optional second argument bounds the search; it is a buffer position.
2185 The match found must not extend after that position.
2186 Optional third argument, if t, means if fail just return nil (no error).
2187 If not nil and not t, move to limit of search and return nil.
2188 Optional fourth argument is repeat count--search for successive occurrences. */)
2189 (string, bound, noerror, count)
2190 Lisp_Object string, bound, noerror, count;
2191 {
2192 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2193 }
2194
2195 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2196 "sRE search backward: ",
2197 doc: /* Search backward from point for match for regular expression REGEXP.
2198 Set point to the beginning of the match, and return point.
2199 The match found is the one starting last in the buffer
2200 and yet ending before the origin of the search.
2201 An optional second argument bounds the search; it is a buffer position.
2202 The match found must start at or after that position.
2203 Optional third argument, if t, means if fail just return nil (no error).
2204 If not nil and not t, move to limit of search and return nil.
2205 Optional fourth argument is repeat count--search for successive occurrences.
2206 See also the functions `match-beginning', `match-end', `match-string',
2207 and `replace-match'. */)
2208 (regexp, bound, noerror, count)
2209 Lisp_Object regexp, bound, noerror, count;
2210 {
2211 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2212 }
2213
2214 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2215 "sRE search: ",
2216 doc: /* Search forward from point for regular expression REGEXP.
2217 Set point to the end of the occurrence found, and return point.
2218 An optional second argument bounds the search; it is a buffer position.
2219 The match found must not extend after that position.
2220 Optional third argument, if t, means if fail just return nil (no error).
2221 If not nil and not t, move to limit of search and return nil.
2222 Optional fourth argument is repeat count--search for successive occurrences.
2223 See also the functions `match-beginning', `match-end', `match-string',
2224 and `replace-match'. */)
2225 (regexp, bound, noerror, count)
2226 Lisp_Object regexp, bound, noerror, count;
2227 {
2228 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2229 }
2230
2231 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2232 "sPosix search backward: ",
2233 doc: /* Search backward from point for match for regular expression REGEXP.
2234 Find the longest match in accord with Posix regular expression rules.
2235 Set point to the beginning of the match, and return point.
2236 The match found is the one starting last in the buffer
2237 and yet ending before the origin of the search.
2238 An optional second argument bounds the search; it is a buffer position.
2239 The match found must start at or after that position.
2240 Optional third argument, if t, means if fail just return nil (no error).
2241 If not nil and not t, move to limit of search and return nil.
2242 Optional fourth argument is repeat count--search for successive occurrences.
2243 See also the functions `match-beginning', `match-end', `match-string',
2244 and `replace-match'. */)
2245 (regexp, bound, noerror, count)
2246 Lisp_Object regexp, bound, noerror, count;
2247 {
2248 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2249 }
2250
2251 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2252 "sPosix search: ",
2253 doc: /* Search forward from point for regular expression REGEXP.
2254 Find the longest match in accord with Posix regular expression rules.
2255 Set point to the end of the occurrence found, and return point.
2256 An optional second argument bounds the search; it is a buffer position.
2257 The match found must not extend after that position.
2258 Optional third argument, if t, means if fail just return nil (no error).
2259 If not nil and not t, move to limit of search and return nil.
2260 Optional fourth argument is repeat count--search for successive occurrences.
2261 See also the functions `match-beginning', `match-end', `match-string',
2262 and `replace-match'. */)
2263 (regexp, bound, noerror, count)
2264 Lisp_Object regexp, bound, noerror, count;
2265 {
2266 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2267 }
2268 \f
2269 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2270 doc: /* Replace text matched by last search with NEWTEXT.
2271 Leave point at the end of the replacement text.
2272
2273 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2274 Otherwise maybe capitalize the whole text, or maybe just word initials,
2275 based on the replaced text.
2276 If the replaced text has only capital letters
2277 and has at least one multiletter word, convert NEWTEXT to all caps.
2278 Otherwise if all words are capitalized in the replaced text,
2279 capitalize each word in NEWTEXT.
2280
2281 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2282 Otherwise treat `\\' as special:
2283 `\\&' in NEWTEXT means substitute original matched text.
2284 `\\N' means substitute what matched the Nth `\\(...\\)'.
2285 If Nth parens didn't match, substitute nothing.
2286 `\\\\' means insert one `\\'.
2287 Case conversion does not apply to these substitutions.
2288
2289 FIXEDCASE and LITERAL are optional arguments.
2290
2291 The optional fourth argument STRING can be a string to modify.
2292 This is meaningful when the previous match was done against STRING,
2293 using `string-match'. When used this way, `replace-match'
2294 creates and returns a new string made by copying STRING and replacing
2295 the part of STRING that was matched.
2296
2297 The optional fifth argument SUBEXP specifies a subexpression;
2298 it says to replace just that subexpression with NEWTEXT,
2299 rather than replacing the entire matched text.
2300 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2301 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2302 NEWTEXT in place of subexp N.
2303 This is useful only after a regular expression search or match,
2304 since only regular expressions have distinguished subexpressions. */)
2305 (newtext, fixedcase, literal, string, subexp)
2306 Lisp_Object newtext, fixedcase, literal, string, subexp;
2307 {
2308 enum { nochange, all_caps, cap_initial } case_action;
2309 register int pos, pos_byte;
2310 int some_multiletter_word;
2311 int some_lowercase;
2312 int some_uppercase;
2313 int some_nonuppercase_initial;
2314 register int c, prevc;
2315 int sub;
2316 int opoint, newpoint;
2317
2318 CHECK_STRING (newtext);
2319
2320 if (! NILP (string))
2321 CHECK_STRING (string);
2322
2323 case_action = nochange; /* We tried an initialization */
2324 /* but some C compilers blew it */
2325
2326 if (search_regs.num_regs <= 0)
2327 error ("`replace-match' called before any match found");
2328
2329 if (NILP (subexp))
2330 sub = 0;
2331 else
2332 {
2333 CHECK_NUMBER (subexp);
2334 sub = XINT (subexp);
2335 if (sub < 0 || sub >= search_regs.num_regs)
2336 args_out_of_range (subexp, make_number (search_regs.num_regs));
2337 }
2338
2339 if (NILP (string))
2340 {
2341 if (search_regs.start[sub] < BEGV
2342 || search_regs.start[sub] > search_regs.end[sub]
2343 || search_regs.end[sub] > ZV)
2344 args_out_of_range (make_number (search_regs.start[sub]),
2345 make_number (search_regs.end[sub]));
2346 }
2347 else
2348 {
2349 if (search_regs.start[sub] < 0
2350 || search_regs.start[sub] > search_regs.end[sub]
2351 || search_regs.end[sub] > SCHARS (string))
2352 args_out_of_range (make_number (search_regs.start[sub]),
2353 make_number (search_regs.end[sub]));
2354 }
2355
2356 if (NILP (fixedcase))
2357 {
2358 /* Decide how to casify by examining the matched text. */
2359 int last;
2360
2361 pos = search_regs.start[sub];
2362 last = search_regs.end[sub];
2363
2364 if (NILP (string))
2365 pos_byte = CHAR_TO_BYTE (pos);
2366 else
2367 pos_byte = string_char_to_byte (string, pos);
2368
2369 prevc = '\n';
2370 case_action = all_caps;
2371
2372 /* some_multiletter_word is set nonzero if any original word
2373 is more than one letter long. */
2374 some_multiletter_word = 0;
2375 some_lowercase = 0;
2376 some_nonuppercase_initial = 0;
2377 some_uppercase = 0;
2378
2379 while (pos < last)
2380 {
2381 if (NILP (string))
2382 {
2383 c = FETCH_CHAR_AS_MULTIBYTE (pos_byte);
2384 INC_BOTH (pos, pos_byte);
2385 }
2386 else
2387 FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, pos, pos_byte);
2388
2389 if (LOWERCASEP (c))
2390 {
2391 /* Cannot be all caps if any original char is lower case */
2392
2393 some_lowercase = 1;
2394 if (SYNTAX (prevc) != Sword)
2395 some_nonuppercase_initial = 1;
2396 else
2397 some_multiletter_word = 1;
2398 }
2399 else if (UPPERCASEP (c))
2400 {
2401 some_uppercase = 1;
2402 if (SYNTAX (prevc) != Sword)
2403 ;
2404 else
2405 some_multiletter_word = 1;
2406 }
2407 else
2408 {
2409 /* If the initial is a caseless word constituent,
2410 treat that like a lowercase initial. */
2411 if (SYNTAX (prevc) != Sword)
2412 some_nonuppercase_initial = 1;
2413 }
2414
2415 prevc = c;
2416 }
2417
2418 /* Convert to all caps if the old text is all caps
2419 and has at least one multiletter word. */
2420 if (! some_lowercase && some_multiletter_word)
2421 case_action = all_caps;
2422 /* Capitalize each word, if the old text has all capitalized words. */
2423 else if (!some_nonuppercase_initial && some_multiletter_word)
2424 case_action = cap_initial;
2425 else if (!some_nonuppercase_initial && some_uppercase)
2426 /* Should x -> yz, operating on X, give Yz or YZ?
2427 We'll assume the latter. */
2428 case_action = all_caps;
2429 else
2430 case_action = nochange;
2431 }
2432
2433 /* Do replacement in a string. */
2434 if (!NILP (string))
2435 {
2436 Lisp_Object before, after;
2437
2438 before = Fsubstring (string, make_number (0),
2439 make_number (search_regs.start[sub]));
2440 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2441
2442 /* Substitute parts of the match into NEWTEXT
2443 if desired. */
2444 if (NILP (literal))
2445 {
2446 int lastpos = 0;
2447 int lastpos_byte = 0;
2448 /* We build up the substituted string in ACCUM. */
2449 Lisp_Object accum;
2450 Lisp_Object middle;
2451 int length = SBYTES (newtext);
2452
2453 accum = Qnil;
2454
2455 for (pos_byte = 0, pos = 0; pos_byte < length;)
2456 {
2457 int substart = -1;
2458 int subend = 0;
2459 int delbackslash = 0;
2460
2461 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2462
2463 if (c == '\\')
2464 {
2465 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2466
2467 if (c == '&')
2468 {
2469 substart = search_regs.start[sub];
2470 subend = search_regs.end[sub];
2471 }
2472 else if (c >= '1' && c <= '9')
2473 {
2474 if (search_regs.start[c - '0'] >= 0
2475 && c <= search_regs.num_regs + '0')
2476 {
2477 substart = search_regs.start[c - '0'];
2478 subend = search_regs.end[c - '0'];
2479 }
2480 else
2481 {
2482 /* If that subexp did not match,
2483 replace \\N with nothing. */
2484 substart = 0;
2485 subend = 0;
2486 }
2487 }
2488 else if (c == '\\')
2489 delbackslash = 1;
2490 else
2491 error ("Invalid use of `\\' in replacement text");
2492 }
2493 if (substart >= 0)
2494 {
2495 if (pos - 2 != lastpos)
2496 middle = substring_both (newtext, lastpos,
2497 lastpos_byte,
2498 pos - 2, pos_byte - 2);
2499 else
2500 middle = Qnil;
2501 accum = concat3 (accum, middle,
2502 Fsubstring (string,
2503 make_number (substart),
2504 make_number (subend)));
2505 lastpos = pos;
2506 lastpos_byte = pos_byte;
2507 }
2508 else if (delbackslash)
2509 {
2510 middle = substring_both (newtext, lastpos,
2511 lastpos_byte,
2512 pos - 1, pos_byte - 1);
2513
2514 accum = concat2 (accum, middle);
2515 lastpos = pos;
2516 lastpos_byte = pos_byte;
2517 }
2518 }
2519
2520 if (pos != lastpos)
2521 middle = substring_both (newtext, lastpos,
2522 lastpos_byte,
2523 pos, pos_byte);
2524 else
2525 middle = Qnil;
2526
2527 newtext = concat2 (accum, middle);
2528 }
2529
2530 /* Do case substitution in NEWTEXT if desired. */
2531 if (case_action == all_caps)
2532 newtext = Fupcase (newtext);
2533 else if (case_action == cap_initial)
2534 newtext = Fupcase_initials (newtext);
2535
2536 return concat3 (before, newtext, after);
2537 }
2538
2539 /* Record point, then move (quietly) to the start of the match. */
2540 if (PT >= search_regs.end[sub])
2541 opoint = PT - ZV;
2542 else if (PT > search_regs.start[sub])
2543 opoint = search_regs.end[sub] - ZV;
2544 else
2545 opoint = PT;
2546
2547 /* If we want non-literal replacement,
2548 perform substitution on the replacement string. */
2549 if (NILP (literal))
2550 {
2551 int length = SBYTES (newtext);
2552 unsigned char *substed;
2553 int substed_alloc_size, substed_len;
2554 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2555 int str_multibyte = STRING_MULTIBYTE (newtext);
2556 Lisp_Object rev_tbl;
2557 int really_changed = 0;
2558
2559 rev_tbl = Qnil;
2560
2561 substed_alloc_size = length * 2 + 100;
2562 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2563 substed_len = 0;
2564
2565 /* Go thru NEWTEXT, producing the actual text to insert in
2566 SUBSTED while adjusting multibyteness to that of the current
2567 buffer. */
2568
2569 for (pos_byte = 0, pos = 0; pos_byte < length;)
2570 {
2571 unsigned char str[MAX_MULTIBYTE_LENGTH];
2572 unsigned char *add_stuff = NULL;
2573 int add_len = 0;
2574 int idx = -1;
2575
2576 if (str_multibyte)
2577 {
2578 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2579 if (!buf_multibyte)
2580 c = multibyte_char_to_unibyte (c, rev_tbl);
2581 }
2582 else
2583 {
2584 /* Note that we don't have to increment POS. */
2585 c = SREF (newtext, pos_byte++);
2586 if (buf_multibyte)
2587 c = unibyte_char_to_multibyte (c);
2588 }
2589
2590 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2591 or set IDX to a match index, which means put that part
2592 of the buffer text into SUBSTED. */
2593
2594 if (c == '\\')
2595 {
2596 really_changed = 1;
2597
2598 if (str_multibyte)
2599 {
2600 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2601 pos, pos_byte);
2602 if (!buf_multibyte && !ASCII_CHAR_P (c))
2603 c = multibyte_char_to_unibyte (c, rev_tbl);
2604 }
2605 else
2606 {
2607 c = SREF (newtext, pos_byte++);
2608 if (buf_multibyte)
2609 c = unibyte_char_to_multibyte (c);
2610 }
2611
2612 if (c == '&')
2613 idx = sub;
2614 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2615 {
2616 if (search_regs.start[c - '0'] >= 1)
2617 idx = c - '0';
2618 }
2619 else if (c == '\\')
2620 add_len = 1, add_stuff = "\\";
2621 else
2622 {
2623 xfree (substed);
2624 error ("Invalid use of `\\' in replacement text");
2625 }
2626 }
2627 else
2628 {
2629 add_len = CHAR_STRING (c, str);
2630 add_stuff = str;
2631 }
2632
2633 /* If we want to copy part of a previous match,
2634 set up ADD_STUFF and ADD_LEN to point to it. */
2635 if (idx >= 0)
2636 {
2637 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2638 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2639 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2640 move_gap (search_regs.start[idx]);
2641 add_stuff = BYTE_POS_ADDR (begbyte);
2642 }
2643
2644 /* Now the stuff we want to add to SUBSTED
2645 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2646
2647 /* Make sure SUBSTED is big enough. */
2648 if (substed_len + add_len >= substed_alloc_size)
2649 {
2650 substed_alloc_size = substed_len + add_len + 500;
2651 substed = (unsigned char *) xrealloc (substed,
2652 substed_alloc_size + 1);
2653 }
2654
2655 /* Now add to the end of SUBSTED. */
2656 if (add_stuff)
2657 {
2658 bcopy (add_stuff, substed + substed_len, add_len);
2659 substed_len += add_len;
2660 }
2661 }
2662
2663 if (really_changed)
2664 {
2665 if (buf_multibyte)
2666 {
2667 int nchars = multibyte_chars_in_text (substed, substed_len);
2668
2669 newtext = make_multibyte_string (substed, nchars, substed_len);
2670 }
2671 else
2672 newtext = make_unibyte_string (substed, substed_len);
2673 }
2674 xfree (substed);
2675 }
2676
2677 /* Replace the old text with the new in the cleanest possible way. */
2678 replace_range (search_regs.start[sub], search_regs.end[sub],
2679 newtext, 1, 0, 1);
2680 newpoint = search_regs.start[sub] + SCHARS (newtext);
2681
2682 if (case_action == all_caps)
2683 Fupcase_region (make_number (search_regs.start[sub]),
2684 make_number (newpoint));
2685 else if (case_action == cap_initial)
2686 Fupcase_initials_region (make_number (search_regs.start[sub]),
2687 make_number (newpoint));
2688
2689 /* Adjust search data for this change. */
2690 {
2691 int oldend = search_regs.end[sub];
2692 int oldstart = search_regs.start[sub];
2693 int change = newpoint - search_regs.end[sub];
2694 int i;
2695
2696 for (i = 0; i < search_regs.num_regs; i++)
2697 {
2698 if (search_regs.start[i] >= oldend)
2699 search_regs.start[i] += change;
2700 else if (search_regs.start[i] > oldstart)
2701 search_regs.start[i] = oldstart;
2702 if (search_regs.end[i] >= oldend)
2703 search_regs.end[i] += change;
2704 else if (search_regs.end[i] > oldstart)
2705 search_regs.end[i] = oldstart;
2706 }
2707 }
2708
2709 /* Put point back where it was in the text. */
2710 if (opoint <= 0)
2711 TEMP_SET_PT (opoint + ZV);
2712 else
2713 TEMP_SET_PT (opoint);
2714
2715 /* Now move point "officially" to the start of the inserted replacement. */
2716 move_if_not_intangible (newpoint);
2717
2718 return Qnil;
2719 }
2720 \f
2721 static Lisp_Object
2722 match_limit (num, beginningp)
2723 Lisp_Object num;
2724 int beginningp;
2725 {
2726 register int n;
2727
2728 CHECK_NUMBER (num);
2729 n = XINT (num);
2730 if (n < 0)
2731 args_out_of_range (num, make_number (0));
2732 if (search_regs.num_regs <= 0)
2733 error ("No match data, because no search succeeded");
2734 if (n >= search_regs.num_regs
2735 || search_regs.start[n] < 0)
2736 return Qnil;
2737 return (make_number ((beginningp) ? search_regs.start[n]
2738 : search_regs.end[n]));
2739 }
2740
2741 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2742 doc: /* Return position of start of text matched by last search.
2743 SUBEXP, a number, specifies which parenthesized expression in the last
2744 regexp.
2745 Value is nil if SUBEXPth pair didn't match, or there were less than
2746 SUBEXP pairs.
2747 Zero means the entire text matched by the whole regexp or whole string. */)
2748 (subexp)
2749 Lisp_Object subexp;
2750 {
2751 return match_limit (subexp, 1);
2752 }
2753
2754 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2755 doc: /* Return position of end of text matched by last search.
2756 SUBEXP, a number, specifies which parenthesized expression in the last
2757 regexp.
2758 Value is nil if SUBEXPth pair didn't match, or there were less than
2759 SUBEXP pairs.
2760 Zero means the entire text matched by the whole regexp or whole string. */)
2761 (subexp)
2762 Lisp_Object subexp;
2763 {
2764 return match_limit (subexp, 0);
2765 }
2766
2767 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2768 doc: /* Return a list containing all info on what the last search matched.
2769 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2770 All the elements are markers or nil (nil if the Nth pair didn't match)
2771 if the last match was on a buffer; integers or nil if a string was matched.
2772 Use `store-match-data' to reinstate the data in this list.
2773
2774 If INTEGERS (the optional first argument) is non-nil, always use
2775 integers \(rather than markers) to represent buffer positions. In
2776 this case, and if the last match was in a buffer, the buffer will get
2777 stored as one additional element at the end of the list.
2778
2779 If REUSE is a list, reuse it as part of the value. If REUSE is long
2780 enough to hold all the values, and if INTEGERS is non-nil, no consing
2781 is done.
2782
2783 If optional third arg RESEAT is non-nil, any previous markers on the
2784 REUSE list will be modified to point to nowhere.
2785
2786 Return value is undefined if the last search failed. */)
2787 (integers, reuse, reseat)
2788 Lisp_Object integers, reuse, reseat;
2789 {
2790 Lisp_Object tail, prev;
2791 Lisp_Object *data;
2792 int i, len;
2793
2794 if (!NILP (reseat))
2795 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2796 if (MARKERP (XCAR (tail)))
2797 {
2798 unchain_marker (XMARKER (XCAR (tail)));
2799 XSETCAR (tail, Qnil);
2800 }
2801
2802 if (NILP (last_thing_searched))
2803 return Qnil;
2804
2805 prev = Qnil;
2806
2807 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2808 * sizeof (Lisp_Object));
2809
2810 len = 0;
2811 for (i = 0; i < search_regs.num_regs; i++)
2812 {
2813 int start = search_regs.start[i];
2814 if (start >= 0)
2815 {
2816 if (EQ (last_thing_searched, Qt)
2817 || ! NILP (integers))
2818 {
2819 XSETFASTINT (data[2 * i], start);
2820 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2821 }
2822 else if (BUFFERP (last_thing_searched))
2823 {
2824 data[2 * i] = Fmake_marker ();
2825 Fset_marker (data[2 * i],
2826 make_number (start),
2827 last_thing_searched);
2828 data[2 * i + 1] = Fmake_marker ();
2829 Fset_marker (data[2 * i + 1],
2830 make_number (search_regs.end[i]),
2831 last_thing_searched);
2832 }
2833 else
2834 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2835 abort ();
2836
2837 len = 2 * i + 2;
2838 }
2839 else
2840 data[2 * i] = data[2 * i + 1] = Qnil;
2841 }
2842
2843 if (BUFFERP (last_thing_searched) && !NILP (integers))
2844 {
2845 data[len] = last_thing_searched;
2846 len++;
2847 }
2848
2849 /* If REUSE is not usable, cons up the values and return them. */
2850 if (! CONSP (reuse))
2851 return Flist (len, data);
2852
2853 /* If REUSE is a list, store as many value elements as will fit
2854 into the elements of REUSE. */
2855 for (i = 0, tail = reuse; CONSP (tail);
2856 i++, tail = XCDR (tail))
2857 {
2858 if (i < len)
2859 XSETCAR (tail, data[i]);
2860 else
2861 XSETCAR (tail, Qnil);
2862 prev = tail;
2863 }
2864
2865 /* If we couldn't fit all value elements into REUSE,
2866 cons up the rest of them and add them to the end of REUSE. */
2867 if (i < len)
2868 XSETCDR (prev, Flist (len - i, data + i));
2869
2870 return reuse;
2871 }
2872
2873 /* Internal usage only:
2874 If RESEAT is `evaporate', put the markers back on the free list
2875 immediately. No other references to the markers must exist in this case,
2876 so it is used only internally on the unwind stack and save-match-data from
2877 Lisp. */
2878
2879 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2880 doc: /* Set internal data on last search match from elements of LIST.
2881 LIST should have been created by calling `match-data' previously.
2882
2883 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2884 (list, reseat)
2885 register Lisp_Object list, reseat;
2886 {
2887 register int i;
2888 register Lisp_Object marker;
2889
2890 if (running_asynch_code)
2891 save_search_regs ();
2892
2893 CHECK_LIST (list);
2894
2895 /* Unless we find a marker with a buffer or an explicit buffer
2896 in LIST, assume that this match data came from a string. */
2897 last_thing_searched = Qt;
2898
2899 /* Allocate registers if they don't already exist. */
2900 {
2901 int length = XFASTINT (Flength (list)) / 2;
2902
2903 if (length > search_regs.num_regs)
2904 {
2905 if (search_regs.num_regs == 0)
2906 {
2907 search_regs.start
2908 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2909 search_regs.end
2910 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
2911 }
2912 else
2913 {
2914 search_regs.start
2915 = (regoff_t *) xrealloc (search_regs.start,
2916 length * sizeof (regoff_t));
2917 search_regs.end
2918 = (regoff_t *) xrealloc (search_regs.end,
2919 length * sizeof (regoff_t));
2920 }
2921
2922 for (i = search_regs.num_regs; i < length; i++)
2923 search_regs.start[i] = -1;
2924
2925 search_regs.num_regs = length;
2926 }
2927
2928 for (i = 0; CONSP (list); i++)
2929 {
2930 marker = XCAR (list);
2931 if (BUFFERP (marker))
2932 {
2933 last_thing_searched = marker;
2934 break;
2935 }
2936 if (i >= length)
2937 break;
2938 if (NILP (marker))
2939 {
2940 search_regs.start[i] = -1;
2941 list = XCDR (list);
2942 }
2943 else
2944 {
2945 int from;
2946 Lisp_Object m;
2947
2948 m = marker;
2949 if (MARKERP (marker))
2950 {
2951 if (XMARKER (marker)->buffer == 0)
2952 XSETFASTINT (marker, 0);
2953 else
2954 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2955 }
2956
2957 CHECK_NUMBER_COERCE_MARKER (marker);
2958 from = XINT (marker);
2959
2960 if (!NILP (reseat) && MARKERP (m))
2961 {
2962 if (EQ (reseat, Qevaporate))
2963 free_marker (m);
2964 else
2965 unchain_marker (XMARKER (m));
2966 XSETCAR (list, Qnil);
2967 }
2968
2969 if ((list = XCDR (list), !CONSP (list)))
2970 break;
2971
2972 m = marker = XCAR (list);
2973
2974 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2975 XSETFASTINT (marker, 0);
2976
2977 CHECK_NUMBER_COERCE_MARKER (marker);
2978 search_regs.start[i] = from;
2979 search_regs.end[i] = XINT (marker);
2980
2981 if (!NILP (reseat) && MARKERP (m))
2982 {
2983 if (EQ (reseat, Qevaporate))
2984 free_marker (m);
2985 else
2986 unchain_marker (XMARKER (m));
2987 XSETCAR (list, Qnil);
2988 }
2989 }
2990 list = XCDR (list);
2991 }
2992
2993 for (; i < search_regs.num_regs; i++)
2994 search_regs.start[i] = -1;
2995 }
2996
2997 return Qnil;
2998 }
2999
3000 /* If non-zero the match data have been saved in saved_search_regs
3001 during the execution of a sentinel or filter. */
3002 static int search_regs_saved;
3003 static struct re_registers saved_search_regs;
3004 static Lisp_Object saved_last_thing_searched;
3005
3006 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3007 if asynchronous code (filter or sentinel) is running. */
3008 static void
3009 save_search_regs ()
3010 {
3011 if (!search_regs_saved)
3012 {
3013 saved_search_regs.num_regs = search_regs.num_regs;
3014 saved_search_regs.start = search_regs.start;
3015 saved_search_regs.end = search_regs.end;
3016 saved_last_thing_searched = last_thing_searched;
3017 last_thing_searched = Qnil;
3018 search_regs.num_regs = 0;
3019 search_regs.start = 0;
3020 search_regs.end = 0;
3021
3022 search_regs_saved = 1;
3023 }
3024 }
3025
3026 /* Called upon exit from filters and sentinels. */
3027 void
3028 restore_search_regs ()
3029 {
3030 if (search_regs_saved)
3031 {
3032 if (search_regs.num_regs > 0)
3033 {
3034 xfree (search_regs.start);
3035 xfree (search_regs.end);
3036 }
3037 search_regs.num_regs = saved_search_regs.num_regs;
3038 search_regs.start = saved_search_regs.start;
3039 search_regs.end = saved_search_regs.end;
3040 last_thing_searched = saved_last_thing_searched;
3041 saved_last_thing_searched = Qnil;
3042 search_regs_saved = 0;
3043 }
3044 }
3045
3046 static Lisp_Object
3047 unwind_set_match_data (list)
3048 Lisp_Object list;
3049 {
3050 /* It is safe to free (evaporate) the markers immediately. */
3051 return Fset_match_data (list, Qevaporate);
3052 }
3053
3054 /* Called to unwind protect the match data. */
3055 void
3056 record_unwind_save_match_data ()
3057 {
3058 record_unwind_protect (unwind_set_match_data,
3059 Fmatch_data (Qnil, Qnil, Qnil));
3060 }
3061
3062 /* Quote a string to inactivate reg-expr chars */
3063
3064 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3065 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3066 (string)
3067 Lisp_Object string;
3068 {
3069 register unsigned char *in, *out, *end;
3070 register unsigned char *temp;
3071 int backslashes_added = 0;
3072
3073 CHECK_STRING (string);
3074
3075 temp = (unsigned char *) alloca (SBYTES (string) * 2);
3076
3077 /* Now copy the data into the new string, inserting escapes. */
3078
3079 in = SDATA (string);
3080 end = in + SBYTES (string);
3081 out = temp;
3082
3083 for (; in != end; in++)
3084 {
3085 if (*in == '['
3086 || *in == '*' || *in == '.' || *in == '\\'
3087 || *in == '?' || *in == '+'
3088 || *in == '^' || *in == '$')
3089 *out++ = '\\', backslashes_added++;
3090 *out++ = *in;
3091 }
3092
3093 return make_specified_string (temp,
3094 SCHARS (string) + backslashes_added,
3095 out - temp,
3096 STRING_MULTIBYTE (string));
3097 }
3098 \f
3099 void
3100 syms_of_search ()
3101 {
3102 register int i;
3103
3104 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3105 {
3106 searchbufs[i].buf.allocated = 100;
3107 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3108 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3109 searchbufs[i].regexp = Qnil;
3110 searchbufs[i].whitespace_regexp = Qnil;
3111 searchbufs[i].syntax_table = Qnil;
3112 staticpro (&searchbufs[i].regexp);
3113 staticpro (&searchbufs[i].whitespace_regexp);
3114 staticpro (&searchbufs[i].syntax_table);
3115 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3116 }
3117 searchbuf_head = &searchbufs[0];
3118
3119 Qsearch_failed = intern ("search-failed");
3120 staticpro (&Qsearch_failed);
3121 Qinvalid_regexp = intern ("invalid-regexp");
3122 staticpro (&Qinvalid_regexp);
3123
3124 Fput (Qsearch_failed, Qerror_conditions,
3125 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3126 Fput (Qsearch_failed, Qerror_message,
3127 build_string ("Search failed"));
3128
3129 Fput (Qinvalid_regexp, Qerror_conditions,
3130 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3131 Fput (Qinvalid_regexp, Qerror_message,
3132 build_string ("Invalid regexp"));
3133
3134 last_thing_searched = Qnil;
3135 staticpro (&last_thing_searched);
3136
3137 saved_last_thing_searched = Qnil;
3138 staticpro (&saved_last_thing_searched);
3139
3140 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3141 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3142 Some commands use this for user-specified regexps.
3143 Spaces that occur inside character classes or repetition operators
3144 or other such regexp constructs are not replaced with this.
3145 A value of nil (which is the normal value) means treat spaces literally. */);
3146 Vsearch_spaces_regexp = Qnil;
3147
3148 defsubr (&Slooking_at);
3149 defsubr (&Sposix_looking_at);
3150 defsubr (&Sstring_match);
3151 defsubr (&Sposix_string_match);
3152 defsubr (&Ssearch_forward);
3153 defsubr (&Ssearch_backward);
3154 defsubr (&Sword_search_forward);
3155 defsubr (&Sword_search_backward);
3156 defsubr (&Sre_search_forward);
3157 defsubr (&Sre_search_backward);
3158 defsubr (&Sposix_search_forward);
3159 defsubr (&Sposix_search_backward);
3160 defsubr (&Sreplace_match);
3161 defsubr (&Smatch_beginning);
3162 defsubr (&Smatch_end);
3163 defsubr (&Smatch_data);
3164 defsubr (&Sset_match_data);
3165 defsubr (&Sregexp_quote);
3166 }
3167
3168 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3169 (do not change this comment) */