]> code.delx.au - gnu-emacs/blob - src/search.c
Merge from emacs--rel--22
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
3 2003, 2004, 2005, 2006, 2007, 2008
4 Free Software Foundation, Inc.
5
6 This file is part of GNU Emacs.
7
8 GNU Emacs is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GNU Emacs is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GNU Emacs; see the file COPYING. If not, write to
20 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
21 Boston, MA 02110-1301, USA. */
22
23
24 #include <config.h>
25 #include "lisp.h"
26 #include "syntax.h"
27 #include "category.h"
28 #include "buffer.h"
29 #include "charset.h"
30 #include "region-cache.h"
31 #include "commands.h"
32 #include "blockinput.h"
33 #include "intervals.h"
34
35 #include <sys/types.h>
36 #include "regex.h"
37
38 #define REGEXP_CACHE_SIZE 20
39
40 /* If the regexp is non-nil, then the buffer contains the compiled form
41 of that regexp, suitable for searching. */
42 struct regexp_cache
43 {
44 struct regexp_cache *next;
45 Lisp_Object regexp, whitespace_regexp;
46 /* Syntax table for which the regexp applies. We need this because
47 of character classes. If this is t, then the compiled pattern is valid
48 for any syntax-table. */
49 Lisp_Object syntax_table;
50 struct re_pattern_buffer buf;
51 char fastmap[0400];
52 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
53 char posix;
54 };
55
56 /* The instances of that struct. */
57 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
58
59 /* The head of the linked list; points to the most recently used buffer. */
60 struct regexp_cache *searchbuf_head;
61
62
63 /* Every call to re_match, etc., must pass &search_regs as the regs
64 argument unless you can show it is unnecessary (i.e., if re_match
65 is certainly going to be called again before region-around-match
66 can be called).
67
68 Since the registers are now dynamically allocated, we need to make
69 sure not to refer to the Nth register before checking that it has
70 been allocated by checking search_regs.num_regs.
71
72 The regex code keeps track of whether it has allocated the search
73 buffer using bits in the re_pattern_buffer. This means that whenever
74 you compile a new pattern, it completely forgets whether it has
75 allocated any registers, and will allocate new registers the next
76 time you call a searching or matching function. Therefore, we need
77 to call re_set_registers after compiling a new pattern or after
78 setting the match registers, so that the regex functions will be
79 able to free or re-allocate it properly. */
80 static struct re_registers search_regs;
81
82 /* The buffer in which the last search was performed, or
83 Qt if the last search was done in a string;
84 Qnil if no searching has been done yet. */
85 static Lisp_Object last_thing_searched;
86
87 /* error condition signaled when regexp compile_pattern fails */
88
89 Lisp_Object Qinvalid_regexp;
90
91 /* Error condition used for failing searches */
92 Lisp_Object Qsearch_failed;
93
94 Lisp_Object Vsearch_spaces_regexp;
95
96 /* If non-nil, the match data will not be changed during call to
97 searching or matching functions. This variable is for internal use
98 only. */
99 Lisp_Object Vinhibit_changing_match_data;
100
101 static void set_search_regs ();
102 static void save_search_regs ();
103 static int simple_search ();
104 static int boyer_moore ();
105 static int search_buffer ();
106 static void matcher_overflow () NO_RETURN;
107
108 static void
109 matcher_overflow ()
110 {
111 error ("Stack overflow in regexp matcher");
112 }
113
114 /* Compile a regexp and signal a Lisp error if anything goes wrong.
115 PATTERN is the pattern to compile.
116 CP is the place to put the result.
117 TRANSLATE is a translation table for ignoring case, or nil for none.
118 REGP is the structure that says where to store the "register"
119 values that will result from matching this pattern.
120 If it is 0, we should compile the pattern not to record any
121 subexpression bounds.
122 POSIX is nonzero if we want full backtracking (POSIX style)
123 for this pattern. 0 means backtrack only enough to get a valid match.
124 MULTIBYTE is nonzero if we want to handle multibyte characters in
125 PATTERN. 0 means all multibyte characters are recognized just as
126 sequences of binary data.
127
128 The behavior also depends on Vsearch_spaces_regexp. */
129
130 static void
131 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
132 struct regexp_cache *cp;
133 Lisp_Object pattern;
134 Lisp_Object translate;
135 struct re_registers *regp;
136 int posix;
137 int multibyte;
138 {
139 unsigned char *raw_pattern;
140 int raw_pattern_size;
141 char *val;
142 reg_syntax_t old;
143
144 /* MULTIBYTE says whether the text to be searched is multibyte.
145 We must convert PATTERN to match that, or we will not really
146 find things right. */
147
148 if (multibyte == STRING_MULTIBYTE (pattern))
149 {
150 raw_pattern = (unsigned char *) SDATA (pattern);
151 raw_pattern_size = SBYTES (pattern);
152 }
153 else if (multibyte)
154 {
155 raw_pattern_size = count_size_as_multibyte (SDATA (pattern),
156 SCHARS (pattern));
157 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
158 copy_text (SDATA (pattern), raw_pattern,
159 SCHARS (pattern), 0, 1);
160 }
161 else
162 {
163 /* Converting multibyte to single-byte.
164
165 ??? Perhaps this conversion should be done in a special way
166 by subtracting nonascii-insert-offset from each non-ASCII char,
167 so that only the multibyte chars which really correspond to
168 the chosen single-byte character set can possibly match. */
169 raw_pattern_size = SCHARS (pattern);
170 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
171 copy_text (SDATA (pattern), raw_pattern,
172 SBYTES (pattern), 1, 0);
173 }
174
175 cp->regexp = Qnil;
176 cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
177 cp->posix = posix;
178 cp->buf.multibyte = multibyte;
179 cp->whitespace_regexp = Vsearch_spaces_regexp;
180 /* rms: I think BLOCK_INPUT is not needed here any more,
181 because regex.c defines malloc to call xmalloc.
182 Using BLOCK_INPUT here means the debugger won't run if an error occurs.
183 So let's turn it off. */
184 /* BLOCK_INPUT; */
185 old = re_set_syntax (RE_SYNTAX_EMACS
186 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
187
188 re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
189 : SDATA (Vsearch_spaces_regexp));
190
191 val = (char *) re_compile_pattern ((char *)raw_pattern,
192 raw_pattern_size, &cp->buf);
193
194 /* If the compiled pattern hard codes some of the contents of the
195 syntax-table, it can only be reused with *this* syntax table. */
196 cp->syntax_table = cp->buf.used_syntax ? current_buffer->syntax_table : Qt;
197
198 re_set_whitespace_regexp (NULL);
199
200 re_set_syntax (old);
201 /* UNBLOCK_INPUT; */
202 if (val)
203 xsignal1 (Qinvalid_regexp, build_string (val));
204
205 cp->regexp = Fcopy_sequence (pattern);
206 }
207
208 /* Shrink each compiled regexp buffer in the cache
209 to the size actually used right now.
210 This is called from garbage collection. */
211
212 void
213 shrink_regexp_cache ()
214 {
215 struct regexp_cache *cp;
216
217 for (cp = searchbuf_head; cp != 0; cp = cp->next)
218 {
219 cp->buf.allocated = cp->buf.used;
220 cp->buf.buffer
221 = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
222 }
223 }
224
225 /* Clear the regexp cache w.r.t. a particular syntax table,
226 because it was changed.
227 There is no danger of memory leak here because re_compile_pattern
228 automagically manages the memory in each re_pattern_buffer struct,
229 based on its `allocated' and `buffer' values. */
230 void
231 clear_regexp_cache ()
232 {
233 int i;
234
235 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
236 /* It's tempting to compare with the syntax-table we've actually changd,
237 but it's not sufficient because char-table inheritance mewans that
238 modifying one syntax-table can change others at the same time. */
239 if (!EQ (searchbufs[i].syntax_table, Qt))
240 searchbufs[i].regexp = Qnil;
241 }
242
243 /* Compile a regexp if necessary, but first check to see if there's one in
244 the cache.
245 PATTERN is the pattern to compile.
246 TRANSLATE is a translation table for ignoring case, or nil for none.
247 REGP is the structure that says where to store the "register"
248 values that will result from matching this pattern.
249 If it is 0, we should compile the pattern not to record any
250 subexpression bounds.
251 POSIX is nonzero if we want full backtracking (POSIX style)
252 for this pattern. 0 means backtrack only enough to get a valid match. */
253
254 struct re_pattern_buffer *
255 compile_pattern (pattern, regp, translate, posix, multibyte)
256 Lisp_Object pattern;
257 struct re_registers *regp;
258 Lisp_Object translate;
259 int posix, multibyte;
260 {
261 struct regexp_cache *cp, **cpp;
262
263 for (cpp = &searchbuf_head; ; cpp = &cp->next)
264 {
265 cp = *cpp;
266 /* Entries are initialized to nil, and may be set to nil by
267 compile_pattern_1 if the pattern isn't valid. Don't apply
268 string accessors in those cases. However, compile_pattern_1
269 is only applied to the cache entry we pick here to reuse. So
270 nil should never appear before a non-nil entry. */
271 if (NILP (cp->regexp))
272 goto compile_it;
273 if (SCHARS (cp->regexp) == SCHARS (pattern)
274 && STRING_MULTIBYTE (cp->regexp) == STRING_MULTIBYTE (pattern)
275 && !NILP (Fstring_equal (cp->regexp, pattern))
276 && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
277 && cp->posix == posix
278 && cp->buf.multibyte == multibyte
279 && (EQ (cp->syntax_table, Qt)
280 || EQ (cp->syntax_table, current_buffer->syntax_table))
281 && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
282 break;
283
284 /* If we're at the end of the cache, compile into the nil cell
285 we found, or the last (least recently used) cell with a
286 string value. */
287 if (cp->next == 0)
288 {
289 compile_it:
290 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
291 break;
292 }
293 }
294
295 /* When we get here, cp (aka *cpp) contains the compiled pattern,
296 either because we found it in the cache or because we just compiled it.
297 Move it to the front of the queue to mark it as most recently used. */
298 *cpp = cp->next;
299 cp->next = searchbuf_head;
300 searchbuf_head = cp;
301
302 /* Advise the searching functions about the space we have allocated
303 for register data. */
304 if (regp)
305 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
306
307 return &cp->buf;
308 }
309
310 \f
311 static Lisp_Object
312 looking_at_1 (string, posix)
313 Lisp_Object string;
314 int posix;
315 {
316 Lisp_Object val;
317 unsigned char *p1, *p2;
318 int s1, s2;
319 register int i;
320 struct re_pattern_buffer *bufp;
321
322 if (running_asynch_code)
323 save_search_regs ();
324
325 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
326 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
327 = current_buffer->case_eqv_table;
328
329 CHECK_STRING (string);
330 bufp = compile_pattern (string,
331 (NILP (Vinhibit_changing_match_data)
332 ? &search_regs : NULL),
333 (!NILP (current_buffer->case_fold_search)
334 ? current_buffer->case_canon_table : Qnil),
335 posix,
336 !NILP (current_buffer->enable_multibyte_characters));
337
338 immediate_quit = 1;
339 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
340
341 /* Get pointers and sizes of the two strings
342 that make up the visible portion of the buffer. */
343
344 p1 = BEGV_ADDR;
345 s1 = GPT_BYTE - BEGV_BYTE;
346 p2 = GAP_END_ADDR;
347 s2 = ZV_BYTE - GPT_BYTE;
348 if (s1 < 0)
349 {
350 p2 = p1;
351 s2 = ZV_BYTE - BEGV_BYTE;
352 s1 = 0;
353 }
354 if (s2 < 0)
355 {
356 s1 = ZV_BYTE - BEGV_BYTE;
357 s2 = 0;
358 }
359
360 re_match_object = Qnil;
361
362 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
363 PT_BYTE - BEGV_BYTE,
364 (NILP (Vinhibit_changing_match_data)
365 ? &search_regs : NULL),
366 ZV_BYTE - BEGV_BYTE);
367 immediate_quit = 0;
368
369 if (i == -2)
370 matcher_overflow ();
371
372 val = (0 <= i ? Qt : Qnil);
373 if (NILP (Vinhibit_changing_match_data) && i >= 0)
374 for (i = 0; i < search_regs.num_regs; i++)
375 if (search_regs.start[i] >= 0)
376 {
377 search_regs.start[i]
378 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
379 search_regs.end[i]
380 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
381 }
382
383 /* Set last_thing_searched only when match data is changed. */
384 if (NILP (Vinhibit_changing_match_data))
385 XSETBUFFER (last_thing_searched, current_buffer);
386
387 return val;
388 }
389
390 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
391 doc: /* Return t if text after point matches regular expression REGEXP.
392 This function modifies the match data that `match-beginning',
393 `match-end' and `match-data' access; save and restore the match
394 data if you want to preserve them. */)
395 (regexp)
396 Lisp_Object regexp;
397 {
398 return looking_at_1 (regexp, 0);
399 }
400
401 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
402 doc: /* Return t if text after point matches regular expression REGEXP.
403 Find the longest match, in accord with Posix regular expression rules.
404 This function modifies the match data that `match-beginning',
405 `match-end' and `match-data' access; save and restore the match
406 data if you want to preserve them. */)
407 (regexp)
408 Lisp_Object regexp;
409 {
410 return looking_at_1 (regexp, 1);
411 }
412 \f
413 static Lisp_Object
414 string_match_1 (regexp, string, start, posix)
415 Lisp_Object regexp, string, start;
416 int posix;
417 {
418 int val;
419 struct re_pattern_buffer *bufp;
420 int pos, pos_byte;
421 int i;
422
423 if (running_asynch_code)
424 save_search_regs ();
425
426 CHECK_STRING (regexp);
427 CHECK_STRING (string);
428
429 if (NILP (start))
430 pos = 0, pos_byte = 0;
431 else
432 {
433 int len = SCHARS (string);
434
435 CHECK_NUMBER (start);
436 pos = XINT (start);
437 if (pos < 0 && -pos <= len)
438 pos = len + pos;
439 else if (0 > pos || pos > len)
440 args_out_of_range (string, start);
441 pos_byte = string_char_to_byte (string, pos);
442 }
443
444 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
445 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
446 = current_buffer->case_eqv_table;
447
448 bufp = compile_pattern (regexp,
449 (NILP (Vinhibit_changing_match_data)
450 ? &search_regs : NULL),
451 (!NILP (current_buffer->case_fold_search)
452 ? current_buffer->case_canon_table : Qnil),
453 posix,
454 STRING_MULTIBYTE (string));
455 immediate_quit = 1;
456 re_match_object = string;
457
458 val = re_search (bufp, (char *) SDATA (string),
459 SBYTES (string), pos_byte,
460 SBYTES (string) - pos_byte,
461 (NILP (Vinhibit_changing_match_data)
462 ? &search_regs : NULL));
463 immediate_quit = 0;
464
465 /* Set last_thing_searched only when match data is changed. */
466 if (NILP (Vinhibit_changing_match_data))
467 last_thing_searched = Qt;
468
469 if (val == -2)
470 matcher_overflow ();
471 if (val < 0) return Qnil;
472
473 if (NILP (Vinhibit_changing_match_data))
474 for (i = 0; i < search_regs.num_regs; i++)
475 if (search_regs.start[i] >= 0)
476 {
477 search_regs.start[i]
478 = string_byte_to_char (string, search_regs.start[i]);
479 search_regs.end[i]
480 = string_byte_to_char (string, search_regs.end[i]);
481 }
482
483 return make_number (string_byte_to_char (string, val));
484 }
485
486 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
487 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
488 Matching ignores case if `case-fold-search' is non-nil.
489 If third arg START is non-nil, start search at that index in STRING.
490 For index of first char beyond the match, do (match-end 0).
491 `match-end' and `match-beginning' also give indices of substrings
492 matched by parenthesis constructs in the pattern.
493
494 You can use the function `match-string' to extract the substrings
495 matched by the parenthesis constructions in REGEXP. */)
496 (regexp, string, start)
497 Lisp_Object regexp, string, start;
498 {
499 return string_match_1 (regexp, string, start, 0);
500 }
501
502 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
503 doc: /* Return index of start of first match for REGEXP in STRING, or nil.
504 Find the longest match, in accord with Posix regular expression rules.
505 Case is ignored if `case-fold-search' is non-nil in the current buffer.
506 If third arg START is non-nil, start search at that index in STRING.
507 For index of first char beyond the match, do (match-end 0).
508 `match-end' and `match-beginning' also give indices of substrings
509 matched by parenthesis constructs in the pattern. */)
510 (regexp, string, start)
511 Lisp_Object regexp, string, start;
512 {
513 return string_match_1 (regexp, string, start, 1);
514 }
515
516 /* Match REGEXP against STRING, searching all of STRING,
517 and return the index of the match, or negative on failure.
518 This does not clobber the match data. */
519
520 int
521 fast_string_match (regexp, string)
522 Lisp_Object regexp, string;
523 {
524 int val;
525 struct re_pattern_buffer *bufp;
526
527 bufp = compile_pattern (regexp, 0, Qnil,
528 0, STRING_MULTIBYTE (string));
529 immediate_quit = 1;
530 re_match_object = string;
531
532 val = re_search (bufp, (char *) SDATA (string),
533 SBYTES (string), 0,
534 SBYTES (string), 0);
535 immediate_quit = 0;
536 return val;
537 }
538
539 /* Match REGEXP against STRING, searching all of STRING ignoring case,
540 and return the index of the match, or negative on failure.
541 This does not clobber the match data.
542 We assume that STRING contains single-byte characters. */
543
544 extern Lisp_Object Vascii_downcase_table;
545
546 int
547 fast_c_string_match_ignore_case (regexp, string)
548 Lisp_Object regexp;
549 const char *string;
550 {
551 int val;
552 struct re_pattern_buffer *bufp;
553 int len = strlen (string);
554
555 regexp = string_make_unibyte (regexp);
556 re_match_object = Qt;
557 bufp = compile_pattern (regexp, 0,
558 Vascii_canon_table, 0,
559 0);
560 immediate_quit = 1;
561 val = re_search (bufp, string, len, 0, len, 0);
562 immediate_quit = 0;
563 return val;
564 }
565
566 /* Like fast_string_match but ignore case. */
567
568 int
569 fast_string_match_ignore_case (regexp, string)
570 Lisp_Object regexp, string;
571 {
572 int val;
573 struct re_pattern_buffer *bufp;
574
575 bufp = compile_pattern (regexp, 0, Vascii_canon_table,
576 0, STRING_MULTIBYTE (string));
577 immediate_quit = 1;
578 re_match_object = string;
579
580 val = re_search (bufp, (char *) SDATA (string),
581 SBYTES (string), 0,
582 SBYTES (string), 0);
583 immediate_quit = 0;
584 return val;
585 }
586 \f
587 /* The newline cache: remembering which sections of text have no newlines. */
588
589 /* If the user has requested newline caching, make sure it's on.
590 Otherwise, make sure it's off.
591 This is our cheezy way of associating an action with the change of
592 state of a buffer-local variable. */
593 static void
594 newline_cache_on_off (buf)
595 struct buffer *buf;
596 {
597 if (NILP (buf->cache_long_line_scans))
598 {
599 /* It should be off. */
600 if (buf->newline_cache)
601 {
602 free_region_cache (buf->newline_cache);
603 buf->newline_cache = 0;
604 }
605 }
606 else
607 {
608 /* It should be on. */
609 if (buf->newline_cache == 0)
610 buf->newline_cache = new_region_cache ();
611 }
612 }
613
614 \f
615 /* Search for COUNT instances of the character TARGET between START and END.
616
617 If COUNT is positive, search forwards; END must be >= START.
618 If COUNT is negative, search backwards for the -COUNTth instance;
619 END must be <= START.
620 If COUNT is zero, do anything you please; run rogue, for all I care.
621
622 If END is zero, use BEGV or ZV instead, as appropriate for the
623 direction indicated by COUNT.
624
625 If we find COUNT instances, set *SHORTAGE to zero, and return the
626 position past the COUNTth match. Note that for reverse motion
627 this is not the same as the usual convention for Emacs motion commands.
628
629 If we don't find COUNT instances before reaching END, set *SHORTAGE
630 to the number of TARGETs left unfound, and return END.
631
632 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
633 except when inside redisplay. */
634
635 int
636 scan_buffer (target, start, end, count, shortage, allow_quit)
637 register int target;
638 int start, end;
639 int count;
640 int *shortage;
641 int allow_quit;
642 {
643 struct region_cache *newline_cache;
644 int direction;
645
646 if (count > 0)
647 {
648 direction = 1;
649 if (! end) end = ZV;
650 }
651 else
652 {
653 direction = -1;
654 if (! end) end = BEGV;
655 }
656
657 newline_cache_on_off (current_buffer);
658 newline_cache = current_buffer->newline_cache;
659
660 if (shortage != 0)
661 *shortage = 0;
662
663 immediate_quit = allow_quit;
664
665 if (count > 0)
666 while (start != end)
667 {
668 /* Our innermost scanning loop is very simple; it doesn't know
669 about gaps, buffer ends, or the newline cache. ceiling is
670 the position of the last character before the next such
671 obstacle --- the last character the dumb search loop should
672 examine. */
673 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
674 int start_byte = CHAR_TO_BYTE (start);
675 int tem;
676
677 /* If we're looking for a newline, consult the newline cache
678 to see where we can avoid some scanning. */
679 if (target == '\n' && newline_cache)
680 {
681 int next_change;
682 immediate_quit = 0;
683 while (region_cache_forward
684 (current_buffer, newline_cache, start_byte, &next_change))
685 start_byte = next_change;
686 immediate_quit = allow_quit;
687
688 /* START should never be after END. */
689 if (start_byte > ceiling_byte)
690 start_byte = ceiling_byte;
691
692 /* Now the text after start is an unknown region, and
693 next_change is the position of the next known region. */
694 ceiling_byte = min (next_change - 1, ceiling_byte);
695 }
696
697 /* The dumb loop can only scan text stored in contiguous
698 bytes. BUFFER_CEILING_OF returns the last character
699 position that is contiguous, so the ceiling is the
700 position after that. */
701 tem = BUFFER_CEILING_OF (start_byte);
702 ceiling_byte = min (tem, ceiling_byte);
703
704 {
705 /* The termination address of the dumb loop. */
706 register unsigned char *ceiling_addr
707 = BYTE_POS_ADDR (ceiling_byte) + 1;
708 register unsigned char *cursor
709 = BYTE_POS_ADDR (start_byte);
710 unsigned char *base = cursor;
711
712 while (cursor < ceiling_addr)
713 {
714 unsigned char *scan_start = cursor;
715
716 /* The dumb loop. */
717 while (*cursor != target && ++cursor < ceiling_addr)
718 ;
719
720 /* If we're looking for newlines, cache the fact that
721 the region from start to cursor is free of them. */
722 if (target == '\n' && newline_cache)
723 know_region_cache (current_buffer, newline_cache,
724 start_byte + scan_start - base,
725 start_byte + cursor - base);
726
727 /* Did we find the target character? */
728 if (cursor < ceiling_addr)
729 {
730 if (--count == 0)
731 {
732 immediate_quit = 0;
733 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
734 }
735 cursor++;
736 }
737 }
738
739 start = BYTE_TO_CHAR (start_byte + cursor - base);
740 }
741 }
742 else
743 while (start > end)
744 {
745 /* The last character to check before the next obstacle. */
746 int ceiling_byte = CHAR_TO_BYTE (end);
747 int start_byte = CHAR_TO_BYTE (start);
748 int tem;
749
750 /* Consult the newline cache, if appropriate. */
751 if (target == '\n' && newline_cache)
752 {
753 int next_change;
754 immediate_quit = 0;
755 while (region_cache_backward
756 (current_buffer, newline_cache, start_byte, &next_change))
757 start_byte = next_change;
758 immediate_quit = allow_quit;
759
760 /* Start should never be at or before end. */
761 if (start_byte <= ceiling_byte)
762 start_byte = ceiling_byte + 1;
763
764 /* Now the text before start is an unknown region, and
765 next_change is the position of the next known region. */
766 ceiling_byte = max (next_change, ceiling_byte);
767 }
768
769 /* Stop scanning before the gap. */
770 tem = BUFFER_FLOOR_OF (start_byte - 1);
771 ceiling_byte = max (tem, ceiling_byte);
772
773 {
774 /* The termination address of the dumb loop. */
775 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
776 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
777 unsigned char *base = cursor;
778
779 while (cursor >= ceiling_addr)
780 {
781 unsigned char *scan_start = cursor;
782
783 while (*cursor != target && --cursor >= ceiling_addr)
784 ;
785
786 /* If we're looking for newlines, cache the fact that
787 the region from after the cursor to start is free of them. */
788 if (target == '\n' && newline_cache)
789 know_region_cache (current_buffer, newline_cache,
790 start_byte + cursor - base,
791 start_byte + scan_start - base);
792
793 /* Did we find the target character? */
794 if (cursor >= ceiling_addr)
795 {
796 if (++count >= 0)
797 {
798 immediate_quit = 0;
799 return BYTE_TO_CHAR (start_byte + cursor - base);
800 }
801 cursor--;
802 }
803 }
804
805 start = BYTE_TO_CHAR (start_byte + cursor - base);
806 }
807 }
808
809 immediate_quit = 0;
810 if (shortage != 0)
811 *shortage = count * direction;
812 return start;
813 }
814 \f
815 /* Search for COUNT instances of a line boundary, which means either a
816 newline or (if selective display enabled) a carriage return.
817 Start at START. If COUNT is negative, search backwards.
818
819 We report the resulting position by calling TEMP_SET_PT_BOTH.
820
821 If we find COUNT instances. we position after (always after,
822 even if scanning backwards) the COUNTth match, and return 0.
823
824 If we don't find COUNT instances before reaching the end of the
825 buffer (or the beginning, if scanning backwards), we return
826 the number of line boundaries left unfound, and position at
827 the limit we bumped up against.
828
829 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
830 except in special cases. */
831
832 int
833 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
834 int start, start_byte;
835 int limit, limit_byte;
836 register int count;
837 int allow_quit;
838 {
839 int direction = ((count > 0) ? 1 : -1);
840
841 register unsigned char *cursor;
842 unsigned char *base;
843
844 register int ceiling;
845 register unsigned char *ceiling_addr;
846
847 int old_immediate_quit = immediate_quit;
848
849 /* The code that follows is like scan_buffer
850 but checks for either newline or carriage return. */
851
852 if (allow_quit)
853 immediate_quit++;
854
855 start_byte = CHAR_TO_BYTE (start);
856
857 if (count > 0)
858 {
859 while (start_byte < limit_byte)
860 {
861 ceiling = BUFFER_CEILING_OF (start_byte);
862 ceiling = min (limit_byte - 1, ceiling);
863 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
864 base = (cursor = BYTE_POS_ADDR (start_byte));
865 while (1)
866 {
867 while (*cursor != '\n' && ++cursor != ceiling_addr)
868 ;
869
870 if (cursor != ceiling_addr)
871 {
872 if (--count == 0)
873 {
874 immediate_quit = old_immediate_quit;
875 start_byte = start_byte + cursor - base + 1;
876 start = BYTE_TO_CHAR (start_byte);
877 TEMP_SET_PT_BOTH (start, start_byte);
878 return 0;
879 }
880 else
881 if (++cursor == ceiling_addr)
882 break;
883 }
884 else
885 break;
886 }
887 start_byte += cursor - base;
888 }
889 }
890 else
891 {
892 while (start_byte > limit_byte)
893 {
894 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
895 ceiling = max (limit_byte, ceiling);
896 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
897 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
898 while (1)
899 {
900 while (--cursor != ceiling_addr && *cursor != '\n')
901 ;
902
903 if (cursor != ceiling_addr)
904 {
905 if (++count == 0)
906 {
907 immediate_quit = old_immediate_quit;
908 /* Return the position AFTER the match we found. */
909 start_byte = start_byte + cursor - base + 1;
910 start = BYTE_TO_CHAR (start_byte);
911 TEMP_SET_PT_BOTH (start, start_byte);
912 return 0;
913 }
914 }
915 else
916 break;
917 }
918 /* Here we add 1 to compensate for the last decrement
919 of CURSOR, which took it past the valid range. */
920 start_byte += cursor - base + 1;
921 }
922 }
923
924 TEMP_SET_PT_BOTH (limit, limit_byte);
925 immediate_quit = old_immediate_quit;
926
927 return count * direction;
928 }
929
930 int
931 find_next_newline_no_quit (from, cnt)
932 register int from, cnt;
933 {
934 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
935 }
936
937 /* Like find_next_newline, but returns position before the newline,
938 not after, and only search up to TO. This isn't just
939 find_next_newline (...)-1, because you might hit TO. */
940
941 int
942 find_before_next_newline (from, to, cnt)
943 int from, to, cnt;
944 {
945 int shortage;
946 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
947
948 if (shortage == 0)
949 pos--;
950
951 return pos;
952 }
953 \f
954 /* Subroutines of Lisp buffer search functions. */
955
956 static Lisp_Object
957 search_command (string, bound, noerror, count, direction, RE, posix)
958 Lisp_Object string, bound, noerror, count;
959 int direction;
960 int RE;
961 int posix;
962 {
963 register int np;
964 int lim, lim_byte;
965 int n = direction;
966
967 if (!NILP (count))
968 {
969 CHECK_NUMBER (count);
970 n *= XINT (count);
971 }
972
973 CHECK_STRING (string);
974 if (NILP (bound))
975 {
976 if (n > 0)
977 lim = ZV, lim_byte = ZV_BYTE;
978 else
979 lim = BEGV, lim_byte = BEGV_BYTE;
980 }
981 else
982 {
983 CHECK_NUMBER_COERCE_MARKER (bound);
984 lim = XINT (bound);
985 if (n > 0 ? lim < PT : lim > PT)
986 error ("Invalid search bound (wrong side of point)");
987 if (lim > ZV)
988 lim = ZV, lim_byte = ZV_BYTE;
989 else if (lim < BEGV)
990 lim = BEGV, lim_byte = BEGV_BYTE;
991 else
992 lim_byte = CHAR_TO_BYTE (lim);
993 }
994
995 /* This is so set_image_of_range_1 in regex.c can find the EQV table. */
996 XCHAR_TABLE (current_buffer->case_canon_table)->extras[2]
997 = current_buffer->case_eqv_table;
998
999 np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
1000 (!NILP (current_buffer->case_fold_search)
1001 ? current_buffer->case_canon_table
1002 : Qnil),
1003 (!NILP (current_buffer->case_fold_search)
1004 ? current_buffer->case_eqv_table
1005 : Qnil),
1006 posix);
1007 if (np <= 0)
1008 {
1009 if (NILP (noerror))
1010 xsignal1 (Qsearch_failed, string);
1011
1012 if (!EQ (noerror, Qt))
1013 {
1014 if (lim < BEGV || lim > ZV)
1015 abort ();
1016 SET_PT_BOTH (lim, lim_byte);
1017 return Qnil;
1018 #if 0 /* This would be clean, but maybe programs depend on
1019 a value of nil here. */
1020 np = lim;
1021 #endif
1022 }
1023 else
1024 return Qnil;
1025 }
1026
1027 if (np < BEGV || np > ZV)
1028 abort ();
1029
1030 SET_PT (np);
1031
1032 return make_number (np);
1033 }
1034 \f
1035 /* Return 1 if REGEXP it matches just one constant string. */
1036
1037 static int
1038 trivial_regexp_p (regexp)
1039 Lisp_Object regexp;
1040 {
1041 int len = SBYTES (regexp);
1042 unsigned char *s = SDATA (regexp);
1043 while (--len >= 0)
1044 {
1045 switch (*s++)
1046 {
1047 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
1048 return 0;
1049 case '\\':
1050 if (--len < 0)
1051 return 0;
1052 switch (*s++)
1053 {
1054 case '|': case '(': case ')': case '`': case '\'': case 'b':
1055 case 'B': case '<': case '>': case 'w': case 'W': case 's':
1056 case 'S': case '=': case '{': case '}': case '_':
1057 case 'c': case 'C': /* for categoryspec and notcategoryspec */
1058 case '1': case '2': case '3': case '4': case '5':
1059 case '6': case '7': case '8': case '9':
1060 return 0;
1061 }
1062 }
1063 }
1064 return 1;
1065 }
1066
1067 /* Search for the n'th occurrence of STRING in the current buffer,
1068 starting at position POS and stopping at position LIM,
1069 treating STRING as a literal string if RE is false or as
1070 a regular expression if RE is true.
1071
1072 If N is positive, searching is forward and LIM must be greater than POS.
1073 If N is negative, searching is backward and LIM must be less than POS.
1074
1075 Returns -x if x occurrences remain to be found (x > 0),
1076 or else the position at the beginning of the Nth occurrence
1077 (if searching backward) or the end (if searching forward).
1078
1079 POSIX is nonzero if we want full backtracking (POSIX style)
1080 for this pattern. 0 means backtrack only enough to get a valid match. */
1081
1082 #define TRANSLATE(out, trt, d) \
1083 do \
1084 { \
1085 if (! NILP (trt)) \
1086 { \
1087 Lisp_Object temp; \
1088 temp = Faref (trt, make_number (d)); \
1089 if (INTEGERP (temp)) \
1090 out = XINT (temp); \
1091 else \
1092 out = d; \
1093 } \
1094 else \
1095 out = d; \
1096 } \
1097 while (0)
1098
1099 /* Only used in search_buffer, to record the end position of the match
1100 when searching regexps and SEARCH_REGS should not be changed
1101 (i.e. Vinhibit_changing_match_data is non-nil). */
1102 static struct re_registers search_regs_1;
1103
1104 static int
1105 search_buffer (string, pos, pos_byte, lim, lim_byte, n,
1106 RE, trt, inverse_trt, posix)
1107 Lisp_Object string;
1108 int pos;
1109 int pos_byte;
1110 int lim;
1111 int lim_byte;
1112 int n;
1113 int RE;
1114 Lisp_Object trt;
1115 Lisp_Object inverse_trt;
1116 int posix;
1117 {
1118 int len = SCHARS (string);
1119 int len_byte = SBYTES (string);
1120 register int i;
1121
1122 if (running_asynch_code)
1123 save_search_regs ();
1124
1125 /* Searching 0 times means don't move. */
1126 /* Null string is found at starting position. */
1127 if (len == 0 || n == 0)
1128 {
1129 set_search_regs (pos_byte, 0);
1130 return pos;
1131 }
1132
1133 if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
1134 {
1135 unsigned char *p1, *p2;
1136 int s1, s2;
1137 struct re_pattern_buffer *bufp;
1138
1139 bufp = compile_pattern (string,
1140 (NILP (Vinhibit_changing_match_data)
1141 ? &search_regs : &search_regs_1),
1142 trt, posix,
1143 !NILP (current_buffer->enable_multibyte_characters));
1144
1145 immediate_quit = 1; /* Quit immediately if user types ^G,
1146 because letting this function finish
1147 can take too long. */
1148 QUIT; /* Do a pending quit right away,
1149 to avoid paradoxical behavior */
1150 /* Get pointers and sizes of the two strings
1151 that make up the visible portion of the buffer. */
1152
1153 p1 = BEGV_ADDR;
1154 s1 = GPT_BYTE - BEGV_BYTE;
1155 p2 = GAP_END_ADDR;
1156 s2 = ZV_BYTE - GPT_BYTE;
1157 if (s1 < 0)
1158 {
1159 p2 = p1;
1160 s2 = ZV_BYTE - BEGV_BYTE;
1161 s1 = 0;
1162 }
1163 if (s2 < 0)
1164 {
1165 s1 = ZV_BYTE - BEGV_BYTE;
1166 s2 = 0;
1167 }
1168 re_match_object = Qnil;
1169
1170 while (n < 0)
1171 {
1172 int val;
1173 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1174 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1175 (NILP (Vinhibit_changing_match_data)
1176 ? &search_regs : &search_regs_1),
1177 /* Don't allow match past current point */
1178 pos_byte - BEGV_BYTE);
1179 if (val == -2)
1180 {
1181 matcher_overflow ();
1182 }
1183 if (val >= 0)
1184 {
1185 if (NILP (Vinhibit_changing_match_data))
1186 {
1187 pos_byte = search_regs.start[0] + BEGV_BYTE;
1188 for (i = 0; i < search_regs.num_regs; i++)
1189 if (search_regs.start[i] >= 0)
1190 {
1191 search_regs.start[i]
1192 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1193 search_regs.end[i]
1194 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1195 }
1196 XSETBUFFER (last_thing_searched, current_buffer);
1197 /* Set pos to the new position. */
1198 pos = search_regs.start[0];
1199 }
1200 else
1201 {
1202 pos_byte = search_regs_1.start[0] + BEGV_BYTE;
1203 /* Set pos to the new position. */
1204 pos = BYTE_TO_CHAR (search_regs_1.start[0] + BEGV_BYTE);
1205 }
1206 }
1207 else
1208 {
1209 immediate_quit = 0;
1210 return (n);
1211 }
1212 n++;
1213 }
1214 while (n > 0)
1215 {
1216 int val;
1217 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1218 pos_byte - BEGV_BYTE, lim_byte - pos_byte,
1219 (NILP (Vinhibit_changing_match_data)
1220 ? &search_regs : &search_regs_1),
1221 lim_byte - BEGV_BYTE);
1222 if (val == -2)
1223 {
1224 matcher_overflow ();
1225 }
1226 if (val >= 0)
1227 {
1228 if (NILP (Vinhibit_changing_match_data))
1229 {
1230 pos_byte = search_regs.end[0] + BEGV_BYTE;
1231 for (i = 0; i < search_regs.num_regs; i++)
1232 if (search_regs.start[i] >= 0)
1233 {
1234 search_regs.start[i]
1235 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1236 search_regs.end[i]
1237 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1238 }
1239 XSETBUFFER (last_thing_searched, current_buffer);
1240 pos = search_regs.end[0];
1241 }
1242 else
1243 {
1244 pos_byte = search_regs_1.end[0] + BEGV_BYTE;
1245 pos = BYTE_TO_CHAR (search_regs_1.end[0] + BEGV_BYTE);
1246 }
1247 }
1248 else
1249 {
1250 immediate_quit = 0;
1251 return (0 - n);
1252 }
1253 n--;
1254 }
1255 immediate_quit = 0;
1256 return (pos);
1257 }
1258 else /* non-RE case */
1259 {
1260 unsigned char *raw_pattern, *pat;
1261 int raw_pattern_size;
1262 int raw_pattern_size_byte;
1263 unsigned char *patbuf;
1264 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
1265 unsigned char *base_pat;
1266 /* Set to positive if we find a non-ASCII char that need
1267 translation. Otherwise set to zero later. */
1268 int charset_base = -1;
1269 int boyer_moore_ok = 1;
1270
1271 /* MULTIBYTE says whether the text to be searched is multibyte.
1272 We must convert PATTERN to match that, or we will not really
1273 find things right. */
1274
1275 if (multibyte == STRING_MULTIBYTE (string))
1276 {
1277 raw_pattern = (unsigned char *) SDATA (string);
1278 raw_pattern_size = SCHARS (string);
1279 raw_pattern_size_byte = SBYTES (string);
1280 }
1281 else if (multibyte)
1282 {
1283 raw_pattern_size = SCHARS (string);
1284 raw_pattern_size_byte
1285 = count_size_as_multibyte (SDATA (string),
1286 raw_pattern_size);
1287 raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
1288 copy_text (SDATA (string), raw_pattern,
1289 SCHARS (string), 0, 1);
1290 }
1291 else
1292 {
1293 /* Converting multibyte to single-byte.
1294
1295 ??? Perhaps this conversion should be done in a special way
1296 by subtracting nonascii-insert-offset from each non-ASCII char,
1297 so that only the multibyte chars which really correspond to
1298 the chosen single-byte character set can possibly match. */
1299 raw_pattern_size = SCHARS (string);
1300 raw_pattern_size_byte = SCHARS (string);
1301 raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
1302 copy_text (SDATA (string), raw_pattern,
1303 SBYTES (string), 1, 0);
1304 }
1305
1306 /* Copy and optionally translate the pattern. */
1307 len = raw_pattern_size;
1308 len_byte = raw_pattern_size_byte;
1309 patbuf = (unsigned char *) alloca (len_byte);
1310 pat = patbuf;
1311 base_pat = raw_pattern;
1312 if (multibyte)
1313 {
1314 /* Fill patbuf by translated characters in STRING while
1315 checking if we can use boyer-moore search. If TRT is
1316 non-nil, we can use boyer-moore search only if TRT can be
1317 represented by the byte array of 256 elements. For that,
1318 all non-ASCII case-equivalents of all case-senstive
1319 characters in STRING must belong to the same charset and
1320 row. */
1321
1322 while (--len >= 0)
1323 {
1324 unsigned char str_base[MAX_MULTIBYTE_LENGTH], *str;
1325 int c, translated, inverse;
1326 int in_charlen, charlen;
1327
1328 /* If we got here and the RE flag is set, it's because we're
1329 dealing with a regexp known to be trivial, so the backslash
1330 just quotes the next character. */
1331 if (RE && *base_pat == '\\')
1332 {
1333 len--;
1334 raw_pattern_size--;
1335 len_byte--;
1336 base_pat++;
1337 }
1338
1339 c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
1340
1341 if (NILP (trt))
1342 {
1343 str = base_pat;
1344 charlen = in_charlen;
1345 }
1346 else
1347 {
1348 /* Translate the character. */
1349 TRANSLATE (translated, trt, c);
1350 charlen = CHAR_STRING (translated, str_base);
1351 str = str_base;
1352
1353 /* Check if C has any other case-equivalents. */
1354 TRANSLATE (inverse, inverse_trt, c);
1355 /* If so, check if we can use boyer-moore. */
1356 if (c != inverse && boyer_moore_ok)
1357 {
1358 /* Check if all equivalents belong to the same
1359 charset & row. Note that the check of C
1360 itself is done by the last iteration. Note
1361 also that we don't have to check ASCII
1362 characters because boyer-moore search can
1363 always handle their translation. */
1364 while (1)
1365 {
1366 if (ASCII_BYTE_P (inverse))
1367 {
1368 if (charset_base > 0)
1369 {
1370 boyer_moore_ok = 0;
1371 break;
1372 }
1373 charset_base = 0;
1374 }
1375 else if (SINGLE_BYTE_CHAR_P (inverse))
1376 {
1377 /* Boyer-moore search can't handle a
1378 translation of an eight-bit
1379 character. */
1380 boyer_moore_ok = 0;
1381 break;
1382 }
1383 else if (charset_base < 0)
1384 charset_base = inverse & ~CHAR_FIELD3_MASK;
1385 else if ((inverse & ~CHAR_FIELD3_MASK)
1386 != charset_base)
1387 {
1388 boyer_moore_ok = 0;
1389 break;
1390 }
1391 if (c == inverse)
1392 break;
1393 TRANSLATE (inverse, inverse_trt, inverse);
1394 }
1395 }
1396 }
1397 if (charset_base < 0)
1398 charset_base = 0;
1399
1400 /* Store this character into the translated pattern. */
1401 bcopy (str, pat, charlen);
1402 pat += charlen;
1403 base_pat += in_charlen;
1404 len_byte -= in_charlen;
1405 }
1406 }
1407 else
1408 {
1409 /* Unibyte buffer. */
1410 charset_base = 0;
1411 while (--len >= 0)
1412 {
1413 int c, translated;
1414
1415 /* If we got here and the RE flag is set, it's because we're
1416 dealing with a regexp known to be trivial, so the backslash
1417 just quotes the next character. */
1418 if (RE && *base_pat == '\\')
1419 {
1420 len--;
1421 raw_pattern_size--;
1422 base_pat++;
1423 }
1424 c = *base_pat++;
1425 TRANSLATE (translated, trt, c);
1426 *pat++ = translated;
1427 }
1428 }
1429
1430 len_byte = pat - patbuf;
1431 len = raw_pattern_size;
1432 pat = base_pat = patbuf;
1433
1434 if (boyer_moore_ok)
1435 return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
1436 pos, pos_byte, lim, lim_byte,
1437 charset_base);
1438 else
1439 return simple_search (n, pat, len, len_byte, trt,
1440 pos, pos_byte, lim, lim_byte);
1441 }
1442 }
1443 \f
1444 /* Do a simple string search N times for the string PAT,
1445 whose length is LEN/LEN_BYTE,
1446 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1447 TRT is the translation table.
1448
1449 Return the character position where the match is found.
1450 Otherwise, if M matches remained to be found, return -M.
1451
1452 This kind of search works regardless of what is in PAT and
1453 regardless of what is in TRT. It is used in cases where
1454 boyer_moore cannot work. */
1455
1456 static int
1457 simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
1458 int n;
1459 unsigned char *pat;
1460 int len, len_byte;
1461 Lisp_Object trt;
1462 int pos, pos_byte;
1463 int lim, lim_byte;
1464 {
1465 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1466 int forward = n > 0;
1467
1468 if (lim > pos && multibyte)
1469 while (n > 0)
1470 {
1471 while (1)
1472 {
1473 /* Try matching at position POS. */
1474 int this_pos = pos;
1475 int this_pos_byte = pos_byte;
1476 int this_len = len;
1477 int this_len_byte = len_byte;
1478 unsigned char *p = pat;
1479 if (pos + len > lim)
1480 goto stop;
1481
1482 while (this_len > 0)
1483 {
1484 int charlen, buf_charlen;
1485 int pat_ch, buf_ch;
1486
1487 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1488 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1489 ZV_BYTE - this_pos_byte,
1490 buf_charlen);
1491 TRANSLATE (buf_ch, trt, buf_ch);
1492
1493 if (buf_ch != pat_ch)
1494 break;
1495
1496 this_len_byte -= charlen;
1497 this_len--;
1498 p += charlen;
1499
1500 this_pos_byte += buf_charlen;
1501 this_pos++;
1502 }
1503
1504 if (this_len == 0)
1505 {
1506 pos += len;
1507 pos_byte += len_byte;
1508 break;
1509 }
1510
1511 INC_BOTH (pos, pos_byte);
1512 }
1513
1514 n--;
1515 }
1516 else if (lim > pos)
1517 while (n > 0)
1518 {
1519 while (1)
1520 {
1521 /* Try matching at position POS. */
1522 int this_pos = pos;
1523 int this_len = len;
1524 unsigned char *p = pat;
1525
1526 if (pos + len > lim)
1527 goto stop;
1528
1529 while (this_len > 0)
1530 {
1531 int pat_ch = *p++;
1532 int buf_ch = FETCH_BYTE (this_pos);
1533 TRANSLATE (buf_ch, trt, buf_ch);
1534
1535 if (buf_ch != pat_ch)
1536 break;
1537
1538 this_len--;
1539 this_pos++;
1540 }
1541
1542 if (this_len == 0)
1543 {
1544 pos += len;
1545 break;
1546 }
1547
1548 pos++;
1549 }
1550
1551 n--;
1552 }
1553 /* Backwards search. */
1554 else if (lim < pos && multibyte)
1555 while (n < 0)
1556 {
1557 while (1)
1558 {
1559 /* Try matching at position POS. */
1560 int this_pos = pos - len;
1561 int this_pos_byte = pos_byte - len_byte;
1562 int this_len = len;
1563 int this_len_byte = len_byte;
1564 unsigned char *p = pat;
1565
1566 if (this_pos < lim || this_pos_byte < lim_byte)
1567 goto stop;
1568
1569 while (this_len > 0)
1570 {
1571 int charlen, buf_charlen;
1572 int pat_ch, buf_ch;
1573
1574 pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
1575 buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
1576 ZV_BYTE - this_pos_byte,
1577 buf_charlen);
1578 TRANSLATE (buf_ch, trt, buf_ch);
1579
1580 if (buf_ch != pat_ch)
1581 break;
1582
1583 this_len_byte -= charlen;
1584 this_len--;
1585 p += charlen;
1586 this_pos_byte += buf_charlen;
1587 this_pos++;
1588 }
1589
1590 if (this_len == 0)
1591 {
1592 pos -= len;
1593 pos_byte -= len_byte;
1594 break;
1595 }
1596
1597 DEC_BOTH (pos, pos_byte);
1598 }
1599
1600 n++;
1601 }
1602 else if (lim < pos)
1603 while (n < 0)
1604 {
1605 while (1)
1606 {
1607 /* Try matching at position POS. */
1608 int this_pos = pos - len;
1609 int this_len = len;
1610 unsigned char *p = pat;
1611
1612 if (pos - len < lim)
1613 goto stop;
1614
1615 while (this_len > 0)
1616 {
1617 int pat_ch = *p++;
1618 int buf_ch = FETCH_BYTE (this_pos);
1619 TRANSLATE (buf_ch, trt, buf_ch);
1620
1621 if (buf_ch != pat_ch)
1622 break;
1623 this_len--;
1624 this_pos++;
1625 }
1626
1627 if (this_len == 0)
1628 {
1629 pos -= len;
1630 break;
1631 }
1632
1633 pos--;
1634 }
1635
1636 n++;
1637 }
1638
1639 stop:
1640 if (n == 0)
1641 {
1642 if (forward)
1643 set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
1644 else
1645 set_search_regs (multibyte ? pos_byte : pos, len_byte);
1646
1647 return pos;
1648 }
1649 else if (n > 0)
1650 return -n;
1651 else
1652 return n;
1653 }
1654 \f
1655 /* Do Boyer-Moore search N times for the string BASE_PAT,
1656 whose length is LEN/LEN_BYTE,
1657 from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
1658 DIRECTION says which direction we search in.
1659 TRT and INVERSE_TRT are translation tables.
1660 Characters in PAT are already translated by TRT.
1661
1662 This kind of search works if all the characters in BASE_PAT that
1663 have nontrivial translation are the same aside from the last byte.
1664 This makes it possible to translate just the last byte of a
1665 character, and do so after just a simple test of the context.
1666 CHARSET_BASE is nonzero if there is such a non-ASCII character.
1667
1668 If that criterion is not satisfied, do not call this function. */
1669
1670 static int
1671 boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
1672 pos, pos_byte, lim, lim_byte, charset_base)
1673 int n;
1674 unsigned char *base_pat;
1675 int len, len_byte;
1676 Lisp_Object trt;
1677 Lisp_Object inverse_trt;
1678 int pos, pos_byte;
1679 int lim, lim_byte;
1680 int charset_base;
1681 {
1682 int direction = ((n > 0) ? 1 : -1);
1683 register int dirlen;
1684 int infinity, limit, stride_for_teases = 0;
1685 register int *BM_tab;
1686 int *BM_tab_base;
1687 register unsigned char *cursor, *p_limit;
1688 register int i, j;
1689 unsigned char *pat, *pat_end;
1690 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1691
1692 unsigned char simple_translate[0400];
1693 /* These are set to the preceding bytes of a byte to be translated
1694 if charset_base is nonzero. As the maximum byte length of a
1695 multibyte character is 4, we have to check at most three previous
1696 bytes. */
1697 int translate_prev_byte1 = 0;
1698 int translate_prev_byte2 = 0;
1699 int translate_prev_byte3 = 0;
1700
1701 BM_tab = (int *) alloca (0400 * sizeof (int));
1702
1703 /* The general approach is that we are going to maintain that we know */
1704 /* the first (closest to the present position, in whatever direction */
1705 /* we're searching) character that could possibly be the last */
1706 /* (furthest from present position) character of a valid match. We */
1707 /* advance the state of our knowledge by looking at that character */
1708 /* and seeing whether it indeed matches the last character of the */
1709 /* pattern. If it does, we take a closer look. If it does not, we */
1710 /* move our pointer (to putative last characters) as far as is */
1711 /* logically possible. This amount of movement, which I call a */
1712 /* stride, will be the length of the pattern if the actual character */
1713 /* appears nowhere in the pattern, otherwise it will be the distance */
1714 /* from the last occurrence of that character to the end of the */
1715 /* pattern. */
1716 /* As a coding trick, an enormous stride is coded into the table for */
1717 /* characters that match the last character. This allows use of only */
1718 /* a single test, a test for having gone past the end of the */
1719 /* permissible match region, to test for both possible matches (when */
1720 /* the stride goes past the end immediately) and failure to */
1721 /* match (where you get nudged past the end one stride at a time). */
1722
1723 /* Here we make a "mickey mouse" BM table. The stride of the search */
1724 /* is determined only by the last character of the putative match. */
1725 /* If that character does not match, we will stride the proper */
1726 /* distance to propose a match that superimposes it on the last */
1727 /* instance of a character that matches it (per trt), or misses */
1728 /* it entirely if there is none. */
1729
1730 dirlen = len_byte * direction;
1731 infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
1732
1733 /* Record position after the end of the pattern. */
1734 pat_end = base_pat + len_byte;
1735 /* BASE_PAT points to a character that we start scanning from.
1736 It is the first character in a forward search,
1737 the last character in a backward search. */
1738 if (direction < 0)
1739 base_pat = pat_end - 1;
1740
1741 BM_tab_base = BM_tab;
1742 BM_tab += 0400;
1743 j = dirlen; /* to get it in a register */
1744 /* A character that does not appear in the pattern induces a */
1745 /* stride equal to the pattern length. */
1746 while (BM_tab_base != BM_tab)
1747 {
1748 *--BM_tab = j;
1749 *--BM_tab = j;
1750 *--BM_tab = j;
1751 *--BM_tab = j;
1752 }
1753
1754 /* We use this for translation, instead of TRT itself.
1755 We fill this in to handle the characters that actually
1756 occur in the pattern. Others don't matter anyway! */
1757 bzero (simple_translate, sizeof simple_translate);
1758 for (i = 0; i < 0400; i++)
1759 simple_translate[i] = i;
1760
1761 if (charset_base)
1762 {
1763 /* Setup translate_prev_byte1/2/3 from CHARSET_BASE. Only a
1764 byte following them are the target of translation. */
1765 int sample_char = charset_base | 0x20;
1766 unsigned char str[MAX_MULTIBYTE_LENGTH];
1767 int len = CHAR_STRING (sample_char, str);
1768
1769 translate_prev_byte1 = str[len - 2];
1770 if (len > 2)
1771 {
1772 translate_prev_byte2 = str[len - 3];
1773 if (len > 3)
1774 translate_prev_byte3 = str[len - 4];
1775 }
1776 }
1777
1778 i = 0;
1779 while (i != infinity)
1780 {
1781 unsigned char *ptr = base_pat + i;
1782 i += direction;
1783 if (i == dirlen)
1784 i = infinity;
1785 if (! NILP (trt))
1786 {
1787 /* If the byte currently looking at is the last of a
1788 character to check case-equivalents, set CH to that
1789 character. An ASCII character and a non-ASCII character
1790 matching with CHARSET_BASE are to be checked. */
1791 int ch = -1;
1792
1793 if (ASCII_BYTE_P (*ptr) || ! multibyte)
1794 ch = *ptr;
1795 else if (charset_base
1796 && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
1797 {
1798 unsigned char *charstart = ptr - 1;
1799
1800 while (! (CHAR_HEAD_P (*charstart)))
1801 charstart--;
1802 ch = STRING_CHAR (charstart, ptr - charstart + 1);
1803 if (charset_base != (ch & ~CHAR_FIELD3_MASK))
1804 ch = -1;
1805 }
1806
1807 if (ch >= 0400)
1808 j = ((unsigned char) ch) | 0200;
1809 else
1810 j = *ptr;
1811
1812 if (i == infinity)
1813 stride_for_teases = BM_tab[j];
1814
1815 BM_tab[j] = dirlen - i;
1816 /* A translation table is accompanied by its inverse -- see */
1817 /* comment following downcase_table for details */
1818 if (ch >= 0)
1819 {
1820 int starting_ch = ch;
1821 int starting_j = j;
1822
1823 while (1)
1824 {
1825 TRANSLATE (ch, inverse_trt, ch);
1826 if (ch >= 0400)
1827 j = ((unsigned char) ch) | 0200;
1828 else
1829 j = (unsigned char) ch;
1830
1831 /* For all the characters that map into CH,
1832 set up simple_translate to map the last byte
1833 into STARTING_J. */
1834 simple_translate[j] = starting_j;
1835 if (ch == starting_ch)
1836 break;
1837 BM_tab[j] = dirlen - i;
1838 }
1839 }
1840 }
1841 else
1842 {
1843 j = *ptr;
1844
1845 if (i == infinity)
1846 stride_for_teases = BM_tab[j];
1847 BM_tab[j] = dirlen - i;
1848 }
1849 /* stride_for_teases tells how much to stride if we get a */
1850 /* match on the far character but are subsequently */
1851 /* disappointed, by recording what the stride would have been */
1852 /* for that character if the last character had been */
1853 /* different. */
1854 }
1855 infinity = dirlen - infinity;
1856 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1857 /* loop invariant - POS_BYTE points at where last char (first
1858 char if reverse) of pattern would align in a possible match. */
1859 while (n != 0)
1860 {
1861 int tail_end;
1862 unsigned char *tail_end_ptr;
1863
1864 /* It's been reported that some (broken) compiler thinks that
1865 Boolean expressions in an arithmetic context are unsigned.
1866 Using an explicit ?1:0 prevents this. */
1867 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1868 < 0)
1869 return (n * (0 - direction));
1870 /* First we do the part we can by pointers (maybe nothing) */
1871 QUIT;
1872 pat = base_pat;
1873 limit = pos_byte - dirlen + direction;
1874 if (direction > 0)
1875 {
1876 limit = BUFFER_CEILING_OF (limit);
1877 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1878 can take on without hitting edge of buffer or the gap. */
1879 limit = min (limit, pos_byte + 20000);
1880 limit = min (limit, lim_byte - 1);
1881 }
1882 else
1883 {
1884 limit = BUFFER_FLOOR_OF (limit);
1885 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1886 can take on without hitting edge of buffer or the gap. */
1887 limit = max (limit, pos_byte - 20000);
1888 limit = max (limit, lim_byte);
1889 }
1890 tail_end = BUFFER_CEILING_OF (pos_byte) + 1;
1891 tail_end_ptr = BYTE_POS_ADDR (tail_end);
1892
1893 if ((limit - pos_byte) * direction > 20)
1894 {
1895 unsigned char *p2;
1896
1897 p_limit = BYTE_POS_ADDR (limit);
1898 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1899 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1900 while (1) /* use one cursor setting as long as i can */
1901 {
1902 if (direction > 0) /* worth duplicating */
1903 {
1904 /* Use signed comparison if appropriate
1905 to make cursor+infinity sure to be > p_limit.
1906 Assuming that the buffer lies in a range of addresses
1907 that are all "positive" (as ints) or all "negative",
1908 either kind of comparison will work as long
1909 as we don't step by infinity. So pick the kind
1910 that works when we do step by infinity. */
1911 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1912 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1913 cursor += BM_tab[*cursor];
1914 else
1915 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1916 cursor += BM_tab[*cursor];
1917 }
1918 else
1919 {
1920 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1921 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1922 cursor += BM_tab[*cursor];
1923 else
1924 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1925 cursor += BM_tab[*cursor];
1926 }
1927 /* If you are here, cursor is beyond the end of the searched region. */
1928 /* This can happen if you match on the far character of the pattern, */
1929 /* because the "stride" of that character is infinity, a number able */
1930 /* to throw you well beyond the end of the search. It can also */
1931 /* happen if you fail to match within the permitted region and would */
1932 /* otherwise try a character beyond that region */
1933 if ((cursor - p_limit) * direction <= len_byte)
1934 break; /* a small overrun is genuine */
1935 cursor -= infinity; /* large overrun = hit */
1936 i = dirlen - direction;
1937 if (! NILP (trt))
1938 {
1939 while ((i -= direction) + direction != 0)
1940 {
1941 int ch;
1942 cursor -= direction;
1943 /* Translate only the last byte of a character. */
1944 if (! multibyte
1945 || ((cursor == tail_end_ptr
1946 || CHAR_HEAD_P (cursor[1]))
1947 && (CHAR_HEAD_P (cursor[0])
1948 /* Check if this is the last byte of
1949 a translable character. */
1950 || (translate_prev_byte1 == cursor[-1]
1951 && (CHAR_HEAD_P (translate_prev_byte1)
1952 || (translate_prev_byte2 == cursor[-2]
1953 && (CHAR_HEAD_P (translate_prev_byte2)
1954 || (translate_prev_byte3 == cursor[-3]))))))))
1955 ch = simple_translate[*cursor];
1956 else
1957 ch = *cursor;
1958 if (pat[i] != ch)
1959 break;
1960 }
1961 }
1962 else
1963 {
1964 while ((i -= direction) + direction != 0)
1965 {
1966 cursor -= direction;
1967 if (pat[i] != *cursor)
1968 break;
1969 }
1970 }
1971 cursor += dirlen - i - direction; /* fix cursor */
1972 if (i + direction == 0)
1973 {
1974 int position, start, end;
1975
1976 cursor -= direction;
1977
1978 position = pos_byte + cursor - p2 + ((direction > 0)
1979 ? 1 - len_byte : 0);
1980 set_search_regs (position, len_byte);
1981
1982 if (NILP (Vinhibit_changing_match_data))
1983 {
1984 start = search_regs.start[0];
1985 end = search_regs.end[0];
1986 }
1987 else
1988 /* If Vinhibit_changing_match_data is non-nil,
1989 search_regs will not be changed. So let's
1990 compute start and end here. */
1991 {
1992 start = BYTE_TO_CHAR (position);
1993 end = BYTE_TO_CHAR (position + len_byte);
1994 }
1995
1996 if ((n -= direction) != 0)
1997 cursor += dirlen; /* to resume search */
1998 else
1999 return direction > 0 ? end : start;
2000 }
2001 else
2002 cursor += stride_for_teases; /* <sigh> we lose - */
2003 }
2004 pos_byte += cursor - p2;
2005 }
2006 else
2007 /* Now we'll pick up a clump that has to be done the hard */
2008 /* way because it covers a discontinuity */
2009 {
2010 limit = ((direction > 0)
2011 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
2012 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
2013 limit = ((direction > 0)
2014 ? min (limit + len_byte, lim_byte - 1)
2015 : max (limit - len_byte, lim_byte));
2016 /* LIMIT is now the last value POS_BYTE can have
2017 and still be valid for a possible match. */
2018 while (1)
2019 {
2020 /* This loop can be coded for space rather than */
2021 /* speed because it will usually run only once. */
2022 /* (the reach is at most len + 21, and typically */
2023 /* does not exceed len) */
2024 while ((limit - pos_byte) * direction >= 0)
2025 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
2026 /* now run the same tests to distinguish going off the */
2027 /* end, a match or a phony match. */
2028 if ((pos_byte - limit) * direction <= len_byte)
2029 break; /* ran off the end */
2030 /* Found what might be a match.
2031 Set POS_BYTE back to last (first if reverse) pos. */
2032 pos_byte -= infinity;
2033 i = dirlen - direction;
2034 while ((i -= direction) + direction != 0)
2035 {
2036 int ch;
2037 unsigned char *ptr;
2038 pos_byte -= direction;
2039 ptr = BYTE_POS_ADDR (pos_byte);
2040 /* Translate only the last byte of a character. */
2041 if (! multibyte
2042 || ((ptr == tail_end_ptr
2043 || CHAR_HEAD_P (ptr[1]))
2044 && (CHAR_HEAD_P (ptr[0])
2045 /* Check if this is the last byte of a
2046 translable character. */
2047 || (translate_prev_byte1 == ptr[-1]
2048 && (CHAR_HEAD_P (translate_prev_byte1)
2049 || (translate_prev_byte2 == ptr[-2]
2050 && (CHAR_HEAD_P (translate_prev_byte2)
2051 || translate_prev_byte3 == ptr[-3])))))))
2052 ch = simple_translate[*ptr];
2053 else
2054 ch = *ptr;
2055 if (pat[i] != ch)
2056 break;
2057 }
2058 /* Above loop has moved POS_BYTE part or all the way
2059 back to the first pos (last pos if reverse).
2060 Set it once again at the last (first if reverse) char. */
2061 pos_byte += dirlen - i- direction;
2062 if (i + direction == 0)
2063 {
2064 int position, start, end;
2065 pos_byte -= direction;
2066
2067 position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
2068 set_search_regs (position, len_byte);
2069
2070 if (NILP (Vinhibit_changing_match_data))
2071 {
2072 start = search_regs.start[0];
2073 end = search_regs.end[0];
2074 }
2075 else
2076 /* If Vinhibit_changing_match_data is non-nil,
2077 search_regs will not be changed. So let's
2078 compute start and end here. */
2079 {
2080 start = BYTE_TO_CHAR (position);
2081 end = BYTE_TO_CHAR (position + len_byte);
2082 }
2083
2084 if ((n -= direction) != 0)
2085 pos_byte += dirlen; /* to resume search */
2086 else
2087 return direction > 0 ? end : start;
2088 }
2089 else
2090 pos_byte += stride_for_teases;
2091 }
2092 }
2093 /* We have done one clump. Can we continue? */
2094 if ((lim_byte - pos_byte) * direction < 0)
2095 return ((0 - n) * direction);
2096 }
2097 return BYTE_TO_CHAR (pos_byte);
2098 }
2099
2100 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
2101 for the overall match just found in the current buffer.
2102 Also clear out the match data for registers 1 and up. */
2103
2104 static void
2105 set_search_regs (beg_byte, nbytes)
2106 int beg_byte, nbytes;
2107 {
2108 int i;
2109
2110 if (!NILP (Vinhibit_changing_match_data))
2111 return;
2112
2113 /* Make sure we have registers in which to store
2114 the match position. */
2115 if (search_regs.num_regs == 0)
2116 {
2117 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2118 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
2119 search_regs.num_regs = 2;
2120 }
2121
2122 /* Clear out the other registers. */
2123 for (i = 1; i < search_regs.num_regs; i++)
2124 {
2125 search_regs.start[i] = -1;
2126 search_regs.end[i] = -1;
2127 }
2128
2129 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
2130 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
2131 XSETBUFFER (last_thing_searched, current_buffer);
2132 }
2133 \f
2134 /* Given a string of words separated by word delimiters,
2135 compute a regexp that matches those exact words
2136 separated by arbitrary punctuation. */
2137
2138 static Lisp_Object
2139 wordify (string)
2140 Lisp_Object string;
2141 {
2142 register unsigned char *p, *o;
2143 register int i, i_byte, len, punct_count = 0, word_count = 0;
2144 Lisp_Object val;
2145 int prev_c = 0;
2146 int adjust;
2147
2148 CHECK_STRING (string);
2149 p = SDATA (string);
2150 len = SCHARS (string);
2151
2152 for (i = 0, i_byte = 0; i < len; )
2153 {
2154 int c;
2155
2156 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2157
2158 if (SYNTAX (c) != Sword)
2159 {
2160 punct_count++;
2161 if (i > 0 && SYNTAX (prev_c) == Sword)
2162 word_count++;
2163 }
2164
2165 prev_c = c;
2166 }
2167
2168 if (SYNTAX (prev_c) == Sword)
2169 word_count++;
2170 if (!word_count)
2171 return empty_unibyte_string;
2172
2173 adjust = - punct_count + 5 * (word_count - 1) + 4;
2174 if (STRING_MULTIBYTE (string))
2175 val = make_uninit_multibyte_string (len + adjust,
2176 SBYTES (string)
2177 + adjust);
2178 else
2179 val = make_uninit_string (len + adjust);
2180
2181 o = SDATA (val);
2182 *o++ = '\\';
2183 *o++ = 'b';
2184 prev_c = 0;
2185
2186 for (i = 0, i_byte = 0; i < len; )
2187 {
2188 int c;
2189 int i_byte_orig = i_byte;
2190
2191 FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
2192
2193 if (SYNTAX (c) == Sword)
2194 {
2195 bcopy (SDATA (string) + i_byte_orig, o,
2196 i_byte - i_byte_orig);
2197 o += i_byte - i_byte_orig;
2198 }
2199 else if (i > 0 && SYNTAX (prev_c) == Sword && --word_count)
2200 {
2201 *o++ = '\\';
2202 *o++ = 'W';
2203 *o++ = '\\';
2204 *o++ = 'W';
2205 *o++ = '*';
2206 }
2207
2208 prev_c = c;
2209 }
2210
2211 *o++ = '\\';
2212 *o++ = 'b';
2213
2214 return val;
2215 }
2216 \f
2217 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
2218 "MSearch backward: ",
2219 doc: /* Search backward from point for STRING.
2220 Set point to the beginning of the occurrence found, and return point.
2221 An optional second argument bounds the search; it is a buffer position.
2222 The match found must not extend before that position.
2223 Optional third argument, if t, means if fail just return nil (no error).
2224 If not nil and not t, position at limit of search and return nil.
2225 Optional fourth argument is repeat count--search for successive occurrences.
2226
2227 Search case-sensitivity is determined by the value of the variable
2228 `case-fold-search', which see.
2229
2230 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2231 (string, bound, noerror, count)
2232 Lisp_Object string, bound, noerror, count;
2233 {
2234 return search_command (string, bound, noerror, count, -1, 0, 0);
2235 }
2236
2237 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
2238 doc: /* Search forward from point for STRING.
2239 Set point to the end of the occurrence found, and return point.
2240 An optional second argument bounds the search; it is a buffer position.
2241 The match found must not extend after that position. A value of nil is
2242 equivalent to (point-max).
2243 Optional third argument, if t, means if fail just return nil (no error).
2244 If not nil and not t, move to limit of search and return nil.
2245 Optional fourth argument is repeat count--search for successive occurrences.
2246
2247 Search case-sensitivity is determined by the value of the variable
2248 `case-fold-search', which see.
2249
2250 See also the functions `match-beginning', `match-end' and `replace-match'. */)
2251 (string, bound, noerror, count)
2252 Lisp_Object string, bound, noerror, count;
2253 {
2254 return search_command (string, bound, noerror, count, 1, 0, 0);
2255 }
2256
2257 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
2258 "sWord search backward: ",
2259 doc: /* Search backward from point for STRING, ignoring differences in punctuation.
2260 Set point to the beginning of the occurrence found, and return point.
2261 An optional second argument bounds the search; it is a buffer position.
2262 The match found must not extend before that position.
2263 Optional third argument, if t, means if fail just return nil (no error).
2264 If not nil and not t, move to limit of search and return nil.
2265 Optional fourth argument is repeat count--search for successive occurrences. */)
2266 (string, bound, noerror, count)
2267 Lisp_Object string, bound, noerror, count;
2268 {
2269 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
2270 }
2271
2272 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
2273 "sWord search: ",
2274 doc: /* Search forward from point for STRING, ignoring differences in punctuation.
2275 Set point to the end of the occurrence found, and return point.
2276 An optional second argument bounds the search; it is a buffer position.
2277 The match found must not extend after that position.
2278 Optional third argument, if t, means if fail just return nil (no error).
2279 If not nil and not t, move to limit of search and return nil.
2280 Optional fourth argument is repeat count--search for successive occurrences. */)
2281 (string, bound, noerror, count)
2282 Lisp_Object string, bound, noerror, count;
2283 {
2284 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
2285 }
2286
2287 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
2288 "sRE search backward: ",
2289 doc: /* Search backward from point for match for regular expression REGEXP.
2290 Set point to the beginning of the match, and return point.
2291 The match found is the one starting last in the buffer
2292 and yet ending before the origin of the search.
2293 An optional second argument bounds the search; it is a buffer position.
2294 The match found must start at or after that position.
2295 Optional third argument, if t, means if fail just return nil (no error).
2296 If not nil and not t, move to limit of search and return nil.
2297 Optional fourth argument is repeat count--search for successive occurrences.
2298 See also the functions `match-beginning', `match-end', `match-string',
2299 and `replace-match'. */)
2300 (regexp, bound, noerror, count)
2301 Lisp_Object regexp, bound, noerror, count;
2302 {
2303 return search_command (regexp, bound, noerror, count, -1, 1, 0);
2304 }
2305
2306 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
2307 "sRE search: ",
2308 doc: /* Search forward from point for regular expression REGEXP.
2309 Set point to the end of the occurrence found, and return point.
2310 An optional second argument bounds the search; it is a buffer position.
2311 The match found must not extend after that position.
2312 Optional third argument, if t, means if fail just return nil (no error).
2313 If not nil and not t, move to limit of search and return nil.
2314 Optional fourth argument is repeat count--search for successive occurrences.
2315 See also the functions `match-beginning', `match-end', `match-string',
2316 and `replace-match'. */)
2317 (regexp, bound, noerror, count)
2318 Lisp_Object regexp, bound, noerror, count;
2319 {
2320 return search_command (regexp, bound, noerror, count, 1, 1, 0);
2321 }
2322
2323 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
2324 "sPosix search backward: ",
2325 doc: /* Search backward from point for match for regular expression REGEXP.
2326 Find the longest match in accord with Posix regular expression rules.
2327 Set point to the beginning of the match, and return point.
2328 The match found is the one starting last in the buffer
2329 and yet ending before the origin of the search.
2330 An optional second argument bounds the search; it is a buffer position.
2331 The match found must start at or after that position.
2332 Optional third argument, if t, means if fail just return nil (no error).
2333 If not nil and not t, move to limit of search and return nil.
2334 Optional fourth argument is repeat count--search for successive occurrences.
2335 See also the functions `match-beginning', `match-end', `match-string',
2336 and `replace-match'. */)
2337 (regexp, bound, noerror, count)
2338 Lisp_Object regexp, bound, noerror, count;
2339 {
2340 return search_command (regexp, bound, noerror, count, -1, 1, 1);
2341 }
2342
2343 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
2344 "sPosix search: ",
2345 doc: /* Search forward from point for regular expression REGEXP.
2346 Find the longest match in accord with Posix regular expression rules.
2347 Set point to the end of the occurrence found, and return point.
2348 An optional second argument bounds the search; it is a buffer position.
2349 The match found must not extend after that position.
2350 Optional third argument, if t, means if fail just return nil (no error).
2351 If not nil and not t, move to limit of search and return nil.
2352 Optional fourth argument is repeat count--search for successive occurrences.
2353 See also the functions `match-beginning', `match-end', `match-string',
2354 and `replace-match'. */)
2355 (regexp, bound, noerror, count)
2356 Lisp_Object regexp, bound, noerror, count;
2357 {
2358 return search_command (regexp, bound, noerror, count, 1, 1, 1);
2359 }
2360 \f
2361 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
2362 doc: /* Replace text matched by last search with NEWTEXT.
2363 Leave point at the end of the replacement text.
2364
2365 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
2366 Otherwise maybe capitalize the whole text, or maybe just word initials,
2367 based on the replaced text.
2368 If the replaced text has only capital letters
2369 and has at least one multiletter word, convert NEWTEXT to all caps.
2370 Otherwise if all words are capitalized in the replaced text,
2371 capitalize each word in NEWTEXT.
2372
2373 If third arg LITERAL is non-nil, insert NEWTEXT literally.
2374 Otherwise treat `\\' as special:
2375 `\\&' in NEWTEXT means substitute original matched text.
2376 `\\N' means substitute what matched the Nth `\\(...\\)'.
2377 If Nth parens didn't match, substitute nothing.
2378 `\\\\' means insert one `\\'.
2379 Case conversion does not apply to these substitutions.
2380
2381 FIXEDCASE and LITERAL are optional arguments.
2382
2383 The optional fourth argument STRING can be a string to modify.
2384 This is meaningful when the previous match was done against STRING,
2385 using `string-match'. When used this way, `replace-match'
2386 creates and returns a new string made by copying STRING and replacing
2387 the part of STRING that was matched.
2388
2389 The optional fifth argument SUBEXP specifies a subexpression;
2390 it says to replace just that subexpression with NEWTEXT,
2391 rather than replacing the entire matched text.
2392 This is, in a vague sense, the inverse of using `\\N' in NEWTEXT;
2393 `\\N' copies subexp N into NEWTEXT, but using N as SUBEXP puts
2394 NEWTEXT in place of subexp N.
2395 This is useful only after a regular expression search or match,
2396 since only regular expressions have distinguished subexpressions. */)
2397 (newtext, fixedcase, literal, string, subexp)
2398 Lisp_Object newtext, fixedcase, literal, string, subexp;
2399 {
2400 enum { nochange, all_caps, cap_initial } case_action;
2401 register int pos, pos_byte;
2402 int some_multiletter_word;
2403 int some_lowercase;
2404 int some_uppercase;
2405 int some_nonuppercase_initial;
2406 register int c, prevc;
2407 int sub;
2408 int opoint, newpoint;
2409
2410 CHECK_STRING (newtext);
2411
2412 if (! NILP (string))
2413 CHECK_STRING (string);
2414
2415 case_action = nochange; /* We tried an initialization */
2416 /* but some C compilers blew it */
2417
2418 if (search_regs.num_regs <= 0)
2419 error ("`replace-match' called before any match found");
2420
2421 if (NILP (subexp))
2422 sub = 0;
2423 else
2424 {
2425 CHECK_NUMBER (subexp);
2426 sub = XINT (subexp);
2427 if (sub < 0 || sub >= search_regs.num_regs)
2428 args_out_of_range (subexp, make_number (search_regs.num_regs));
2429 }
2430
2431 if (NILP (string))
2432 {
2433 if (search_regs.start[sub] < BEGV
2434 || search_regs.start[sub] > search_regs.end[sub]
2435 || search_regs.end[sub] > ZV)
2436 args_out_of_range (make_number (search_regs.start[sub]),
2437 make_number (search_regs.end[sub]));
2438 }
2439 else
2440 {
2441 if (search_regs.start[sub] < 0
2442 || search_regs.start[sub] > search_regs.end[sub]
2443 || search_regs.end[sub] > SCHARS (string))
2444 args_out_of_range (make_number (search_regs.start[sub]),
2445 make_number (search_regs.end[sub]));
2446 }
2447
2448 if (NILP (fixedcase))
2449 {
2450 /* Decide how to casify by examining the matched text. */
2451 int last;
2452
2453 pos = search_regs.start[sub];
2454 last = search_regs.end[sub];
2455
2456 if (NILP (string))
2457 pos_byte = CHAR_TO_BYTE (pos);
2458 else
2459 pos_byte = string_char_to_byte (string, pos);
2460
2461 prevc = '\n';
2462 case_action = all_caps;
2463
2464 /* some_multiletter_word is set nonzero if any original word
2465 is more than one letter long. */
2466 some_multiletter_word = 0;
2467 some_lowercase = 0;
2468 some_nonuppercase_initial = 0;
2469 some_uppercase = 0;
2470
2471 while (pos < last)
2472 {
2473 if (NILP (string))
2474 {
2475 c = FETCH_CHAR (pos_byte);
2476 INC_BOTH (pos, pos_byte);
2477 }
2478 else
2479 FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
2480
2481 if (LOWERCASEP (c))
2482 {
2483 /* Cannot be all caps if any original char is lower case */
2484
2485 some_lowercase = 1;
2486 if (SYNTAX (prevc) != Sword)
2487 some_nonuppercase_initial = 1;
2488 else
2489 some_multiletter_word = 1;
2490 }
2491 else if (UPPERCASEP (c))
2492 {
2493 some_uppercase = 1;
2494 if (SYNTAX (prevc) != Sword)
2495 ;
2496 else
2497 some_multiletter_word = 1;
2498 }
2499 else
2500 {
2501 /* If the initial is a caseless word constituent,
2502 treat that like a lowercase initial. */
2503 if (SYNTAX (prevc) != Sword)
2504 some_nonuppercase_initial = 1;
2505 }
2506
2507 prevc = c;
2508 }
2509
2510 /* Convert to all caps if the old text is all caps
2511 and has at least one multiletter word. */
2512 if (! some_lowercase && some_multiletter_word)
2513 case_action = all_caps;
2514 /* Capitalize each word, if the old text has all capitalized words. */
2515 else if (!some_nonuppercase_initial && some_multiletter_word)
2516 case_action = cap_initial;
2517 else if (!some_nonuppercase_initial && some_uppercase)
2518 /* Should x -> yz, operating on X, give Yz or YZ?
2519 We'll assume the latter. */
2520 case_action = all_caps;
2521 else
2522 case_action = nochange;
2523 }
2524
2525 /* Do replacement in a string. */
2526 if (!NILP (string))
2527 {
2528 Lisp_Object before, after;
2529
2530 before = Fsubstring (string, make_number (0),
2531 make_number (search_regs.start[sub]));
2532 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
2533
2534 /* Substitute parts of the match into NEWTEXT
2535 if desired. */
2536 if (NILP (literal))
2537 {
2538 int lastpos = 0;
2539 int lastpos_byte = 0;
2540 /* We build up the substituted string in ACCUM. */
2541 Lisp_Object accum;
2542 Lisp_Object middle;
2543 int length = SBYTES (newtext);
2544
2545 accum = Qnil;
2546
2547 for (pos_byte = 0, pos = 0; pos_byte < length;)
2548 {
2549 int substart = -1;
2550 int subend = 0;
2551 int delbackslash = 0;
2552
2553 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2554
2555 if (c == '\\')
2556 {
2557 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
2558
2559 if (c == '&')
2560 {
2561 substart = search_regs.start[sub];
2562 subend = search_regs.end[sub];
2563 }
2564 else if (c >= '1' && c <= '9')
2565 {
2566 if (search_regs.start[c - '0'] >= 0
2567 && c <= search_regs.num_regs + '0')
2568 {
2569 substart = search_regs.start[c - '0'];
2570 subend = search_regs.end[c - '0'];
2571 }
2572 else
2573 {
2574 /* If that subexp did not match,
2575 replace \\N with nothing. */
2576 substart = 0;
2577 subend = 0;
2578 }
2579 }
2580 else if (c == '\\')
2581 delbackslash = 1;
2582 else
2583 error ("Invalid use of `\\' in replacement text");
2584 }
2585 if (substart >= 0)
2586 {
2587 if (pos - 2 != lastpos)
2588 middle = substring_both (newtext, lastpos,
2589 lastpos_byte,
2590 pos - 2, pos_byte - 2);
2591 else
2592 middle = Qnil;
2593 accum = concat3 (accum, middle,
2594 Fsubstring (string,
2595 make_number (substart),
2596 make_number (subend)));
2597 lastpos = pos;
2598 lastpos_byte = pos_byte;
2599 }
2600 else if (delbackslash)
2601 {
2602 middle = substring_both (newtext, lastpos,
2603 lastpos_byte,
2604 pos - 1, pos_byte - 1);
2605
2606 accum = concat2 (accum, middle);
2607 lastpos = pos;
2608 lastpos_byte = pos_byte;
2609 }
2610 }
2611
2612 if (pos != lastpos)
2613 middle = substring_both (newtext, lastpos,
2614 lastpos_byte,
2615 pos, pos_byte);
2616 else
2617 middle = Qnil;
2618
2619 newtext = concat2 (accum, middle);
2620 }
2621
2622 /* Do case substitution in NEWTEXT if desired. */
2623 if (case_action == all_caps)
2624 newtext = Fupcase (newtext);
2625 else if (case_action == cap_initial)
2626 newtext = Fupcase_initials (newtext);
2627
2628 return concat3 (before, newtext, after);
2629 }
2630
2631 /* Record point, then move (quietly) to the start of the match. */
2632 if (PT >= search_regs.end[sub])
2633 opoint = PT - ZV;
2634 else if (PT > search_regs.start[sub])
2635 opoint = search_regs.end[sub] - ZV;
2636 else
2637 opoint = PT;
2638
2639 /* If we want non-literal replacement,
2640 perform substitution on the replacement string. */
2641 if (NILP (literal))
2642 {
2643 int length = SBYTES (newtext);
2644 unsigned char *substed;
2645 int substed_alloc_size, substed_len;
2646 int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
2647 int str_multibyte = STRING_MULTIBYTE (newtext);
2648 Lisp_Object rev_tbl;
2649 int really_changed = 0;
2650
2651 rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
2652 ? Fchar_table_extra_slot (Vnonascii_translation_table,
2653 make_number (0))
2654 : Qnil);
2655
2656 substed_alloc_size = length * 2 + 100;
2657 substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
2658 substed_len = 0;
2659
2660 /* Go thru NEWTEXT, producing the actual text to insert in
2661 SUBSTED while adjusting multibyteness to that of the current
2662 buffer. */
2663
2664 for (pos_byte = 0, pos = 0; pos_byte < length;)
2665 {
2666 unsigned char str[MAX_MULTIBYTE_LENGTH];
2667 unsigned char *add_stuff = NULL;
2668 int add_len = 0;
2669 int idx = -1;
2670
2671 if (str_multibyte)
2672 {
2673 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
2674 if (!buf_multibyte)
2675 c = multibyte_char_to_unibyte (c, rev_tbl);
2676 }
2677 else
2678 {
2679 /* Note that we don't have to increment POS. */
2680 c = SREF (newtext, pos_byte++);
2681 if (buf_multibyte)
2682 c = unibyte_char_to_multibyte (c);
2683 }
2684
2685 /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
2686 or set IDX to a match index, which means put that part
2687 of the buffer text into SUBSTED. */
2688
2689 if (c == '\\')
2690 {
2691 really_changed = 1;
2692
2693 if (str_multibyte)
2694 {
2695 FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
2696 pos, pos_byte);
2697 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
2698 c = multibyte_char_to_unibyte (c, rev_tbl);
2699 }
2700 else
2701 {
2702 c = SREF (newtext, pos_byte++);
2703 if (buf_multibyte)
2704 c = unibyte_char_to_multibyte (c);
2705 }
2706
2707 if (c == '&')
2708 idx = sub;
2709 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
2710 {
2711 if (search_regs.start[c - '0'] >= 1)
2712 idx = c - '0';
2713 }
2714 else if (c == '\\')
2715 add_len = 1, add_stuff = "\\";
2716 else
2717 {
2718 xfree (substed);
2719 error ("Invalid use of `\\' in replacement text");
2720 }
2721 }
2722 else
2723 {
2724 add_len = CHAR_STRING (c, str);
2725 add_stuff = str;
2726 }
2727
2728 /* If we want to copy part of a previous match,
2729 set up ADD_STUFF and ADD_LEN to point to it. */
2730 if (idx >= 0)
2731 {
2732 int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
2733 add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
2734 if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
2735 move_gap (search_regs.start[idx]);
2736 add_stuff = BYTE_POS_ADDR (begbyte);
2737 }
2738
2739 /* Now the stuff we want to add to SUBSTED
2740 is invariably ADD_LEN bytes starting at ADD_STUFF. */
2741
2742 /* Make sure SUBSTED is big enough. */
2743 if (substed_len + add_len >= substed_alloc_size)
2744 {
2745 substed_alloc_size = substed_len + add_len + 500;
2746 substed = (unsigned char *) xrealloc (substed,
2747 substed_alloc_size + 1);
2748 }
2749
2750 /* Now add to the end of SUBSTED. */
2751 if (add_stuff)
2752 {
2753 bcopy (add_stuff, substed + substed_len, add_len);
2754 substed_len += add_len;
2755 }
2756 }
2757
2758 if (really_changed)
2759 {
2760 if (buf_multibyte)
2761 {
2762 int nchars = multibyte_chars_in_text (substed, substed_len);
2763
2764 newtext = make_multibyte_string (substed, nchars, substed_len);
2765 }
2766 else
2767 newtext = make_unibyte_string (substed, substed_len);
2768 }
2769 xfree (substed);
2770 }
2771
2772 /* Replace the old text with the new in the cleanest possible way. */
2773 replace_range (search_regs.start[sub], search_regs.end[sub],
2774 newtext, 1, 0, 1);
2775 newpoint = search_regs.start[sub] + SCHARS (newtext);
2776
2777 if (case_action == all_caps)
2778 Fupcase_region (make_number (search_regs.start[sub]),
2779 make_number (newpoint));
2780 else if (case_action == cap_initial)
2781 Fupcase_initials_region (make_number (search_regs.start[sub]),
2782 make_number (newpoint));
2783
2784 /* Adjust search data for this change. */
2785 {
2786 int oldend = search_regs.end[sub];
2787 int oldstart = search_regs.start[sub];
2788 int change = newpoint - search_regs.end[sub];
2789 int i;
2790
2791 for (i = 0; i < search_regs.num_regs; i++)
2792 {
2793 if (search_regs.start[i] >= oldend)
2794 search_regs.start[i] += change;
2795 else if (search_regs.start[i] > oldstart)
2796 search_regs.start[i] = oldstart;
2797 if (search_regs.end[i] >= oldend)
2798 search_regs.end[i] += change;
2799 else if (search_regs.end[i] > oldstart)
2800 search_regs.end[i] = oldstart;
2801 }
2802 }
2803
2804 /* Put point back where it was in the text. */
2805 if (opoint <= 0)
2806 TEMP_SET_PT (opoint + ZV);
2807 else
2808 TEMP_SET_PT (opoint);
2809
2810 /* Now move point "officially" to the start of the inserted replacement. */
2811 move_if_not_intangible (newpoint);
2812
2813 return Qnil;
2814 }
2815 \f
2816 static Lisp_Object
2817 match_limit (num, beginningp)
2818 Lisp_Object num;
2819 int beginningp;
2820 {
2821 register int n;
2822
2823 CHECK_NUMBER (num);
2824 n = XINT (num);
2825 if (n < 0)
2826 args_out_of_range (num, make_number (0));
2827 if (search_regs.num_regs <= 0)
2828 error ("No match data, because no search succeeded");
2829 if (n >= search_regs.num_regs
2830 || search_regs.start[n] < 0)
2831 return Qnil;
2832 return (make_number ((beginningp) ? search_regs.start[n]
2833 : search_regs.end[n]));
2834 }
2835
2836 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
2837 doc: /* Return position of start of text matched by last search.
2838 SUBEXP, a number, specifies which parenthesized expression in the last
2839 regexp.
2840 Value is nil if SUBEXPth pair didn't match, or there were less than
2841 SUBEXP pairs.
2842 Zero means the entire text matched by the whole regexp or whole string. */)
2843 (subexp)
2844 Lisp_Object subexp;
2845 {
2846 return match_limit (subexp, 1);
2847 }
2848
2849 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
2850 doc: /* Return position of end of text matched by last search.
2851 SUBEXP, a number, specifies which parenthesized expression in the last
2852 regexp.
2853 Value is nil if SUBEXPth pair didn't match, or there were less than
2854 SUBEXP pairs.
2855 Zero means the entire text matched by the whole regexp or whole string. */)
2856 (subexp)
2857 Lisp_Object subexp;
2858 {
2859 return match_limit (subexp, 0);
2860 }
2861
2862 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 3, 0,
2863 doc: /* Return a list containing all info on what the last search matched.
2864 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.
2865 All the elements are markers or nil (nil if the Nth pair didn't match)
2866 if the last match was on a buffer; integers or nil if a string was matched.
2867 Use `store-match-data' to reinstate the data in this list.
2868
2869 If INTEGERS (the optional first argument) is non-nil, always use
2870 integers \(rather than markers) to represent buffer positions. In
2871 this case, and if the last match was in a buffer, the buffer will get
2872 stored as one additional element at the end of the list.
2873
2874 If REUSE is a list, reuse it as part of the value. If REUSE is long
2875 enough to hold all the values, and if INTEGERS is non-nil, no consing
2876 is done.
2877
2878 If optional third arg RESEAT is non-nil, any previous markers on the
2879 REUSE list will be modified to point to nowhere.
2880
2881 Return value is undefined if the last search failed. */)
2882 (integers, reuse, reseat)
2883 Lisp_Object integers, reuse, reseat;
2884 {
2885 Lisp_Object tail, prev;
2886 Lisp_Object *data;
2887 int i, len;
2888
2889 if (!NILP (reseat))
2890 for (tail = reuse; CONSP (tail); tail = XCDR (tail))
2891 if (MARKERP (XCAR (tail)))
2892 {
2893 unchain_marker (XMARKER (XCAR (tail)));
2894 XSETCAR (tail, Qnil);
2895 }
2896
2897 if (NILP (last_thing_searched))
2898 return Qnil;
2899
2900 prev = Qnil;
2901
2902 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
2903 * sizeof (Lisp_Object));
2904
2905 len = 0;
2906 for (i = 0; i < search_regs.num_regs; i++)
2907 {
2908 int start = search_regs.start[i];
2909 if (start >= 0)
2910 {
2911 if (EQ (last_thing_searched, Qt)
2912 || ! NILP (integers))
2913 {
2914 XSETFASTINT (data[2 * i], start);
2915 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
2916 }
2917 else if (BUFFERP (last_thing_searched))
2918 {
2919 data[2 * i] = Fmake_marker ();
2920 Fset_marker (data[2 * i],
2921 make_number (start),
2922 last_thing_searched);
2923 data[2 * i + 1] = Fmake_marker ();
2924 Fset_marker (data[2 * i + 1],
2925 make_number (search_regs.end[i]),
2926 last_thing_searched);
2927 }
2928 else
2929 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
2930 abort ();
2931
2932 len = 2 * i + 2;
2933 }
2934 else
2935 data[2 * i] = data[2 * i + 1] = Qnil;
2936 }
2937
2938 if (BUFFERP (last_thing_searched) && !NILP (integers))
2939 {
2940 data[len] = last_thing_searched;
2941 len++;
2942 }
2943
2944 /* If REUSE is not usable, cons up the values and return them. */
2945 if (! CONSP (reuse))
2946 return Flist (len, data);
2947
2948 /* If REUSE is a list, store as many value elements as will fit
2949 into the elements of REUSE. */
2950 for (i = 0, tail = reuse; CONSP (tail);
2951 i++, tail = XCDR (tail))
2952 {
2953 if (i < len)
2954 XSETCAR (tail, data[i]);
2955 else
2956 XSETCAR (tail, Qnil);
2957 prev = tail;
2958 }
2959
2960 /* If we couldn't fit all value elements into REUSE,
2961 cons up the rest of them and add them to the end of REUSE. */
2962 if (i < len)
2963 XSETCDR (prev, Flist (len - i, data + i));
2964
2965 return reuse;
2966 }
2967
2968 /* We used to have an internal use variant of `reseat' described as:
2969
2970 If RESEAT is `evaporate', put the markers back on the free list
2971 immediately. No other references to the markers must exist in this
2972 case, so it is used only internally on the unwind stack and
2973 save-match-data from Lisp.
2974
2975 But it was ill-conceived: those supposedly-internal markers get exposed via
2976 the undo-list, so freeing them here is unsafe. */
2977
2978 DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
2979 doc: /* Set internal data on last search match from elements of LIST.
2980 LIST should have been created by calling `match-data' previously.
2981
2982 If optional arg RESEAT is non-nil, make markers on LIST point nowhere. */)
2983 (list, reseat)
2984 register Lisp_Object list, reseat;
2985 {
2986 register int i;
2987 register Lisp_Object marker;
2988
2989 if (running_asynch_code)
2990 save_search_regs ();
2991
2992 CHECK_LIST (list);
2993
2994 /* Unless we find a marker with a buffer or an explicit buffer
2995 in LIST, assume that this match data came from a string. */
2996 last_thing_searched = Qt;
2997
2998 /* Allocate registers if they don't already exist. */
2999 {
3000 int length = XFASTINT (Flength (list)) / 2;
3001
3002 if (length > search_regs.num_regs)
3003 {
3004 if (search_regs.num_regs == 0)
3005 {
3006 search_regs.start
3007 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3008 search_regs.end
3009 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
3010 }
3011 else
3012 {
3013 search_regs.start
3014 = (regoff_t *) xrealloc (search_regs.start,
3015 length * sizeof (regoff_t));
3016 search_regs.end
3017 = (regoff_t *) xrealloc (search_regs.end,
3018 length * sizeof (regoff_t));
3019 }
3020
3021 for (i = search_regs.num_regs; i < length; i++)
3022 search_regs.start[i] = -1;
3023
3024 search_regs.num_regs = length;
3025 }
3026
3027 for (i = 0; CONSP (list); i++)
3028 {
3029 marker = XCAR (list);
3030 if (BUFFERP (marker))
3031 {
3032 last_thing_searched = marker;
3033 break;
3034 }
3035 if (i >= length)
3036 break;
3037 if (NILP (marker))
3038 {
3039 search_regs.start[i] = -1;
3040 list = XCDR (list);
3041 }
3042 else
3043 {
3044 int from;
3045 Lisp_Object m;
3046
3047 m = marker;
3048 if (MARKERP (marker))
3049 {
3050 if (XMARKER (marker)->buffer == 0)
3051 XSETFASTINT (marker, 0);
3052 else
3053 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
3054 }
3055
3056 CHECK_NUMBER_COERCE_MARKER (marker);
3057 from = XINT (marker);
3058
3059 if (!NILP (reseat) && MARKERP (m))
3060 {
3061 unchain_marker (XMARKER (m));
3062 XSETCAR (list, Qnil);
3063 }
3064
3065 if ((list = XCDR (list), !CONSP (list)))
3066 break;
3067
3068 m = marker = XCAR (list);
3069
3070 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
3071 XSETFASTINT (marker, 0);
3072
3073 CHECK_NUMBER_COERCE_MARKER (marker);
3074 search_regs.start[i] = from;
3075 search_regs.end[i] = XINT (marker);
3076
3077 if (!NILP (reseat) && MARKERP (m))
3078 {
3079 unchain_marker (XMARKER (m));
3080 XSETCAR (list, Qnil);
3081 }
3082 }
3083 list = XCDR (list);
3084 }
3085
3086 for (; i < search_regs.num_regs; i++)
3087 search_regs.start[i] = -1;
3088 }
3089
3090 return Qnil;
3091 }
3092
3093 /* If non-zero the match data have been saved in saved_search_regs
3094 during the execution of a sentinel or filter. */
3095 static int search_regs_saved;
3096 static struct re_registers saved_search_regs;
3097 static Lisp_Object saved_last_thing_searched;
3098
3099 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
3100 if asynchronous code (filter or sentinel) is running. */
3101 static void
3102 save_search_regs ()
3103 {
3104 if (!search_regs_saved)
3105 {
3106 saved_search_regs.num_regs = search_regs.num_regs;
3107 saved_search_regs.start = search_regs.start;
3108 saved_search_regs.end = search_regs.end;
3109 saved_last_thing_searched = last_thing_searched;
3110 last_thing_searched = Qnil;
3111 search_regs.num_regs = 0;
3112 search_regs.start = 0;
3113 search_regs.end = 0;
3114
3115 search_regs_saved = 1;
3116 }
3117 }
3118
3119 /* Called upon exit from filters and sentinels. */
3120 void
3121 restore_search_regs ()
3122 {
3123 if (search_regs_saved)
3124 {
3125 if (search_regs.num_regs > 0)
3126 {
3127 xfree (search_regs.start);
3128 xfree (search_regs.end);
3129 }
3130 search_regs.num_regs = saved_search_regs.num_regs;
3131 search_regs.start = saved_search_regs.start;
3132 search_regs.end = saved_search_regs.end;
3133 last_thing_searched = saved_last_thing_searched;
3134 saved_last_thing_searched = Qnil;
3135 search_regs_saved = 0;
3136 }
3137 }
3138
3139 static Lisp_Object
3140 unwind_set_match_data (list)
3141 Lisp_Object list;
3142 {
3143 /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
3144 return Fset_match_data (list, Qt);
3145 }
3146
3147 /* Called to unwind protect the match data. */
3148 void
3149 record_unwind_save_match_data ()
3150 {
3151 record_unwind_protect (unwind_set_match_data,
3152 Fmatch_data (Qnil, Qnil, Qnil));
3153 }
3154
3155 /* Quote a string to inactivate reg-expr chars */
3156
3157 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
3158 doc: /* Return a regexp string which matches exactly STRING and nothing else. */)
3159 (string)
3160 Lisp_Object string;
3161 {
3162 register unsigned char *in, *out, *end;
3163 register unsigned char *temp;
3164 int backslashes_added = 0;
3165
3166 CHECK_STRING (string);
3167
3168 temp = (unsigned char *) alloca (SBYTES (string) * 2);
3169
3170 /* Now copy the data into the new string, inserting escapes. */
3171
3172 in = SDATA (string);
3173 end = in + SBYTES (string);
3174 out = temp;
3175
3176 for (; in != end; in++)
3177 {
3178 if (*in == '['
3179 || *in == '*' || *in == '.' || *in == '\\'
3180 || *in == '?' || *in == '+'
3181 || *in == '^' || *in == '$')
3182 *out++ = '\\', backslashes_added++;
3183 *out++ = *in;
3184 }
3185
3186 return make_specified_string (temp,
3187 SCHARS (string) + backslashes_added,
3188 out - temp,
3189 STRING_MULTIBYTE (string));
3190 }
3191 \f
3192 void
3193 syms_of_search ()
3194 {
3195 register int i;
3196
3197 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
3198 {
3199 searchbufs[i].buf.allocated = 100;
3200 searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
3201 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
3202 searchbufs[i].regexp = Qnil;
3203 searchbufs[i].whitespace_regexp = Qnil;
3204 searchbufs[i].syntax_table = Qnil;
3205 staticpro (&searchbufs[i].regexp);
3206 staticpro (&searchbufs[i].whitespace_regexp);
3207 staticpro (&searchbufs[i].syntax_table);
3208 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
3209 }
3210 searchbuf_head = &searchbufs[0];
3211
3212 Qsearch_failed = intern ("search-failed");
3213 staticpro (&Qsearch_failed);
3214 Qinvalid_regexp = intern ("invalid-regexp");
3215 staticpro (&Qinvalid_regexp);
3216
3217 Fput (Qsearch_failed, Qerror_conditions,
3218 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
3219 Fput (Qsearch_failed, Qerror_message,
3220 build_string ("Search failed"));
3221
3222 Fput (Qinvalid_regexp, Qerror_conditions,
3223 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
3224 Fput (Qinvalid_regexp, Qerror_message,
3225 build_string ("Invalid regexp"));
3226
3227 last_thing_searched = Qnil;
3228 staticpro (&last_thing_searched);
3229
3230 saved_last_thing_searched = Qnil;
3231 staticpro (&saved_last_thing_searched);
3232
3233 DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
3234 doc: /* Regexp to substitute for bunches of spaces in regexp search.
3235 Some commands use this for user-specified regexps.
3236 Spaces that occur inside character classes or repetition operators
3237 or other such regexp constructs are not replaced with this.
3238 A value of nil (which is the normal value) means treat spaces literally. */);
3239 Vsearch_spaces_regexp = Qnil;
3240
3241 DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
3242 doc: /* Internal use only.
3243 If non-nil, the primitive searching and matching functions
3244 such as `looking-at', `string-match', `re-search-forward', etc.,
3245 do not set the match data. The proper way to use this variable
3246 is to bind it with `let' around a small expression. */);
3247 Vinhibit_changing_match_data = Qnil;
3248
3249 defsubr (&Slooking_at);
3250 defsubr (&Sposix_looking_at);
3251 defsubr (&Sstring_match);
3252 defsubr (&Sposix_string_match);
3253 defsubr (&Ssearch_forward);
3254 defsubr (&Ssearch_backward);
3255 defsubr (&Sword_search_forward);
3256 defsubr (&Sword_search_backward);
3257 defsubr (&Sre_search_forward);
3258 defsubr (&Sre_search_backward);
3259 defsubr (&Sposix_search_forward);
3260 defsubr (&Sposix_search_backward);
3261 defsubr (&Sreplace_match);
3262 defsubr (&Smatch_beginning);
3263 defsubr (&Smatch_end);
3264 defsubr (&Smatch_data);
3265 defsubr (&Sset_match_data);
3266 defsubr (&Sregexp_quote);
3267 }
3268
3269 /* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
3270 (do not change this comment) */