]> code.delx.au - gnu-emacs/blob - src/search.c
(Fcall_interactively) <'d', 'r'>: Use set_marker_both.
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include "lisp.h"
24 #include "syntax.h"
25 #include "category.h"
26 #include "buffer.h"
27 #include "charset.h"
28 #include "region-cache.h"
29 #include "commands.h"
30 #include "blockinput.h"
31 #include "intervals.h"
32
33 #include <sys/types.h>
34 #include "regex.h"
35
36 #define REGEXP_CACHE_SIZE 20
37
38 /* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
40 struct regexp_cache
41 {
42 struct regexp_cache *next;
43 Lisp_Object regexp;
44 struct re_pattern_buffer buf;
45 char fastmap[0400];
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
47 char posix;
48 };
49
50 /* The instances of that struct. */
51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
52
53 /* The head of the linked list; points to the most recently used buffer. */
54 struct regexp_cache *searchbuf_head;
55
56
57 /* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match
60 can be called).
61
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
64 been allocated by checking search_regs.num_regs.
65
66 The regex code keeps track of whether it has allocated the search
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
74 static struct re_registers search_regs;
75
76 /* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79 static Lisp_Object last_thing_searched;
80
81 /* error condition signaled when regexp compile_pattern fails */
82
83 Lisp_Object Qinvalid_regexp;
84
85 static void set_search_regs ();
86 static void save_search_regs ();
87
88 static int search_buffer ();
89
90 static void
91 matcher_overflow ()
92 {
93 error ("Stack overflow in regexp matcher");
94 }
95
96 #ifdef __STDC__
97 #define CONST const
98 #else
99 #define CONST
100 #endif
101
102 /* Compile a regexp and signal a Lisp error if anything goes wrong.
103 PATTERN is the pattern to compile.
104 CP is the place to put the result.
105 TRANSLATE is a translation table for ignoring case, or NULL for none.
106 REGP is the structure that says where to store the "register"
107 values that will result from matching this pattern.
108 If it is 0, we should compile the pattern not to record any
109 subexpression bounds.
110 POSIX is nonzero if we want full backtracking (POSIX style)
111 for this pattern. 0 means backtrack only enough to get a valid match.
112 MULTIBYTE is nonzero if we want to handle multibyte characters in
113 PATTERN. 0 means all multibyte characters are recognized just as
114 sequences of binary data. */
115
116 static void
117 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
118 struct regexp_cache *cp;
119 Lisp_Object pattern;
120 Lisp_Object *translate;
121 struct re_registers *regp;
122 int posix;
123 int multibyte;
124 {
125 char *val;
126 reg_syntax_t old;
127
128 cp->regexp = Qnil;
129 cp->buf.translate = translate;
130 cp->posix = posix;
131 cp->buf.multibyte = multibyte;
132 BLOCK_INPUT;
133 old = re_set_syntax (RE_SYNTAX_EMACS
134 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
135 val = (char *) re_compile_pattern ((char *) XSTRING (pattern)->data,
136 XSTRING (pattern)->size, &cp->buf);
137 re_set_syntax (old);
138 UNBLOCK_INPUT;
139 if (val)
140 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
141
142 cp->regexp = Fcopy_sequence (pattern);
143 }
144
145 /* Compile a regexp if necessary, but first check to see if there's one in
146 the cache.
147 PATTERN is the pattern to compile.
148 TRANSLATE is a translation table for ignoring case, or NULL for none.
149 REGP is the structure that says where to store the "register"
150 values that will result from matching this pattern.
151 If it is 0, we should compile the pattern not to record any
152 subexpression bounds.
153 POSIX is nonzero if we want full backtracking (POSIX style)
154 for this pattern. 0 means backtrack only enough to get a valid match. */
155
156 struct re_pattern_buffer *
157 compile_pattern (pattern, regp, translate, posix)
158 Lisp_Object pattern;
159 struct re_registers *regp;
160 Lisp_Object *translate;
161 int posix;
162 {
163 struct regexp_cache *cp, **cpp;
164 /* Should we check it here, or add an argument `multibyte' to this
165 function? */
166 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
167
168 for (cpp = &searchbuf_head; ; cpp = &cp->next)
169 {
170 cp = *cpp;
171 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
172 && !NILP (Fstring_equal (cp->regexp, pattern))
173 && cp->buf.translate == translate
174 && cp->posix == posix
175 && cp->buf.multibyte == multibyte)
176 break;
177
178 /* If we're at the end of the cache, compile into the last cell. */
179 if (cp->next == 0)
180 {
181 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
182 break;
183 }
184 }
185
186 /* When we get here, cp (aka *cpp) contains the compiled pattern,
187 either because we found it in the cache or because we just compiled it.
188 Move it to the front of the queue to mark it as most recently used. */
189 *cpp = cp->next;
190 cp->next = searchbuf_head;
191 searchbuf_head = cp;
192
193 /* Advise the searching functions about the space we have allocated
194 for register data. */
195 if (regp)
196 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
197
198 return &cp->buf;
199 }
200
201 /* Error condition used for failing searches */
202 Lisp_Object Qsearch_failed;
203
204 Lisp_Object
205 signal_failure (arg)
206 Lisp_Object arg;
207 {
208 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
209 return Qnil;
210 }
211 \f
212 static Lisp_Object
213 looking_at_1 (string, posix)
214 Lisp_Object string;
215 int posix;
216 {
217 Lisp_Object val;
218 unsigned char *p1, *p2;
219 int s1, s2;
220 register int i;
221 struct re_pattern_buffer *bufp;
222
223 if (running_asynch_code)
224 save_search_regs ();
225
226 CHECK_STRING (string, 0);
227 bufp = compile_pattern (string, &search_regs,
228 (!NILP (current_buffer->case_fold_search)
229 ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
230 posix);
231
232 immediate_quit = 1;
233 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
234
235 /* Get pointers and sizes of the two strings
236 that make up the visible portion of the buffer. */
237
238 p1 = BEGV_ADDR;
239 s1 = GPT_BYTE - BEGV_BYTE;
240 p2 = GAP_END_ADDR;
241 s2 = ZV_BYTE - GPT_BYTE;
242 if (s1 < 0)
243 {
244 p2 = p1;
245 s2 = ZV_BYTE - BEGV_BYTE;
246 s1 = 0;
247 }
248 if (s2 < 0)
249 {
250 s1 = ZV_BYTE - BEGV_BYTE;
251 s2 = 0;
252 }
253
254 re_match_object = Qnil;
255
256 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
257 PT_BYTE - BEGV_BYTE, &search_regs,
258 ZV_BYTE - BEGV_BYTE);
259 if (i == -2)
260 matcher_overflow ();
261
262 val = (0 <= i ? Qt : Qnil);
263 if (i >= 0)
264 for (i = 0; i < search_regs.num_regs; i++)
265 if (search_regs.start[i] >= 0)
266 {
267 search_regs.start[i]
268 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
269 search_regs.end[i]
270 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
271 }
272 XSETBUFFER (last_thing_searched, current_buffer);
273 immediate_quit = 0;
274 return val;
275 }
276
277 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
278 "Return t if text after point matches regular expression REGEXP.\n\
279 This function modifies the match data that `match-beginning',\n\
280 `match-end' and `match-data' access; save and restore the match\n\
281 data if you want to preserve them.")
282 (regexp)
283 Lisp_Object regexp;
284 {
285 return looking_at_1 (regexp, 0);
286 }
287
288 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
289 "Return t if text after point matches regular expression REGEXP.\n\
290 Find the longest match, in accord with Posix regular expression rules.\n\
291 This function modifies the match data that `match-beginning',\n\
292 `match-end' and `match-data' access; save and restore the match\n\
293 data if you want to preserve them.")
294 (regexp)
295 Lisp_Object regexp;
296 {
297 return looking_at_1 (regexp, 1);
298 }
299 \f
300 static Lisp_Object
301 string_match_1 (regexp, string, start, posix)
302 Lisp_Object regexp, string, start;
303 int posix;
304 {
305 int val;
306 int s;
307 struct re_pattern_buffer *bufp;
308
309 if (running_asynch_code)
310 save_search_regs ();
311
312 CHECK_STRING (regexp, 0);
313 CHECK_STRING (string, 1);
314
315 if (NILP (start))
316 s = 0;
317 else
318 {
319 int len = XSTRING (string)->size;
320
321 CHECK_NUMBER (start, 2);
322 s = XINT (start);
323 if (s < 0 && -s <= len)
324 s = len + s;
325 else if (0 > s || s > len)
326 args_out_of_range (string, start);
327 }
328
329 bufp = compile_pattern (regexp, &search_regs,
330 (!NILP (current_buffer->case_fold_search)
331 ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
332 posix);
333 immediate_quit = 1;
334 re_match_object = string;
335
336 val = re_search (bufp, (char *) XSTRING (string)->data,
337 XSTRING (string)->size, s, XSTRING (string)->size - s,
338 &search_regs);
339 immediate_quit = 0;
340 last_thing_searched = Qt;
341 if (val == -2)
342 matcher_overflow ();
343 if (val < 0) return Qnil;
344 return make_number (val);
345 }
346
347 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
348 "Return index of start of first match for REGEXP in STRING, or nil.\n\
349 If third arg START is non-nil, start search at that index in STRING.\n\
350 For index of first char beyond the match, do (match-end 0).\n\
351 `match-end' and `match-beginning' also give indices of substrings\n\
352 matched by parenthesis constructs in the pattern.")
353 (regexp, string, start)
354 Lisp_Object regexp, string, start;
355 {
356 return string_match_1 (regexp, string, start, 0);
357 }
358
359 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
360 "Return index of start of first match for REGEXP in STRING, or nil.\n\
361 Find the longest match, in accord with Posix regular expression rules.\n\
362 If third arg START is non-nil, start search at that index in STRING.\n\
363 For index of first char beyond the match, do (match-end 0).\n\
364 `match-end' and `match-beginning' also give indices of substrings\n\
365 matched by parenthesis constructs in the pattern.")
366 (regexp, string, start)
367 Lisp_Object regexp, string, start;
368 {
369 return string_match_1 (regexp, string, start, 1);
370 }
371
372 /* Match REGEXP against STRING, searching all of STRING,
373 and return the index of the match, or negative on failure.
374 This does not clobber the match data. */
375
376 int
377 fast_string_match (regexp, string)
378 Lisp_Object regexp, string;
379 {
380 int val;
381 struct re_pattern_buffer *bufp;
382
383 bufp = compile_pattern (regexp, 0, 0, 0);
384 immediate_quit = 1;
385 re_match_object = string;
386
387 val = re_search (bufp, (char *) XSTRING (string)->data,
388 XSTRING (string)->size, 0, XSTRING (string)->size,
389 0);
390 immediate_quit = 0;
391 return val;
392 }
393
394 /* Match REGEXP against STRING, searching all of STRING ignoring case,
395 and return the index of the match, or negative on failure.
396 This does not clobber the match data. */
397
398 extern Lisp_Object Vascii_downcase_table;
399
400 int
401 fast_c_string_match_ignore_case (regexp, string)
402 Lisp_Object regexp;
403 char *string;
404 {
405 int val;
406 struct re_pattern_buffer *bufp;
407 int len = strlen (string);
408
409 re_match_object = Qt;
410 bufp = compile_pattern (regexp, 0,
411 XCHAR_TABLE (Vascii_downcase_table)->contents, 0);
412 immediate_quit = 1;
413 val = re_search (bufp, string, len, 0, len, 0);
414 immediate_quit = 0;
415 return val;
416 }
417 \f
418 /* max and min. */
419
420 static int
421 max (a, b)
422 int a, b;
423 {
424 return ((a > b) ? a : b);
425 }
426
427 static int
428 min (a, b)
429 int a, b;
430 {
431 return ((a < b) ? a : b);
432 }
433
434 \f
435 /* The newline cache: remembering which sections of text have no newlines. */
436
437 /* If the user has requested newline caching, make sure it's on.
438 Otherwise, make sure it's off.
439 This is our cheezy way of associating an action with the change of
440 state of a buffer-local variable. */
441 static void
442 newline_cache_on_off (buf)
443 struct buffer *buf;
444 {
445 if (NILP (buf->cache_long_line_scans))
446 {
447 /* It should be off. */
448 if (buf->newline_cache)
449 {
450 free_region_cache (buf->newline_cache);
451 buf->newline_cache = 0;
452 }
453 }
454 else
455 {
456 /* It should be on. */
457 if (buf->newline_cache == 0)
458 buf->newline_cache = new_region_cache ();
459 }
460 }
461
462 \f
463 /* Search for COUNT instances of the character TARGET between START and END.
464
465 If COUNT is positive, search forwards; END must be >= START.
466 If COUNT is negative, search backwards for the -COUNTth instance;
467 END must be <= START.
468 If COUNT is zero, do anything you please; run rogue, for all I care.
469
470 If END is zero, use BEGV or ZV instead, as appropriate for the
471 direction indicated by COUNT.
472
473 If we find COUNT instances, set *SHORTAGE to zero, and return the
474 position after the COUNTth match. Note that for reverse motion
475 this is not the same as the usual convention for Emacs motion commands.
476
477 If we don't find COUNT instances before reaching END, set *SHORTAGE
478 to the number of TARGETs left unfound, and return END.
479
480 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
481 except when inside redisplay. */
482
483 scan_buffer (target, start, end, count, shortage, allow_quit)
484 register int target;
485 int start, end;
486 int count;
487 int *shortage;
488 int allow_quit;
489 {
490 struct region_cache *newline_cache;
491 int direction;
492
493 if (count > 0)
494 {
495 direction = 1;
496 if (! end) end = ZV;
497 }
498 else
499 {
500 direction = -1;
501 if (! end) end = BEGV;
502 }
503
504 newline_cache_on_off (current_buffer);
505 newline_cache = current_buffer->newline_cache;
506
507 if (shortage != 0)
508 *shortage = 0;
509
510 immediate_quit = allow_quit;
511
512 if (count > 0)
513 while (start != end)
514 {
515 /* Our innermost scanning loop is very simple; it doesn't know
516 about gaps, buffer ends, or the newline cache. ceiling is
517 the position of the last character before the next such
518 obstacle --- the last character the dumb search loop should
519 examine. */
520 int ceiling_byte = CHAR_TO_BYTE (end) - 1;
521 int start_byte = CHAR_TO_BYTE (start);
522
523 /* If we're looking for a newline, consult the newline cache
524 to see where we can avoid some scanning. */
525 if (target == '\n' && newline_cache)
526 {
527 int next_change;
528 immediate_quit = 0;
529 while (region_cache_forward
530 (current_buffer, newline_cache, start_byte, &next_change))
531 start_byte = next_change;
532 immediate_quit = allow_quit;
533
534 /* START should never be after END. */
535 if (start_byte > ceiling_byte)
536 start_byte = ceiling_byte;
537
538 /* Now the text after start is an unknown region, and
539 next_change is the position of the next known region. */
540 ceiling_byte = min (next_change - 1, ceiling_byte);
541 }
542
543 /* The dumb loop can only scan text stored in contiguous
544 bytes. BUFFER_CEILING_OF returns the last character
545 position that is contiguous, so the ceiling is the
546 position after that. */
547 ceiling_byte = min (BUFFER_CEILING_OF (start_byte), ceiling_byte);
548
549 {
550 /* The termination address of the dumb loop. */
551 register unsigned char *ceiling_addr
552 = BYTE_POS_ADDR (ceiling_byte) + 1;
553 register unsigned char *cursor
554 = BYTE_POS_ADDR (start_byte);
555 unsigned char *base = cursor;
556
557 while (cursor < ceiling_addr)
558 {
559 unsigned char *scan_start = cursor;
560
561 /* The dumb loop. */
562 while (*cursor != target && ++cursor < ceiling_addr)
563 ;
564
565 /* If we're looking for newlines, cache the fact that
566 the region from start to cursor is free of them. */
567 if (target == '\n' && newline_cache)
568 know_region_cache (current_buffer, newline_cache,
569 start_byte + scan_start - base,
570 start_byte + cursor - base);
571
572 /* Did we find the target character? */
573 if (cursor < ceiling_addr)
574 {
575 if (--count == 0)
576 {
577 immediate_quit = 0;
578 return BYTE_TO_CHAR (start_byte + cursor - base + 1);
579 }
580 cursor++;
581 }
582 }
583
584 start = BYTE_TO_CHAR (start_byte + cursor - base);
585 }
586 }
587 else
588 while (start > end)
589 {
590 /* The last character to check before the next obstacle. */
591 int ceiling_byte = CHAR_TO_BYTE (end);
592 int start_byte = CHAR_TO_BYTE (start);
593
594 /* Consult the newline cache, if appropriate. */
595 if (target == '\n' && newline_cache)
596 {
597 int next_change;
598 immediate_quit = 0;
599 while (region_cache_backward
600 (current_buffer, newline_cache, start_byte, &next_change))
601 start_byte = next_change;
602 immediate_quit = allow_quit;
603
604 /* Start should never be at or before end. */
605 if (start_byte <= ceiling_byte)
606 start_byte = ceiling_byte + 1;
607
608 /* Now the text before start is an unknown region, and
609 next_change is the position of the next known region. */
610 ceiling_byte = max (next_change, ceiling_byte);
611 }
612
613 /* Stop scanning before the gap. */
614 ceiling_byte = max (BUFFER_FLOOR_OF (start_byte - 1), ceiling_byte);
615
616 {
617 /* The termination address of the dumb loop. */
618 register unsigned char *ceiling_addr = BYTE_POS_ADDR (ceiling_byte);
619 register unsigned char *cursor = BYTE_POS_ADDR (start_byte - 1);
620 unsigned char *base = cursor;
621
622 while (cursor >= ceiling_addr)
623 {
624 unsigned char *scan_start = cursor;
625
626 while (*cursor != target && --cursor >= ceiling_addr)
627 ;
628
629 /* If we're looking for newlines, cache the fact that
630 the region from after the cursor to start is free of them. */
631 if (target == '\n' && newline_cache)
632 know_region_cache (current_buffer, newline_cache,
633 start_byte + cursor - base,
634 start_byte + scan_start - base);
635
636 /* Did we find the target character? */
637 if (cursor >= ceiling_addr)
638 {
639 if (++count >= 0)
640 {
641 immediate_quit = 0;
642 return BYTE_TO_CHAR (start_byte + cursor - base);
643 }
644 cursor--;
645 }
646 }
647
648 start = BYTE_TO_CHAR (start_byte + cursor - base);
649 }
650 }
651
652 immediate_quit = 0;
653 if (shortage != 0)
654 *shortage = count * direction;
655 return start;
656 }
657 \f
658 /* Search for COUNT instances of a line boundary, which means either a
659 newline or (if selective display enabled) a carriage return.
660 Start at START. If COUNT is negative, search backwards.
661
662 We report the resulting position by calling TEMP_SET_PT_BOTH.
663
664 If we find COUNT instances. we position after (always after,
665 even if scanning backwards) the COUNTth match, and return 0.
666
667 If we don't find COUNT instances before reaching the end of the
668 buffer (or the beginning, if scanning backwards), we return
669 the number of line boundaries left unfound, and position at
670 the limit we bumped up against.
671
672 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
673 except in special cases. */
674
675 int
676 scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
677 int start, start_byte;
678 int limit, limit_byte;
679 register int count;
680 int allow_quit;
681 {
682 int direction = ((count > 0) ? 1 : -1);
683
684 register unsigned char *cursor;
685 unsigned char *base;
686
687 register int ceiling;
688 register unsigned char *ceiling_addr;
689
690 int old_immediate_quit = immediate_quit;
691
692 /* If we are not in selective display mode,
693 check only for newlines. */
694 int selective_display = (!NILP (current_buffer->selective_display)
695 && !INTEGERP (current_buffer->selective_display));
696
697 /* The code that follows is like scan_buffer
698 but checks for either newline or carriage return. */
699
700 if (allow_quit)
701 immediate_quit++;
702
703 start_byte = CHAR_TO_BYTE (start);
704
705 if (count > 0)
706 {
707 while (start_byte < limit_byte)
708 {
709 ceiling = BUFFER_CEILING_OF (start_byte);
710 ceiling = min (limit_byte - 1, ceiling);
711 ceiling_addr = BYTE_POS_ADDR (ceiling) + 1;
712 base = (cursor = BYTE_POS_ADDR (start_byte));
713 while (1)
714 {
715 while (*cursor != '\n' && ++cursor != ceiling_addr)
716 ;
717
718 if (cursor != ceiling_addr)
719 {
720 if (--count == 0)
721 {
722 immediate_quit = old_immediate_quit;
723 start_byte = start_byte + cursor - base + 1;
724 start = BYTE_TO_CHAR (start_byte);
725 TEMP_SET_PT_BOTH (start, start_byte);
726 return 0;
727 }
728 else
729 if (++cursor == ceiling_addr)
730 break;
731 }
732 else
733 break;
734 }
735 start_byte += cursor - base;
736 }
737 }
738 else
739 {
740 int start_byte = CHAR_TO_BYTE (start);
741 while (start_byte > limit_byte)
742 {
743 ceiling = BUFFER_FLOOR_OF (start_byte - 1);
744 ceiling = max (limit_byte, ceiling);
745 ceiling_addr = BYTE_POS_ADDR (ceiling) - 1;
746 base = (cursor = BYTE_POS_ADDR (start_byte - 1) + 1);
747 while (1)
748 {
749 while (--cursor != ceiling_addr && *cursor != '\n')
750 ;
751
752 if (cursor != ceiling_addr)
753 {
754 if (++count == 0)
755 {
756 immediate_quit = old_immediate_quit;
757 /* Return the position AFTER the match we found. */
758 start_byte = start_byte + cursor - base + 1;
759 start = BYTE_TO_CHAR (start_byte);
760 TEMP_SET_PT_BOTH (start, start_byte);
761 return 0;
762 }
763 }
764 else
765 break;
766 }
767 /* Here we add 1 to compensate for the last decrement
768 of CURSOR, which took it past the valid range. */
769 start_byte += cursor - base + 1;
770 }
771 }
772
773 TEMP_SET_PT_BOTH (limit, limit_byte);
774 immediate_quit = old_immediate_quit;
775
776 return count * direction;
777 }
778
779 int
780 find_next_newline_no_quit (from, cnt)
781 register int from, cnt;
782 {
783 return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
784 }
785
786 /* Like find_next_newline, but returns position before the newline,
787 not after, and only search up to TO. This isn't just
788 find_next_newline (...)-1, because you might hit TO. */
789
790 int
791 find_before_next_newline (from, to, cnt)
792 int from, to, cnt;
793 {
794 int shortage;
795 int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
796
797 if (shortage == 0)
798 pos--;
799
800 return pos;
801 }
802 \f
803 /* Subroutines of Lisp buffer search functions. */
804
805 static Lisp_Object
806 search_command (string, bound, noerror, count, direction, RE, posix)
807 Lisp_Object string, bound, noerror, count;
808 int direction;
809 int RE;
810 int posix;
811 {
812 register int np;
813 int lim;
814 int n = direction;
815
816 if (!NILP (count))
817 {
818 CHECK_NUMBER (count, 3);
819 n *= XINT (count);
820 }
821
822 CHECK_STRING (string, 0);
823 if (NILP (bound))
824 lim = n > 0 ? ZV : BEGV;
825 else
826 {
827 CHECK_NUMBER_COERCE_MARKER (bound, 1);
828 lim = XINT (bound);
829 if (n > 0 ? lim < PT : lim > PT)
830 error ("Invalid search bound (wrong side of point)");
831 if (lim > ZV)
832 lim = ZV;
833 if (lim < BEGV)
834 lim = BEGV;
835 }
836
837 np = search_buffer (string, PT, lim, n, RE,
838 (!NILP (current_buffer->case_fold_search)
839 ? XCHAR_TABLE (current_buffer->case_canon_table)->contents
840 : 0),
841 (!NILP (current_buffer->case_fold_search)
842 ? XCHAR_TABLE (current_buffer->case_eqv_table)->contents
843 : 0),
844 posix);
845 if (np <= 0)
846 {
847 if (NILP (noerror))
848 return signal_failure (string);
849 if (!EQ (noerror, Qt))
850 {
851 if (lim < BEGV || lim > ZV)
852 abort ();
853 SET_PT (lim);
854 return Qnil;
855 #if 0 /* This would be clean, but maybe programs depend on
856 a value of nil here. */
857 np = lim;
858 #endif
859 }
860 else
861 return Qnil;
862 }
863
864 if (np < BEGV || np > ZV)
865 abort ();
866
867 SET_PT (np);
868
869 return make_number (np);
870 }
871 \f
872 /* Return 1 if REGEXP it matches just one constant string. */
873
874 static int
875 trivial_regexp_p (regexp)
876 Lisp_Object regexp;
877 {
878 int len = XSTRING (regexp)->size;
879 unsigned char *s = XSTRING (regexp)->data;
880 unsigned char c;
881 while (--len >= 0)
882 {
883 switch (*s++)
884 {
885 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
886 return 0;
887 case '\\':
888 if (--len < 0)
889 return 0;
890 switch (*s++)
891 {
892 case '|': case '(': case ')': case '`': case '\'': case 'b':
893 case 'B': case '<': case '>': case 'w': case 'W': case 's':
894 case 'S': case '=':
895 case 'c': case 'C': /* for categoryspec and notcategoryspec */
896 case '1': case '2': case '3': case '4': case '5':
897 case '6': case '7': case '8': case '9':
898 return 0;
899 }
900 }
901 }
902 return 1;
903 }
904
905 /* Search for the n'th occurrence of STRING in the current buffer,
906 starting at position POS and stopping at position LIM,
907 treating STRING as a literal string if RE is false or as
908 a regular expression if RE is true.
909
910 If N is positive, searching is forward and LIM must be greater than POS.
911 If N is negative, searching is backward and LIM must be less than POS.
912
913 Returns -x if only N-x occurrences found (x > 0),
914 or else the position at the beginning of the Nth occurrence
915 (if searching backward) or the end (if searching forward).
916
917 POSIX is nonzero if we want full backtracking (POSIX style)
918 for this pattern. 0 means backtrack only enough to get a valid match. */
919
920 static int
921 search_buffer (string, pos, lim, n, RE, trt, inverse_trt, posix)
922 Lisp_Object string;
923 int pos;
924 int lim;
925 int n;
926 int RE;
927 Lisp_Object *trt;
928 Lisp_Object *inverse_trt;
929 int posix;
930 {
931 int len = XSTRING (string)->size;
932 unsigned char *base_pat = XSTRING (string)->data;
933 register int *BM_tab;
934 int *BM_tab_base;
935 register int direction = ((n > 0) ? 1 : -1);
936 register int dirlen;
937 int infinity, limit, k, stride_for_teases;
938 register unsigned char *pat, *cursor, *p_limit;
939 register int i, j;
940 unsigned char *p1, *p2;
941 int s1, s2;
942
943 if (running_asynch_code)
944 save_search_regs ();
945
946 /* Null string is found at starting position. */
947 if (len == 0)
948 {
949 set_search_regs (pos, 0);
950 return pos;
951 }
952
953 /* Searching 0 times means don't move. */
954 if (n == 0)
955 return pos;
956
957 if (RE && !trivial_regexp_p (string))
958 {
959 struct re_pattern_buffer *bufp;
960
961 bufp = compile_pattern (string, &search_regs, trt, posix);
962
963 immediate_quit = 1; /* Quit immediately if user types ^G,
964 because letting this function finish
965 can take too long. */
966 QUIT; /* Do a pending quit right away,
967 to avoid paradoxical behavior */
968 /* Get pointers and sizes of the two strings
969 that make up the visible portion of the buffer. */
970
971 p1 = BEGV_ADDR;
972 s1 = GPT_BYTE - BEGV_BYTE;
973 p2 = GAP_END_ADDR;
974 s2 = ZV_BYTE - GPT_BYTE;
975 if (s1 < 0)
976 {
977 p2 = p1;
978 s2 = ZV_BYTE - BEGV_BYTE;
979 s1 = 0;
980 }
981 if (s2 < 0)
982 {
983 s1 = ZV_BYTE - BEGV_BYTE;
984 s2 = 0;
985 }
986 re_match_object = Qnil;
987
988 while (n < 0)
989 {
990 int val;
991 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
992 pos - BEGV, lim - pos, &search_regs,
993 /* Don't allow match past current point */
994 pos - BEGV);
995 if (val == -2)
996 {
997 matcher_overflow ();
998 }
999 if (val >= 0)
1000 {
1001 for (i = 0; i < search_regs.num_regs; i++)
1002 if (search_regs.start[i] >= 0)
1003 {
1004 search_regs.start[i]
1005 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1006 search_regs.end[i]
1007 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1008 }
1009 XSETBUFFER (last_thing_searched, current_buffer);
1010 /* Set pos to the new position. */
1011 pos = search_regs.start[0];
1012 }
1013 else
1014 {
1015 immediate_quit = 0;
1016 return (n);
1017 }
1018 n++;
1019 }
1020 while (n > 0)
1021 {
1022 int val;
1023 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
1024 pos - BEGV, lim - pos, &search_regs,
1025 lim - BEGV);
1026 if (val == -2)
1027 {
1028 matcher_overflow ();
1029 }
1030 if (val >= 0)
1031 {
1032 for (i = 0; i < search_regs.num_regs; i++)
1033 if (search_regs.start[i] >= 0)
1034 {
1035 search_regs.start[i]
1036 = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
1037 search_regs.end[i]
1038 = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
1039 }
1040 XSETBUFFER (last_thing_searched, current_buffer);
1041 pos = search_regs.end[0];
1042 }
1043 else
1044 {
1045 immediate_quit = 0;
1046 return (0 - n);
1047 }
1048 n--;
1049 }
1050 immediate_quit = 0;
1051 return (pos);
1052 }
1053 else /* non-RE case */
1054 {
1055 int pos_byte = CHAR_TO_BYTE (pos);
1056 int lim_byte = CHAR_TO_BYTE (lim);
1057 #ifdef C_ALLOCA
1058 int BM_tab_space[0400];
1059 BM_tab = &BM_tab_space[0];
1060 #else
1061 BM_tab = (int *) alloca (0400 * sizeof (int));
1062 #endif
1063 {
1064 unsigned char *patbuf = (unsigned char *) alloca (len);
1065 pat = patbuf;
1066 while (--len >= 0)
1067 {
1068 /* If we got here and the RE flag is set, it's because we're
1069 dealing with a regexp known to be trivial, so the backslash
1070 just quotes the next character. */
1071 if (RE && *base_pat == '\\')
1072 {
1073 len--;
1074 base_pat++;
1075 }
1076 *pat++ = (trt ? XINT (trt[*base_pat++]) : *base_pat++);
1077 }
1078 len = pat - patbuf;
1079 pat = base_pat = patbuf;
1080 }
1081 /* The general approach is that we are going to maintain that we know */
1082 /* the first (closest to the present position, in whatever direction */
1083 /* we're searching) character that could possibly be the last */
1084 /* (furthest from present position) character of a valid match. We */
1085 /* advance the state of our knowledge by looking at that character */
1086 /* and seeing whether it indeed matches the last character of the */
1087 /* pattern. If it does, we take a closer look. If it does not, we */
1088 /* move our pointer (to putative last characters) as far as is */
1089 /* logically possible. This amount of movement, which I call a */
1090 /* stride, will be the length of the pattern if the actual character */
1091 /* appears nowhere in the pattern, otherwise it will be the distance */
1092 /* from the last occurrence of that character to the end of the */
1093 /* pattern. */
1094 /* As a coding trick, an enormous stride is coded into the table for */
1095 /* characters that match the last character. This allows use of only */
1096 /* a single test, a test for having gone past the end of the */
1097 /* permissible match region, to test for both possible matches (when */
1098 /* the stride goes past the end immediately) and failure to */
1099 /* match (where you get nudged past the end one stride at a time). */
1100
1101 /* Here we make a "mickey mouse" BM table. The stride of the search */
1102 /* is determined only by the last character of the putative match. */
1103 /* If that character does not match, we will stride the proper */
1104 /* distance to propose a match that superimposes it on the last */
1105 /* instance of a character that matches it (per trt), or misses */
1106 /* it entirely if there is none. */
1107
1108 dirlen = len * direction;
1109 infinity = dirlen - (lim_byte + pos_byte + len + len) * direction;
1110 if (direction < 0)
1111 pat = (base_pat += len - 1);
1112 BM_tab_base = BM_tab;
1113 BM_tab += 0400;
1114 j = dirlen; /* to get it in a register */
1115 /* A character that does not appear in the pattern induces a */
1116 /* stride equal to the pattern length. */
1117 while (BM_tab_base != BM_tab)
1118 {
1119 *--BM_tab = j;
1120 *--BM_tab = j;
1121 *--BM_tab = j;
1122 *--BM_tab = j;
1123 }
1124 i = 0;
1125 while (i != infinity)
1126 {
1127 j = pat[i]; i += direction;
1128 if (i == dirlen) i = infinity;
1129 if (trt != 0)
1130 {
1131 k = (j = XINT (trt[j]));
1132 if (i == infinity)
1133 stride_for_teases = BM_tab[j];
1134 BM_tab[j] = dirlen - i;
1135 /* A translation table is accompanied by its inverse -- see */
1136 /* comment following downcase_table for details */
1137 while ((j = (unsigned char) XINT (inverse_trt[j])) != k)
1138 BM_tab[j] = dirlen - i;
1139 }
1140 else
1141 {
1142 if (i == infinity)
1143 stride_for_teases = BM_tab[j];
1144 BM_tab[j] = dirlen - i;
1145 }
1146 /* stride_for_teases tells how much to stride if we get a */
1147 /* match on the far character but are subsequently */
1148 /* disappointed, by recording what the stride would have been */
1149 /* for that character if the last character had been */
1150 /* different. */
1151 }
1152 infinity = dirlen - infinity;
1153 pos_byte += dirlen - ((direction > 0) ? direction : 0);
1154 /* loop invariant - POS_BYTE points at where last char (first
1155 char if reverse) of pattern would align in a possible match. */
1156 while (n != 0)
1157 {
1158 /* It's been reported that some (broken) compiler thinks that
1159 Boolean expressions in an arithmetic context are unsigned.
1160 Using an explicit ?1:0 prevents this. */
1161 if ((lim_byte - pos_byte - ((direction > 0) ? 1 : 0)) * direction
1162 < 0)
1163 return (n * (0 - direction));
1164 /* First we do the part we can by pointers (maybe nothing) */
1165 QUIT;
1166 pat = base_pat;
1167 limit = pos_byte - dirlen + direction;
1168 limit = ((direction > 0)
1169 ? BUFFER_CEILING_OF (limit)
1170 : BUFFER_FLOOR_OF (limit));
1171 /* LIMIT is now the last (not beyond-last!) value POS_BYTE
1172 can take on without hitting edge of buffer or the gap. */
1173 limit = ((direction > 0)
1174 ? min (lim_byte - 1, min (limit, pos_byte + 20000))
1175 : max (lim_byte, max (limit, pos_byte - 20000)));
1176 if ((limit - pos_byte) * direction > 20)
1177 {
1178 p_limit = BYTE_POS_ADDR (limit);
1179 p2 = (cursor = BYTE_POS_ADDR (pos_byte));
1180 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1181 while (1) /* use one cursor setting as long as i can */
1182 {
1183 if (direction > 0) /* worth duplicating */
1184 {
1185 /* Use signed comparison if appropriate
1186 to make cursor+infinity sure to be > p_limit.
1187 Assuming that the buffer lies in a range of addresses
1188 that are all "positive" (as ints) or all "negative",
1189 either kind of comparison will work as long
1190 as we don't step by infinity. So pick the kind
1191 that works when we do step by infinity. */
1192 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1193 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1194 cursor += BM_tab[*cursor];
1195 else
1196 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1197 cursor += BM_tab[*cursor];
1198 }
1199 else
1200 {
1201 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1202 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1203 cursor += BM_tab[*cursor];
1204 else
1205 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1206 cursor += BM_tab[*cursor];
1207 }
1208 /* If you are here, cursor is beyond the end of the searched region. */
1209 /* This can happen if you match on the far character of the pattern, */
1210 /* because the "stride" of that character is infinity, a number able */
1211 /* to throw you well beyond the end of the search. It can also */
1212 /* happen if you fail to match within the permitted region and would */
1213 /* otherwise try a character beyond that region */
1214 if ((cursor - p_limit) * direction <= len)
1215 break; /* a small overrun is genuine */
1216 cursor -= infinity; /* large overrun = hit */
1217 i = dirlen - direction;
1218 if (trt != 0)
1219 {
1220 while ((i -= direction) + direction != 0)
1221 if (pat[i] != XINT (trt[*(cursor -= direction)]))
1222 break;
1223 }
1224 else
1225 {
1226 while ((i -= direction) + direction != 0)
1227 if (pat[i] != *(cursor -= direction))
1228 break;
1229 }
1230 cursor += dirlen - i - direction; /* fix cursor */
1231 if (i + direction == 0)
1232 {
1233 cursor -= direction;
1234
1235 set_search_regs (pos_byte + cursor - p2 + ((direction > 0)
1236 ? 1 - len : 0),
1237 len);
1238
1239 if ((n -= direction) != 0)
1240 cursor += dirlen; /* to resume search */
1241 else
1242 return ((direction > 0)
1243 ? search_regs.end[0] : search_regs.start[0]);
1244 }
1245 else
1246 cursor += stride_for_teases; /* <sigh> we lose - */
1247 }
1248 pos_byte += cursor - p2;
1249 }
1250 else
1251 /* Now we'll pick up a clump that has to be done the hard */
1252 /* way because it covers a discontinuity */
1253 {
1254 limit = ((direction > 0)
1255 ? BUFFER_CEILING_OF (pos_byte - dirlen + 1)
1256 : BUFFER_FLOOR_OF (pos_byte - dirlen - 1));
1257 limit = ((direction > 0)
1258 ? min (limit + len, lim_byte - 1)
1259 : max (limit - len, lim_byte));
1260 /* LIMIT is now the last value POS_BYTE can have
1261 and still be valid for a possible match. */
1262 while (1)
1263 {
1264 /* This loop can be coded for space rather than */
1265 /* speed because it will usually run only once. */
1266 /* (the reach is at most len + 21, and typically */
1267 /* does not exceed len) */
1268 while ((limit - pos_byte) * direction >= 0)
1269 pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
1270 /* now run the same tests to distinguish going off the */
1271 /* end, a match or a phony match. */
1272 if ((pos_byte - limit) * direction <= len)
1273 break; /* ran off the end */
1274 /* Found what might be a match.
1275 Set POS_BYTE back to last (first if reverse) pos. */
1276 pos_byte -= infinity;
1277 i = dirlen - direction;
1278 while ((i -= direction) + direction != 0)
1279 {
1280 pos_byte -= direction;
1281 if (pat[i] != (trt != 0
1282 ? XINT (trt[FETCH_BYTE (pos_byte)])
1283 : FETCH_BYTE (pos_byte)))
1284 break;
1285 }
1286 /* Above loop has moved POS_BYTE part or all the way
1287 back to the first pos (last pos if reverse).
1288 Set it once again at the last (first if reverse) char. */
1289 pos_byte += dirlen - i- direction;
1290 if (i + direction == 0)
1291 {
1292 pos_byte -= direction;
1293
1294 set_search_regs (pos_byte + ((direction > 0) ? 1 - len : 0),
1295 len);
1296
1297 if ((n -= direction) != 0)
1298 pos_byte += dirlen; /* to resume search */
1299 else
1300 return ((direction > 0)
1301 ? search_regs.end[0] : search_regs.start[0]);
1302 }
1303 else
1304 pos_byte += stride_for_teases;
1305 }
1306 }
1307 /* We have done one clump. Can we continue? */
1308 if ((lim_byte - pos_byte) * direction < 0)
1309 return ((0 - n) * direction);
1310 }
1311 return BYTE_TO_CHAR (pos_byte);
1312 }
1313 }
1314
1315 /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
1316 for a match just found in the current buffer. */
1317
1318 static void
1319 set_search_regs (beg_byte, nbytes)
1320 int beg_byte, nbytes;
1321 {
1322 /* Make sure we have registers in which to store
1323 the match position. */
1324 if (search_regs.num_regs == 0)
1325 {
1326 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1327 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1328 search_regs.num_regs = 2;
1329 }
1330
1331 search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
1332 search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
1333 XSETBUFFER (last_thing_searched, current_buffer);
1334 }
1335 \f
1336 /* Given a string of words separated by word delimiters,
1337 compute a regexp that matches those exact words
1338 separated by arbitrary punctuation. */
1339
1340 static Lisp_Object
1341 wordify (string)
1342 Lisp_Object string;
1343 {
1344 register unsigned char *p, *o;
1345 register int i, len, punct_count = 0, word_count = 0;
1346 Lisp_Object val;
1347
1348 CHECK_STRING (string, 0);
1349 p = XSTRING (string)->data;
1350 len = XSTRING (string)->size;
1351
1352 for (i = 0; i < len; i++)
1353 if (SYNTAX (p[i]) != Sword)
1354 {
1355 punct_count++;
1356 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
1357 }
1358 if (SYNTAX (p[len-1]) == Sword) word_count++;
1359 if (!word_count) return build_string ("");
1360
1361 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
1362
1363 o = XSTRING (val)->data;
1364 *o++ = '\\';
1365 *o++ = 'b';
1366
1367 for (i = 0; i < len; i++)
1368 if (SYNTAX (p[i]) == Sword)
1369 *o++ = p[i];
1370 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1371 {
1372 *o++ = '\\';
1373 *o++ = 'W';
1374 *o++ = '\\';
1375 *o++ = 'W';
1376 *o++ = '*';
1377 }
1378
1379 *o++ = '\\';
1380 *o++ = 'b';
1381
1382 return val;
1383 }
1384 \f
1385 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1386 "MSearch backward: ",
1387 "Search backward from point for STRING.\n\
1388 Set point to the beginning of the occurrence found, and return point.\n\
1389 An optional second argument bounds the search; it is a buffer position.\n\
1390 The match found must not extend before that position.\n\
1391 Optional third argument, if t, means if fail just return nil (no error).\n\
1392 If not nil and not t, position at limit of search and return nil.\n\
1393 Optional fourth argument is repeat count--search for successive occurrences.\n\
1394 See also the functions `match-beginning', `match-end' and `replace-match'.")
1395 (string, bound, noerror, count)
1396 Lisp_Object string, bound, noerror, count;
1397 {
1398 return search_command (string, bound, noerror, count, -1, 0, 0);
1399 }
1400
1401 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
1402 "Search forward from point for STRING.\n\
1403 Set point to the end of the occurrence found, and return point.\n\
1404 An optional second argument bounds the search; it is a buffer position.\n\
1405 The match found must not extend after that position. nil is equivalent\n\
1406 to (point-max).\n\
1407 Optional third argument, if t, means if fail just return nil (no error).\n\
1408 If not nil and not t, move to limit of search and return nil.\n\
1409 Optional fourth argument is repeat count--search for successive occurrences.\n\
1410 See also the functions `match-beginning', `match-end' and `replace-match'.")
1411 (string, bound, noerror, count)
1412 Lisp_Object string, bound, noerror, count;
1413 {
1414 return search_command (string, bound, noerror, count, 1, 0, 0);
1415 }
1416
1417 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1418 "sWord search backward: ",
1419 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1420 Set point to the beginning of the occurrence found, and return point.\n\
1421 An optional second argument bounds the search; it is a buffer position.\n\
1422 The match found must not extend before that position.\n\
1423 Optional third argument, if t, means if fail just return nil (no error).\n\
1424 If not nil and not t, move to limit of search and return nil.\n\
1425 Optional fourth argument is repeat count--search for successive occurrences.")
1426 (string, bound, noerror, count)
1427 Lisp_Object string, bound, noerror, count;
1428 {
1429 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
1430 }
1431
1432 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1433 "sWord search: ",
1434 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1435 Set point to the end of the occurrence found, and return point.\n\
1436 An optional second argument bounds the search; it is a buffer position.\n\
1437 The match found must not extend after that position.\n\
1438 Optional third argument, if t, means if fail just return nil (no error).\n\
1439 If not nil and not t, move to limit of search and return nil.\n\
1440 Optional fourth argument is repeat count--search for successive occurrences.")
1441 (string, bound, noerror, count)
1442 Lisp_Object string, bound, noerror, count;
1443 {
1444 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
1445 }
1446
1447 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1448 "sRE search backward: ",
1449 "Search backward from point for match for regular expression REGEXP.\n\
1450 Set point to the beginning of the match, and return point.\n\
1451 The match found is the one starting last in the buffer\n\
1452 and yet ending before the origin of the search.\n\
1453 An optional second argument bounds the search; it is a buffer position.\n\
1454 The match found must start at or after that position.\n\
1455 Optional third argument, if t, means if fail just return nil (no error).\n\
1456 If not nil and not t, move to limit of search and return nil.\n\
1457 Optional fourth argument is repeat count--search for successive occurrences.\n\
1458 See also the functions `match-beginning', `match-end' and `replace-match'.")
1459 (regexp, bound, noerror, count)
1460 Lisp_Object regexp, bound, noerror, count;
1461 {
1462 return search_command (regexp, bound, noerror, count, -1, 1, 0);
1463 }
1464
1465 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1466 "sRE search: ",
1467 "Search forward from point for regular expression REGEXP.\n\
1468 Set point to the end of the occurrence found, and return point.\n\
1469 An optional second argument bounds the search; it is a buffer position.\n\
1470 The match found must not extend after that position.\n\
1471 Optional third argument, if t, means if fail just return nil (no error).\n\
1472 If not nil and not t, move to limit of search and return nil.\n\
1473 Optional fourth argument is repeat count--search for successive occurrences.\n\
1474 See also the functions `match-beginning', `match-end' and `replace-match'.")
1475 (regexp, bound, noerror, count)
1476 Lisp_Object regexp, bound, noerror, count;
1477 {
1478 return search_command (regexp, bound, noerror, count, 1, 1, 0);
1479 }
1480
1481 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
1482 "sPosix search backward: ",
1483 "Search backward from point for match for regular expression REGEXP.\n\
1484 Find the longest match in accord with Posix regular expression rules.\n\
1485 Set point to the beginning of the match, and return point.\n\
1486 The match found is the one starting last in the buffer\n\
1487 and yet ending before the origin of the search.\n\
1488 An optional second argument bounds the search; it is a buffer position.\n\
1489 The match found must start at or after that position.\n\
1490 Optional third argument, if t, means if fail just return nil (no error).\n\
1491 If not nil and not t, move to limit of search and return nil.\n\
1492 Optional fourth argument is repeat count--search for successive occurrences.\n\
1493 See also the functions `match-beginning', `match-end' and `replace-match'.")
1494 (regexp, bound, noerror, count)
1495 Lisp_Object regexp, bound, noerror, count;
1496 {
1497 return search_command (regexp, bound, noerror, count, -1, 1, 1);
1498 }
1499
1500 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
1501 "sPosix search: ",
1502 "Search forward from point for regular expression REGEXP.\n\
1503 Find the longest match in accord with Posix regular expression rules.\n\
1504 Set point to the end of the occurrence found, and return point.\n\
1505 An optional second argument bounds the search; it is a buffer position.\n\
1506 The match found must not extend after that position.\n\
1507 Optional third argument, if t, means if fail just return nil (no error).\n\
1508 If not nil and not t, move to limit of search and return nil.\n\
1509 Optional fourth argument is repeat count--search for successive occurrences.\n\
1510 See also the functions `match-beginning', `match-end' and `replace-match'.")
1511 (regexp, bound, noerror, count)
1512 Lisp_Object regexp, bound, noerror, count;
1513 {
1514 return search_command (regexp, bound, noerror, count, 1, 1, 1);
1515 }
1516 \f
1517 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
1518 "Replace text matched by last search with NEWTEXT.\n\
1519 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1520 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1521 based on the replaced text.\n\
1522 If the replaced text has only capital letters\n\
1523 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1524 If the replaced text has at least one word starting with a capital letter,\n\
1525 then capitalize each word in NEWTEXT.\n\n\
1526 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1527 Otherwise treat `\\' as special:\n\
1528 `\\&' in NEWTEXT means substitute original matched text.\n\
1529 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1530 If Nth parens didn't match, substitute nothing.\n\
1531 `\\\\' means insert one `\\'.\n\
1532 FIXEDCASE and LITERAL are optional arguments.\n\
1533 Leaves point at end of replacement text.\n\
1534 \n\
1535 The optional fourth argument STRING can be a string to modify.\n\
1536 In that case, this function creates and returns a new string\n\
1537 which is made by replacing the part of STRING that was matched.\n\
1538 \n\
1539 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
1540 It says to replace just that subexpression instead of the whole match.\n\
1541 This is useful only after a regular expression search or match\n\
1542 since only regular expressions have distinguished subexpressions.")
1543 (newtext, fixedcase, literal, string, subexp)
1544 Lisp_Object newtext, fixedcase, literal, string, subexp;
1545 {
1546 enum { nochange, all_caps, cap_initial } case_action;
1547 register int pos, last;
1548 int some_multiletter_word;
1549 int some_lowercase;
1550 int some_uppercase;
1551 int some_nonuppercase_initial;
1552 register int c, prevc;
1553 int inslen;
1554 int sub;
1555 int opoint, newpoint;
1556
1557 CHECK_STRING (newtext, 0);
1558
1559 if (! NILP (string))
1560 CHECK_STRING (string, 4);
1561
1562 case_action = nochange; /* We tried an initialization */
1563 /* but some C compilers blew it */
1564
1565 if (search_regs.num_regs <= 0)
1566 error ("replace-match called before any match found");
1567
1568 if (NILP (subexp))
1569 sub = 0;
1570 else
1571 {
1572 CHECK_NUMBER (subexp, 3);
1573 sub = XINT (subexp);
1574 if (sub < 0 || sub >= search_regs.num_regs)
1575 args_out_of_range (subexp, make_number (search_regs.num_regs));
1576 }
1577
1578 if (NILP (string))
1579 {
1580 if (search_regs.start[sub] < BEGV
1581 || search_regs.start[sub] > search_regs.end[sub]
1582 || search_regs.end[sub] > ZV)
1583 args_out_of_range (make_number (search_regs.start[sub]),
1584 make_number (search_regs.end[sub]));
1585 }
1586 else
1587 {
1588 if (search_regs.start[sub] < 0
1589 || search_regs.start[sub] > search_regs.end[sub]
1590 || search_regs.end[sub] > XSTRING (string)->size)
1591 args_out_of_range (make_number (search_regs.start[sub]),
1592 make_number (search_regs.end[sub]));
1593 }
1594
1595 if (NILP (fixedcase))
1596 {
1597 int beg;
1598 /* Decide how to casify by examining the matched text. */
1599
1600 if (NILP (string))
1601 last = CHAR_TO_BYTE (search_regs.end[sub]);
1602 else
1603 last = search_regs.end[sub];
1604
1605 if (NILP (string))
1606 beg = CHAR_TO_BYTE (search_regs.start[sub]);
1607 else
1608 beg = search_regs.start[sub];
1609
1610 prevc = '\n';
1611 case_action = all_caps;
1612
1613 /* some_multiletter_word is set nonzero if any original word
1614 is more than one letter long. */
1615 some_multiletter_word = 0;
1616 some_lowercase = 0;
1617 some_nonuppercase_initial = 0;
1618 some_uppercase = 0;
1619
1620 for (pos = beg; pos < last; pos++)
1621 {
1622 if (NILP (string))
1623 c = FETCH_BYTE (pos);
1624 else
1625 c = XSTRING (string)->data[pos];
1626
1627 if (LOWERCASEP (c))
1628 {
1629 /* Cannot be all caps if any original char is lower case */
1630
1631 some_lowercase = 1;
1632 if (SYNTAX (prevc) != Sword)
1633 some_nonuppercase_initial = 1;
1634 else
1635 some_multiletter_word = 1;
1636 }
1637 else if (!NOCASEP (c))
1638 {
1639 some_uppercase = 1;
1640 if (SYNTAX (prevc) != Sword)
1641 ;
1642 else
1643 some_multiletter_word = 1;
1644 }
1645 else
1646 {
1647 /* If the initial is a caseless word constituent,
1648 treat that like a lowercase initial. */
1649 if (SYNTAX (prevc) != Sword)
1650 some_nonuppercase_initial = 1;
1651 }
1652
1653 prevc = c;
1654 }
1655
1656 /* Convert to all caps if the old text is all caps
1657 and has at least one multiletter word. */
1658 if (! some_lowercase && some_multiletter_word)
1659 case_action = all_caps;
1660 /* Capitalize each word, if the old text has all capitalized words. */
1661 else if (!some_nonuppercase_initial && some_multiletter_word)
1662 case_action = cap_initial;
1663 else if (!some_nonuppercase_initial && some_uppercase)
1664 /* Should x -> yz, operating on X, give Yz or YZ?
1665 We'll assume the latter. */
1666 case_action = all_caps;
1667 else
1668 case_action = nochange;
1669 }
1670
1671 /* Do replacement in a string. */
1672 if (!NILP (string))
1673 {
1674 Lisp_Object before, after;
1675
1676 before = Fsubstring (string, make_number (0),
1677 make_number (search_regs.start[sub]));
1678 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
1679
1680 /* Substitute parts of the match into NEWTEXT
1681 if desired. */
1682 if (NILP (literal))
1683 {
1684 int lastpos = -1;
1685 /* We build up the substituted string in ACCUM. */
1686 Lisp_Object accum;
1687 Lisp_Object middle;
1688
1689 accum = Qnil;
1690
1691 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1692 {
1693 int substart = -1;
1694 int subend;
1695 int delbackslash = 0;
1696
1697 c = XSTRING (newtext)->data[pos];
1698 if (c == '\\')
1699 {
1700 c = XSTRING (newtext)->data[++pos];
1701 if (c == '&')
1702 {
1703 substart = search_regs.start[sub];
1704 subend = search_regs.end[sub];
1705 }
1706 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1707 {
1708 if (search_regs.start[c - '0'] >= 0)
1709 {
1710 substart = search_regs.start[c - '0'];
1711 subend = search_regs.end[c - '0'];
1712 }
1713 }
1714 else if (c == '\\')
1715 delbackslash = 1;
1716 else
1717 error ("Invalid use of `\\' in replacement text");
1718 }
1719 if (substart >= 0)
1720 {
1721 if (pos - 1 != lastpos + 1)
1722 middle = Fsubstring (newtext,
1723 make_number (lastpos + 1),
1724 make_number (pos - 1));
1725 else
1726 middle = Qnil;
1727 accum = concat3 (accum, middle,
1728 Fsubstring (string, make_number (substart),
1729 make_number (subend)));
1730 lastpos = pos;
1731 }
1732 else if (delbackslash)
1733 {
1734 middle = Fsubstring (newtext, make_number (lastpos + 1),
1735 make_number (pos));
1736 accum = concat2 (accum, middle);
1737 lastpos = pos;
1738 }
1739 }
1740
1741 if (pos != lastpos + 1)
1742 middle = Fsubstring (newtext, make_number (lastpos + 1),
1743 make_number (pos));
1744 else
1745 middle = Qnil;
1746
1747 newtext = concat2 (accum, middle);
1748 }
1749
1750 /* Do case substitution in NEWTEXT if desired. */
1751 if (case_action == all_caps)
1752 newtext = Fupcase (newtext);
1753 else if (case_action == cap_initial)
1754 newtext = Fupcase_initials (newtext);
1755
1756 return concat3 (before, newtext, after);
1757 }
1758
1759 /* Record point, the move (quietly) to the start of the match. */
1760 if (PT > search_regs.start[sub])
1761 opoint = PT - ZV;
1762 else
1763 opoint = PT;
1764
1765 TEMP_SET_PT (search_regs.start[sub]);
1766
1767 /* We insert the replacement text before the old text, and then
1768 delete the original text. This means that markers at the
1769 beginning or end of the original will float to the corresponding
1770 position in the replacement. */
1771 if (!NILP (literal))
1772 Finsert_and_inherit (1, &newtext);
1773 else
1774 {
1775 struct gcpro gcpro1;
1776 GCPRO1 (newtext);
1777
1778 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1779 {
1780 int offset = PT - search_regs.start[sub];
1781
1782 c = XSTRING (newtext)->data[pos];
1783 if (c == '\\')
1784 {
1785 c = XSTRING (newtext)->data[++pos];
1786 if (c == '&')
1787 Finsert_buffer_substring
1788 (Fcurrent_buffer (),
1789 make_number (search_regs.start[sub] + offset),
1790 make_number (search_regs.end[sub] + offset));
1791 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1792 {
1793 if (search_regs.start[c - '0'] >= 1)
1794 Finsert_buffer_substring
1795 (Fcurrent_buffer (),
1796 make_number (search_regs.start[c - '0'] + offset),
1797 make_number (search_regs.end[c - '0'] + offset));
1798 }
1799 else if (c == '\\')
1800 insert_char (c);
1801 else
1802 error ("Invalid use of `\\' in replacement text");
1803 }
1804 else
1805 insert_char (c);
1806 }
1807 UNGCPRO;
1808 }
1809
1810 inslen = PT - (search_regs.start[sub]);
1811 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
1812
1813 if (case_action == all_caps)
1814 Fupcase_region (make_number (PT - inslen), make_number (PT));
1815 else if (case_action == cap_initial)
1816 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
1817
1818 newpoint = PT;
1819
1820 /* Put point back where it was in the text. */
1821 if (opoint <= 0)
1822 TEMP_SET_PT (opoint + ZV);
1823 else
1824 TEMP_SET_PT (opoint);
1825
1826 /* Now move point "officially" to the start of the inserted replacement. */
1827 move_if_not_intangible (newpoint);
1828
1829 return Qnil;
1830 }
1831 \f
1832 static Lisp_Object
1833 match_limit (num, beginningp)
1834 Lisp_Object num;
1835 int beginningp;
1836 {
1837 register int n;
1838
1839 CHECK_NUMBER (num, 0);
1840 n = XINT (num);
1841 if (n < 0 || n >= search_regs.num_regs)
1842 args_out_of_range (num, make_number (search_regs.num_regs));
1843 if (search_regs.num_regs <= 0
1844 || search_regs.start[n] < 0)
1845 return Qnil;
1846 return (make_number ((beginningp) ? search_regs.start[n]
1847 : search_regs.end[n]));
1848 }
1849
1850 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1851 "Return position of start of text matched by last search.\n\
1852 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1853 regexp.\n\
1854 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1855 SUBEXP pairs.\n\
1856 Zero means the entire text matched by the whole regexp or whole string.")
1857 (subexp)
1858 Lisp_Object subexp;
1859 {
1860 return match_limit (subexp, 1);
1861 }
1862
1863 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1864 "Return position of end of text matched by last search.\n\
1865 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1866 regexp.\n\
1867 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1868 SUBEXP pairs.\n\
1869 Zero means the entire text matched by the whole regexp or whole string.")
1870 (subexp)
1871 Lisp_Object subexp;
1872 {
1873 return match_limit (subexp, 0);
1874 }
1875
1876 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
1877 "Return a list containing all info on what the last search matched.\n\
1878 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1879 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1880 if the last match was on a buffer; integers or nil if a string was matched.\n\
1881 Use `store-match-data' to reinstate the data in this list.\n\
1882 \n\
1883 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
1884 \(rather than markers) to represent buffer positions.\n\
1885 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
1886 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
1887 (integers, reuse)
1888 Lisp_Object integers, reuse;
1889 {
1890 Lisp_Object tail, prev;
1891 Lisp_Object *data;
1892 int i, len;
1893
1894 if (NILP (last_thing_searched))
1895 return Qnil;
1896
1897 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
1898 * sizeof (Lisp_Object));
1899
1900 len = -1;
1901 for (i = 0; i < search_regs.num_regs; i++)
1902 {
1903 int start = search_regs.start[i];
1904 if (start >= 0)
1905 {
1906 if (EQ (last_thing_searched, Qt)
1907 || ! NILP (integers))
1908 {
1909 XSETFASTINT (data[2 * i], start);
1910 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
1911 }
1912 else if (BUFFERP (last_thing_searched))
1913 {
1914 data[2 * i] = Fmake_marker ();
1915 Fset_marker (data[2 * i],
1916 make_number (start),
1917 last_thing_searched);
1918 data[2 * i + 1] = Fmake_marker ();
1919 Fset_marker (data[2 * i + 1],
1920 make_number (search_regs.end[i]),
1921 last_thing_searched);
1922 }
1923 else
1924 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1925 abort ();
1926
1927 len = i;
1928 }
1929 else
1930 data[2 * i] = data [2 * i + 1] = Qnil;
1931 }
1932
1933 /* If REUSE is not usable, cons up the values and return them. */
1934 if (! CONSP (reuse))
1935 return Flist (2 * len + 2, data);
1936
1937 /* If REUSE is a list, store as many value elements as will fit
1938 into the elements of REUSE. */
1939 for (i = 0, tail = reuse; CONSP (tail);
1940 i++, tail = XCONS (tail)->cdr)
1941 {
1942 if (i < 2 * len + 2)
1943 XCONS (tail)->car = data[i];
1944 else
1945 XCONS (tail)->car = Qnil;
1946 prev = tail;
1947 }
1948
1949 /* If we couldn't fit all value elements into REUSE,
1950 cons up the rest of them and add them to the end of REUSE. */
1951 if (i < 2 * len + 2)
1952 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
1953
1954 return reuse;
1955 }
1956
1957
1958 DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1959 "Set internal data on last search match from elements of LIST.\n\
1960 LIST should have been created by calling `match-data' previously.")
1961 (list)
1962 register Lisp_Object list;
1963 {
1964 register int i;
1965 register Lisp_Object marker;
1966
1967 if (running_asynch_code)
1968 save_search_regs ();
1969
1970 if (!CONSP (list) && !NILP (list))
1971 list = wrong_type_argument (Qconsp, list);
1972
1973 /* Unless we find a marker with a buffer in LIST, assume that this
1974 match data came from a string. */
1975 last_thing_searched = Qt;
1976
1977 /* Allocate registers if they don't already exist. */
1978 {
1979 int length = XFASTINT (Flength (list)) / 2;
1980
1981 if (length > search_regs.num_regs)
1982 {
1983 if (search_regs.num_regs == 0)
1984 {
1985 search_regs.start
1986 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1987 search_regs.end
1988 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1989 }
1990 else
1991 {
1992 search_regs.start
1993 = (regoff_t *) xrealloc (search_regs.start,
1994 length * sizeof (regoff_t));
1995 search_regs.end
1996 = (regoff_t *) xrealloc (search_regs.end,
1997 length * sizeof (regoff_t));
1998 }
1999
2000 search_regs.num_regs = length;
2001 }
2002 }
2003
2004 for (i = 0; i < search_regs.num_regs; i++)
2005 {
2006 marker = Fcar (list);
2007 if (NILP (marker))
2008 {
2009 search_regs.start[i] = -1;
2010 list = Fcdr (list);
2011 }
2012 else
2013 {
2014 if (MARKERP (marker))
2015 {
2016 if (XMARKER (marker)->buffer == 0)
2017 XSETFASTINT (marker, 0);
2018 else
2019 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
2020 }
2021
2022 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2023 search_regs.start[i] = XINT (marker);
2024 list = Fcdr (list);
2025
2026 marker = Fcar (list);
2027 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
2028 XSETFASTINT (marker, 0);
2029
2030 CHECK_NUMBER_COERCE_MARKER (marker, 0);
2031 search_regs.end[i] = XINT (marker);
2032 }
2033 list = Fcdr (list);
2034 }
2035
2036 return Qnil;
2037 }
2038
2039 /* If non-zero the match data have been saved in saved_search_regs
2040 during the execution of a sentinel or filter. */
2041 static int search_regs_saved;
2042 static struct re_registers saved_search_regs;
2043
2044 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
2045 if asynchronous code (filter or sentinel) is running. */
2046 static void
2047 save_search_regs ()
2048 {
2049 if (!search_regs_saved)
2050 {
2051 saved_search_regs.num_regs = search_regs.num_regs;
2052 saved_search_regs.start = search_regs.start;
2053 saved_search_regs.end = search_regs.end;
2054 search_regs.num_regs = 0;
2055 search_regs.start = 0;
2056 search_regs.end = 0;
2057
2058 search_regs_saved = 1;
2059 }
2060 }
2061
2062 /* Called upon exit from filters and sentinels. */
2063 void
2064 restore_match_data ()
2065 {
2066 if (search_regs_saved)
2067 {
2068 if (search_regs.num_regs > 0)
2069 {
2070 xfree (search_regs.start);
2071 xfree (search_regs.end);
2072 }
2073 search_regs.num_regs = saved_search_regs.num_regs;
2074 search_regs.start = saved_search_regs.start;
2075 search_regs.end = saved_search_regs.end;
2076
2077 search_regs_saved = 0;
2078 }
2079 }
2080
2081 /* Quote a string to inactivate reg-expr chars */
2082
2083 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
2084 "Return a regexp string which matches exactly STRING and nothing else.")
2085 (string)
2086 Lisp_Object string;
2087 {
2088 register unsigned char *in, *out, *end;
2089 register unsigned char *temp;
2090
2091 CHECK_STRING (string, 0);
2092
2093 temp = (unsigned char *) alloca (XSTRING (string)->size * 2);
2094
2095 /* Now copy the data into the new string, inserting escapes. */
2096
2097 in = XSTRING (string)->data;
2098 end = in + XSTRING (string)->size;
2099 out = temp;
2100
2101 for (; in != end; in++)
2102 {
2103 if (*in == '[' || *in == ']'
2104 || *in == '*' || *in == '.' || *in == '\\'
2105 || *in == '?' || *in == '+'
2106 || *in == '^' || *in == '$')
2107 *out++ = '\\';
2108 *out++ = *in;
2109 }
2110
2111 return make_string (temp, out - temp);
2112 }
2113 \f
2114 syms_of_search ()
2115 {
2116 register int i;
2117
2118 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
2119 {
2120 searchbufs[i].buf.allocated = 100;
2121 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
2122 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
2123 searchbufs[i].regexp = Qnil;
2124 staticpro (&searchbufs[i].regexp);
2125 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
2126 }
2127 searchbuf_head = &searchbufs[0];
2128
2129 Qsearch_failed = intern ("search-failed");
2130 staticpro (&Qsearch_failed);
2131 Qinvalid_regexp = intern ("invalid-regexp");
2132 staticpro (&Qinvalid_regexp);
2133
2134 Fput (Qsearch_failed, Qerror_conditions,
2135 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
2136 Fput (Qsearch_failed, Qerror_message,
2137 build_string ("Search failed"));
2138
2139 Fput (Qinvalid_regexp, Qerror_conditions,
2140 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2141 Fput (Qinvalid_regexp, Qerror_message,
2142 build_string ("Invalid regexp"));
2143
2144 last_thing_searched = Qnil;
2145 staticpro (&last_thing_searched);
2146
2147 defsubr (&Slooking_at);
2148 defsubr (&Sposix_looking_at);
2149 defsubr (&Sstring_match);
2150 defsubr (&Sposix_string_match);
2151 defsubr (&Ssearch_forward);
2152 defsubr (&Ssearch_backward);
2153 defsubr (&Sword_search_forward);
2154 defsubr (&Sword_search_backward);
2155 defsubr (&Sre_search_forward);
2156 defsubr (&Sre_search_backward);
2157 defsubr (&Sposix_search_forward);
2158 defsubr (&Sposix_search_backward);
2159 defsubr (&Sreplace_match);
2160 defsubr (&Smatch_beginning);
2161 defsubr (&Smatch_end);
2162 defsubr (&Smatch_data);
2163 defsubr (&Sstore_match_data);
2164 defsubr (&Sregexp_quote);
2165 }