]> code.delx.au - gnu-emacs/blob - src/search.c
(casify_region): Scan in bytes and chars.
[gnu-emacs] / src / search.c
1 /* String search routines for GNU Emacs.
2 Copyright (C) 1985, 1986, 1987, 1993, 1994 Free Software Foundation, Inc.
3
4 This file is part of GNU Emacs.
5
6 GNU Emacs is free software; you can redistribute it and/or modify
7 it under the terms of the GNU General Public License as published by
8 the Free Software Foundation; either version 2, or (at your option)
9 any later version.
10
11 GNU Emacs is distributed in the hope that it will be useful,
12 but WITHOUT ANY WARRANTY; without even the implied warranty of
13 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 GNU General Public License for more details.
15
16 You should have received a copy of the GNU General Public License
17 along with GNU Emacs; see the file COPYING. If not, write to
18 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
19 Boston, MA 02111-1307, USA. */
20
21
22 #include <config.h>
23 #include "lisp.h"
24 #include "syntax.h"
25 #include "category.h"
26 #include "buffer.h"
27 #include "charset.h"
28 #include "region-cache.h"
29 #include "commands.h"
30 #include "blockinput.h"
31 #include "intervals.h"
32
33 #include <sys/types.h>
34 #include "regex.h"
35
36 #define REGEXP_CACHE_SIZE 20
37
38 /* If the regexp is non-nil, then the buffer contains the compiled form
39 of that regexp, suitable for searching. */
40 struct regexp_cache
41 {
42 struct regexp_cache *next;
43 Lisp_Object regexp;
44 struct re_pattern_buffer buf;
45 char fastmap[0400];
46 /* Nonzero means regexp was compiled to do full POSIX backtracking. */
47 char posix;
48 };
49
50 /* The instances of that struct. */
51 struct regexp_cache searchbufs[REGEXP_CACHE_SIZE];
52
53 /* The head of the linked list; points to the most recently used buffer. */
54 struct regexp_cache *searchbuf_head;
55
56
57 /* Every call to re_match, etc., must pass &search_regs as the regs
58 argument unless you can show it is unnecessary (i.e., if re_match
59 is certainly going to be called again before region-around-match
60 can be called).
61
62 Since the registers are now dynamically allocated, we need to make
63 sure not to refer to the Nth register before checking that it has
64 been allocated by checking search_regs.num_regs.
65
66 The regex code keeps track of whether it has allocated the search
67 buffer using bits in the re_pattern_buffer. This means that whenever
68 you compile a new pattern, it completely forgets whether it has
69 allocated any registers, and will allocate new registers the next
70 time you call a searching or matching function. Therefore, we need
71 to call re_set_registers after compiling a new pattern or after
72 setting the match registers, so that the regex functions will be
73 able to free or re-allocate it properly. */
74 static struct re_registers search_regs;
75
76 /* The buffer in which the last search was performed, or
77 Qt if the last search was done in a string;
78 Qnil if no searching has been done yet. */
79 static Lisp_Object last_thing_searched;
80
81 /* error condition signaled when regexp compile_pattern fails */
82
83 Lisp_Object Qinvalid_regexp;
84
85 static void set_search_regs ();
86 static void save_search_regs ();
87
88 static int search_buffer ();
89
/* Report, as a Lisp error, that the regexp matcher overflowed its
   stack.  The matching functions in this file call this when the
   matcher returns -2.  */
static void
matcher_overflow ()
{
  error ("Stack overflow in regexp matcher");
}
95
/* CONST expands to the `const' qualifier when the compiler defines
   __STDC__, and to nothing otherwise.  */
#ifndef __STDC__
#define CONST
#else
#define CONST const
#endif
101
102 /* Compile a regexp and signal a Lisp error if anything goes wrong.
103 PATTERN is the pattern to compile.
104 CP is the place to put the result.
105 TRANSLATE is a translation table for ignoring case, or NULL for none.
106 REGP is the structure that says where to store the "register"
107 values that will result from matching this pattern.
108 If it is 0, we should compile the pattern not to record any
109 subexpression bounds.
110 POSIX is nonzero if we want full backtracking (POSIX style)
111 for this pattern. 0 means backtrack only enough to get a valid match.
112 MULTIBYTE is nonzero if we want to handle multibyte characters in
113 PATTERN. 0 means all multibyte characters are recognized just as
114 sequences of binary data. */
115
116 static void
117 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
118 struct regexp_cache *cp;
119 Lisp_Object pattern;
120 Lisp_Object *translate;
121 struct re_registers *regp;
122 int posix;
123 int multibyte;
124 {
125 char *val;
126 reg_syntax_t old;
127
128 cp->regexp = Qnil;
129 cp->buf.translate = translate;
130 cp->posix = posix;
131 cp->buf.multibyte = multibyte;
132 BLOCK_INPUT;
133 old = re_set_syntax (RE_SYNTAX_EMACS
134 | (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
135 val = (char *) re_compile_pattern ((char *) XSTRING (pattern)->data,
136 XSTRING (pattern)->size, &cp->buf);
137 re_set_syntax (old);
138 UNBLOCK_INPUT;
139 if (val)
140 Fsignal (Qinvalid_regexp, Fcons (build_string (val), Qnil));
141
142 cp->regexp = Fcopy_sequence (pattern);
143 }
144
145 /* Compile a regexp if necessary, but first check to see if there's one in
146 the cache.
147 PATTERN is the pattern to compile.
148 TRANSLATE is a translation table for ignoring case, or NULL for none.
149 REGP is the structure that says where to store the "register"
150 values that will result from matching this pattern.
151 If it is 0, we should compile the pattern not to record any
152 subexpression bounds.
153 POSIX is nonzero if we want full backtracking (POSIX style)
154 for this pattern. 0 means backtrack only enough to get a valid match. */
155
156 struct re_pattern_buffer *
157 compile_pattern (pattern, regp, translate, posix)
158 Lisp_Object pattern;
159 struct re_registers *regp;
160 Lisp_Object *translate;
161 int posix;
162 {
163 struct regexp_cache *cp, **cpp;
164 /* Should we check it here, or add an argument `multibyte' to this
165 function? */
166 int multibyte = !NILP (current_buffer->enable_multibyte_characters);
167
168 for (cpp = &searchbuf_head; ; cpp = &cp->next)
169 {
170 cp = *cpp;
171 if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
172 && !NILP (Fstring_equal (cp->regexp, pattern))
173 && cp->buf.translate == translate
174 && cp->posix == posix
175 && cp->buf.multibyte == multibyte)
176 break;
177
178 /* If we're at the end of the cache, compile into the last cell. */
179 if (cp->next == 0)
180 {
181 compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
182 break;
183 }
184 }
185
186 /* When we get here, cp (aka *cpp) contains the compiled pattern,
187 either because we found it in the cache or because we just compiled it.
188 Move it to the front of the queue to mark it as most recently used. */
189 *cpp = cp->next;
190 cp->next = searchbuf_head;
191 searchbuf_head = cp;
192
193 /* Advise the searching functions about the space we have allocated
194 for register data. */
195 if (regp)
196 re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
197
198 return &cp->buf;
199 }
200
201 /* Error condition used for failing searches */
202 Lisp_Object Qsearch_failed;
203
204 Lisp_Object
205 signal_failure (arg)
206 Lisp_Object arg;
207 {
208 Fsignal (Qsearch_failed, Fcons (arg, Qnil));
209 return Qnil;
210 }
211 \f
212 static Lisp_Object
213 looking_at_1 (string, posix)
214 Lisp_Object string;
215 int posix;
216 {
217 Lisp_Object val;
218 unsigned char *p1, *p2;
219 int s1, s2;
220 register int i;
221 struct re_pattern_buffer *bufp;
222
223 if (running_asynch_code)
224 save_search_regs ();
225
226 CHECK_STRING (string, 0);
227 bufp = compile_pattern (string, &search_regs,
228 (!NILP (current_buffer->case_fold_search)
229 ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
230 posix);
231
232 immediate_quit = 1;
233 QUIT; /* Do a pending quit right away, to avoid paradoxical behavior */
234
235 /* Get pointers and sizes of the two strings
236 that make up the visible portion of the buffer. */
237
238 p1 = BEGV_ADDR;
239 s1 = GPT - BEGV;
240 p2 = GAP_END_ADDR;
241 s2 = ZV - GPT;
242 if (s1 < 0)
243 {
244 p2 = p1;
245 s2 = ZV - BEGV;
246 s1 = 0;
247 }
248 if (s2 < 0)
249 {
250 s1 = ZV - BEGV;
251 s2 = 0;
252 }
253
254 re_match_object = Qnil;
255
256 i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
257 PT - BEGV, &search_regs,
258 ZV - BEGV);
259 if (i == -2)
260 matcher_overflow ();
261
262 val = (0 <= i ? Qt : Qnil);
263 for (i = 0; i < search_regs.num_regs; i++)
264 if (search_regs.start[i] >= 0)
265 {
266 search_regs.start[i] += BEGV;
267 search_regs.end[i] += BEGV;
268 }
269 XSETBUFFER (last_thing_searched, current_buffer);
270 immediate_quit = 0;
271 return val;
272 }
273
274 DEFUN ("looking-at", Flooking_at, Slooking_at, 1, 1, 0,
275 "Return t if text after point matches regular expression REGEXP.\n\
276 This function modifies the match data that `match-beginning',\n\
277 `match-end' and `match-data' access; save and restore the match\n\
278 data if you want to preserve them.")
279 (regexp)
280 Lisp_Object regexp;
281 {
282 return looking_at_1 (regexp, 0);
283 }
284
285 DEFUN ("posix-looking-at", Fposix_looking_at, Sposix_looking_at, 1, 1, 0,
286 "Return t if text after point matches regular expression REGEXP.\n\
287 Find the longest match, in accord with Posix regular expression rules.\n\
288 This function modifies the match data that `match-beginning',\n\
289 `match-end' and `match-data' access; save and restore the match\n\
290 data if you want to preserve them.")
291 (regexp)
292 Lisp_Object regexp;
293 {
294 return looking_at_1 (regexp, 1);
295 }
296 \f
297 static Lisp_Object
298 string_match_1 (regexp, string, start, posix)
299 Lisp_Object regexp, string, start;
300 int posix;
301 {
302 int val;
303 int s;
304 struct re_pattern_buffer *bufp;
305
306 if (running_asynch_code)
307 save_search_regs ();
308
309 CHECK_STRING (regexp, 0);
310 CHECK_STRING (string, 1);
311
312 if (NILP (start))
313 s = 0;
314 else
315 {
316 int len = XSTRING (string)->size;
317
318 CHECK_NUMBER (start, 2);
319 s = XINT (start);
320 if (s < 0 && -s <= len)
321 s = len + s;
322 else if (0 > s || s > len)
323 args_out_of_range (string, start);
324 }
325
326 bufp = compile_pattern (regexp, &search_regs,
327 (!NILP (current_buffer->case_fold_search)
328 ? XCHAR_TABLE (DOWNCASE_TABLE)->contents : 0),
329 posix);
330 immediate_quit = 1;
331 re_match_object = string;
332
333 val = re_search (bufp, (char *) XSTRING (string)->data,
334 XSTRING (string)->size, s, XSTRING (string)->size - s,
335 &search_regs);
336 immediate_quit = 0;
337 last_thing_searched = Qt;
338 if (val == -2)
339 matcher_overflow ();
340 if (val < 0) return Qnil;
341 return make_number (val);
342 }
343
344 DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
345 "Return index of start of first match for REGEXP in STRING, or nil.\n\
346 If third arg START is non-nil, start search at that index in STRING.\n\
347 For index of first char beyond the match, do (match-end 0).\n\
348 `match-end' and `match-beginning' also give indices of substrings\n\
349 matched by parenthesis constructs in the pattern.")
350 (regexp, string, start)
351 Lisp_Object regexp, string, start;
352 {
353 return string_match_1 (regexp, string, start, 0);
354 }
355
356 DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
357 "Return index of start of first match for REGEXP in STRING, or nil.\n\
358 Find the longest match, in accord with Posix regular expression rules.\n\
359 If third arg START is non-nil, start search at that index in STRING.\n\
360 For index of first char beyond the match, do (match-end 0).\n\
361 `match-end' and `match-beginning' also give indices of substrings\n\
362 matched by parenthesis constructs in the pattern.")
363 (regexp, string, start)
364 Lisp_Object regexp, string, start;
365 {
366 return string_match_1 (regexp, string, start, 1);
367 }
368
369 /* Match REGEXP against STRING, searching all of STRING,
370 and return the index of the match, or negative on failure.
371 This does not clobber the match data. */
372
373 int
374 fast_string_match (regexp, string)
375 Lisp_Object regexp, string;
376 {
377 int val;
378 struct re_pattern_buffer *bufp;
379
380 bufp = compile_pattern (regexp, 0, 0, 0);
381 immediate_quit = 1;
382 re_match_object = string;
383
384 val = re_search (bufp, (char *) XSTRING (string)->data,
385 XSTRING (string)->size, 0, XSTRING (string)->size,
386 0);
387 immediate_quit = 0;
388 return val;
389 }
390
391 /* Match REGEXP against STRING, searching all of STRING ignoring case,
392 and return the index of the match, or negative on failure.
393 This does not clobber the match data. */
394
395 extern Lisp_Object Vascii_downcase_table;
396
397 int
398 fast_c_string_match_ignore_case (regexp, string)
399 Lisp_Object regexp;
400 char *string;
401 {
402 int val;
403 struct re_pattern_buffer *bufp;
404 int len = strlen (string);
405
406 re_match_object = Qt;
407 bufp = compile_pattern (regexp, 0,
408 XCHAR_TABLE (Vascii_downcase_table)->contents, 0);
409 immediate_quit = 1;
410 val = re_search (bufp, string, len, 0, len, 0);
411 immediate_quit = 0;
412 return val;
413 }
414 \f
/* Return the larger of A and B.  */

static int
max (a, b)
     int a, b;
{
  if (a > b)
    return a;
  return b;
}
423
/* Return the smaller of A and B.  */

static int
min (a, b)
     int a, b;
{
  if (a < b)
    return a;
  return b;
}
430
431 \f
432 /* The newline cache: remembering which sections of text have no newlines. */
433
434 /* If the user has requested newline caching, make sure it's on.
435 Otherwise, make sure it's off.
436 This is our cheezy way of associating an action with the change of
437 state of a buffer-local variable. */
438 static void
439 newline_cache_on_off (buf)
440 struct buffer *buf;
441 {
442 if (NILP (buf->cache_long_line_scans))
443 {
444 /* It should be off. */
445 if (buf->newline_cache)
446 {
447 free_region_cache (buf->newline_cache);
448 buf->newline_cache = 0;
449 }
450 }
451 else
452 {
453 /* It should be on. */
454 if (buf->newline_cache == 0)
455 buf->newline_cache = new_region_cache ();
456 }
457 }
458
459 \f
460 /* Search for COUNT instances of the character TARGET between START and END.
461
462 If COUNT is positive, search forwards; END must be >= START.
463 If COUNT is negative, search backwards for the -COUNTth instance;
464 END must be <= START.
465 If COUNT is zero, do anything you please; run rogue, for all I care.
466
467 If END is zero, use BEGV or ZV instead, as appropriate for the
468 direction indicated by COUNT.
469
470 If we find COUNT instances, set *SHORTAGE to zero, and return the
471 position after the COUNTth match. Note that for reverse motion
472 this is not the same as the usual convention for Emacs motion commands.
473
474 If we don't find COUNT instances before reaching END, set *SHORTAGE
475 to the number of TARGETs left unfound, and return END.
476
477 If ALLOW_QUIT is non-zero, set immediate_quit. That's good to do
478 except when inside redisplay. */
479
480 scan_buffer (target, start, end, count, shortage, allow_quit)
481 register int target;
482 int start, end;
483 int count;
484 int *shortage;
485 int allow_quit;
486 {
487 struct region_cache *newline_cache;
488 int direction;
489
490 if (count > 0)
491 {
492 direction = 1;
493 if (! end) end = ZV;
494 }
495 else
496 {
497 direction = -1;
498 if (! end) end = BEGV;
499 }
500
501 newline_cache_on_off (current_buffer);
502 newline_cache = current_buffer->newline_cache;
503
504 if (shortage != 0)
505 *shortage = 0;
506
507 immediate_quit = allow_quit;
508
509 if (count > 0)
510 while (start != end)
511 {
512 /* Our innermost scanning loop is very simple; it doesn't know
513 about gaps, buffer ends, or the newline cache. ceiling is
514 the position of the last character before the next such
515 obstacle --- the last character the dumb search loop should
516 examine. */
517 register int ceiling = end - 1;
518
519 /* If we're looking for a newline, consult the newline cache
520 to see where we can avoid some scanning. */
521 if (target == '\n' && newline_cache)
522 {
523 int next_change;
524 immediate_quit = 0;
525 while (region_cache_forward
526 (current_buffer, newline_cache, start, &next_change))
527 start = next_change;
528 immediate_quit = allow_quit;
529
530 /* start should never be after end. */
531 if (start >= end)
532 start = end - 1;
533
534 /* Now the text after start is an unknown region, and
535 next_change is the position of the next known region. */
536 ceiling = min (next_change - 1, ceiling);
537 }
538
539 /* The dumb loop can only scan text stored in contiguous
540 bytes. BUFFER_CEILING_OF returns the last character
541 position that is contiguous, so the ceiling is the
542 position after that. */
543 ceiling = min (BUFFER_CEILING_OF (start), ceiling);
544
545 {
546 /* The termination address of the dumb loop. */
547 register unsigned char *ceiling_addr = POS_ADDR (ceiling) + 1;
548 register unsigned char *cursor = POS_ADDR (start);
549 unsigned char *base = cursor;
550
551 while (cursor < ceiling_addr)
552 {
553 unsigned char *scan_start = cursor;
554
555 /* The dumb loop. */
556 while (*cursor != target && ++cursor < ceiling_addr)
557 ;
558
559 /* If we're looking for newlines, cache the fact that
560 the region from start to cursor is free of them. */
561 if (target == '\n' && newline_cache)
562 know_region_cache (current_buffer, newline_cache,
563 start + scan_start - base,
564 start + cursor - base);
565
566 /* Did we find the target character? */
567 if (cursor < ceiling_addr)
568 {
569 if (--count == 0)
570 {
571 immediate_quit = 0;
572 return (start + cursor - base + 1);
573 }
574 cursor++;
575 }
576 }
577
578 start += cursor - base;
579 }
580 }
581 else
582 while (start > end)
583 {
584 /* The last character to check before the next obstacle. */
585 register int ceiling = end;
586
587 /* Consult the newline cache, if appropriate. */
588 if (target == '\n' && newline_cache)
589 {
590 int next_change;
591 immediate_quit = 0;
592 while (region_cache_backward
593 (current_buffer, newline_cache, start, &next_change))
594 start = next_change;
595 immediate_quit = allow_quit;
596
597 /* Start should never be at or before end. */
598 if (start <= end)
599 start = end + 1;
600
601 /* Now the text before start is an unknown region, and
602 next_change is the position of the next known region. */
603 ceiling = max (next_change, ceiling);
604 }
605
606 /* Stop scanning before the gap. */
607 ceiling = max (BUFFER_FLOOR_OF (start - 1), ceiling);
608
609 {
610 /* The termination address of the dumb loop. */
611 register unsigned char *ceiling_addr = POS_ADDR (ceiling);
612 register unsigned char *cursor = POS_ADDR (start - 1);
613 unsigned char *base = cursor;
614
615 while (cursor >= ceiling_addr)
616 {
617 unsigned char *scan_start = cursor;
618
619 while (*cursor != target && --cursor >= ceiling_addr)
620 ;
621
622 /* If we're looking for newlines, cache the fact that
623 the region from after the cursor to start is free of them. */
624 if (target == '\n' && newline_cache)
625 know_region_cache (current_buffer, newline_cache,
626 start + cursor - base,
627 start + scan_start - base);
628
629 /* Did we find the target character? */
630 if (cursor >= ceiling_addr)
631 {
632 if (++count >= 0)
633 {
634 immediate_quit = 0;
635 return (start + cursor - base);
636 }
637 cursor--;
638 }
639 }
640
641 start += cursor - base;
642 }
643 }
644
645 immediate_quit = 0;
646 if (shortage != 0)
647 *shortage = count * direction;
648 return start;
649 }
650
/* Scan from FROM for CNT newlines with scan_buffer, using the default
   end of scan, reporting no shortage, and without enabling immediate
   quitting.  Return the position scan_buffer returns.  */
int
find_next_newline_no_quit (from, cnt)
     register int from, cnt;
{
  int *no_shortage = 0;

  return scan_buffer ('\n', from, 0, cnt, no_shortage, 0);
}
657
/* Scan from FROM for CNT newlines with scan_buffer, using the default
   end of scan and reporting no shortage; immediate quitting is
   enabled during the scan.  Return the position scan_buffer returns.  */
int
find_next_newline (from, cnt)
     register int from, cnt;
{
  int *no_shortage = 0;

  return scan_buffer ('\n', from, 0, cnt, no_shortage, 1);
}
664
665
/* Like find_next_newline, but stop the scan at TO and, when all CNT
   newlines were found, return the position just before the last one
   instead of just after it.  When the scan falls short, the position
   scan_buffer returned is passed through unchanged, which is why this
   is not simply find_next_newline (...) - 1.  */
int
find_before_next_newline (from, to, cnt)
     int from, to, cnt;
{
  int shortage;
  int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);

  return shortage == 0 ? pos - 1 : pos;
}
681 \f
682 /* Subroutines of Lisp buffer search functions. */
683
684 static Lisp_Object
685 search_command (string, bound, noerror, count, direction, RE, posix)
686 Lisp_Object string, bound, noerror, count;
687 int direction;
688 int RE;
689 int posix;
690 {
691 register int np;
692 int lim;
693 int n = direction;
694
695 if (!NILP (count))
696 {
697 CHECK_NUMBER (count, 3);
698 n *= XINT (count);
699 }
700
701 CHECK_STRING (string, 0);
702 if (NILP (bound))
703 lim = n > 0 ? ZV : BEGV;
704 else
705 {
706 CHECK_NUMBER_COERCE_MARKER (bound, 1);
707 lim = XINT (bound);
708 if (n > 0 ? lim < PT : lim > PT)
709 error ("Invalid search bound (wrong side of point)");
710 if (lim > ZV)
711 lim = ZV;
712 if (lim < BEGV)
713 lim = BEGV;
714 }
715
716 np = search_buffer (string, PT, lim, n, RE,
717 (!NILP (current_buffer->case_fold_search)
718 ? XCHAR_TABLE (current_buffer->case_canon_table)->contents
719 : 0),
720 (!NILP (current_buffer->case_fold_search)
721 ? XCHAR_TABLE (current_buffer->case_eqv_table)->contents
722 : 0),
723 posix);
724 if (np <= 0)
725 {
726 if (NILP (noerror))
727 return signal_failure (string);
728 if (!EQ (noerror, Qt))
729 {
730 if (lim < BEGV || lim > ZV)
731 abort ();
732 SET_PT (lim);
733 return Qnil;
734 #if 0 /* This would be clean, but maybe programs depend on
735 a value of nil here. */
736 np = lim;
737 #endif
738 }
739 else
740 return Qnil;
741 }
742
743 if (np < BEGV || np > ZV)
744 abort ();
745
746 SET_PT (np);
747
748 return make_number (np);
749 }
750 \f
751 static int
752 trivial_regexp_p (regexp)
753 Lisp_Object regexp;
754 {
755 int len = XSTRING (regexp)->size;
756 unsigned char *s = XSTRING (regexp)->data;
757 unsigned char c;
758 while (--len >= 0)
759 {
760 switch (*s++)
761 {
762 case '.': case '*': case '+': case '?': case '[': case '^': case '$':
763 return 0;
764 case '\\':
765 if (--len < 0)
766 return 0;
767 switch (*s++)
768 {
769 case '|': case '(': case ')': case '`': case '\'': case 'b':
770 case 'B': case '<': case '>': case 'w': case 'W': case 's':
771 case 'S': case '=':
772 case 'c': case 'C': /* for categoryspec and notcategoryspec */
773 case '1': case '2': case '3': case '4': case '5':
774 case '6': case '7': case '8': case '9':
775 return 0;
776 }
777 }
778 }
779 return 1;
780 }
781
782 /* Search for the n'th occurrence of STRING in the current buffer,
783 starting at position POS and stopping at position LIM,
784 treating STRING as a literal string if RE is false or as
785 a regular expression if RE is true.
786
787 If N is positive, searching is forward and LIM must be greater than POS.
788 If N is negative, searching is backward and LIM must be less than POS.
789
790 Returns -x if only N-x occurrences found (x > 0),
791 or else the position at the beginning of the Nth occurrence
792 (if searching backward) or the end (if searching forward).
793
794 POSIX is nonzero if we want full backtracking (POSIX style)
795 for this pattern. 0 means backtrack only enough to get a valid match. */
796
797 static int
798 search_buffer (string, pos, lim, n, RE, trt, inverse_trt, posix)
799 Lisp_Object string;
800 int pos;
801 int lim;
802 int n;
803 int RE;
804 Lisp_Object *trt;
805 Lisp_Object *inverse_trt;
806 int posix;
807 {
808 int len = XSTRING (string)->size;
809 unsigned char *base_pat = XSTRING (string)->data;
810 register int *BM_tab;
811 int *BM_tab_base;
812 register int direction = ((n > 0) ? 1 : -1);
813 register int dirlen;
814 int infinity, limit, k, stride_for_teases;
815 register unsigned char *pat, *cursor, *p_limit;
816 register int i, j;
817 unsigned char *p1, *p2;
818 int s1, s2;
819
820 if (running_asynch_code)
821 save_search_regs ();
822
823 /* Null string is found at starting position. */
824 if (len == 0)
825 {
826 set_search_regs (pos, 0);
827 return pos;
828 }
829
830 /* Searching 0 times means don't move. */
831 if (n == 0)
832 return pos;
833
834 if (RE && !trivial_regexp_p (string))
835 {
836 struct re_pattern_buffer *bufp;
837
838 bufp = compile_pattern (string, &search_regs, trt, posix);
839
840 immediate_quit = 1; /* Quit immediately if user types ^G,
841 because letting this function finish
842 can take too long. */
843 QUIT; /* Do a pending quit right away,
844 to avoid paradoxical behavior */
845 /* Get pointers and sizes of the two strings
846 that make up the visible portion of the buffer. */
847
848 p1 = BEGV_ADDR;
849 s1 = GPT - BEGV;
850 p2 = GAP_END_ADDR;
851 s2 = ZV - GPT;
852 if (s1 < 0)
853 {
854 p2 = p1;
855 s2 = ZV - BEGV;
856 s1 = 0;
857 }
858 if (s2 < 0)
859 {
860 s1 = ZV - BEGV;
861 s2 = 0;
862 }
863 re_match_object = Qnil;
864
865 while (n < 0)
866 {
867 int val;
868 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
869 pos - BEGV, lim - pos, &search_regs,
870 /* Don't allow match past current point */
871 pos - BEGV);
872 if (val == -2)
873 {
874 matcher_overflow ();
875 }
876 if (val >= 0)
877 {
878 j = BEGV;
879 for (i = 0; i < search_regs.num_regs; i++)
880 if (search_regs.start[i] >= 0)
881 {
882 search_regs.start[i] += j;
883 search_regs.end[i] += j;
884 }
885 XSETBUFFER (last_thing_searched, current_buffer);
886 /* Set pos to the new position. */
887 pos = search_regs.start[0];
888 }
889 else
890 {
891 immediate_quit = 0;
892 return (n);
893 }
894 n++;
895 }
896 while (n > 0)
897 {
898 int val;
899 val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
900 pos - BEGV, lim - pos, &search_regs,
901 lim - BEGV);
902 if (val == -2)
903 {
904 matcher_overflow ();
905 }
906 if (val >= 0)
907 {
908 j = BEGV;
909 for (i = 0; i < search_regs.num_regs; i++)
910 if (search_regs.start[i] >= 0)
911 {
912 search_regs.start[i] += j;
913 search_regs.end[i] += j;
914 }
915 XSETBUFFER (last_thing_searched, current_buffer);
916 pos = search_regs.end[0];
917 }
918 else
919 {
920 immediate_quit = 0;
921 return (0 - n);
922 }
923 n--;
924 }
925 immediate_quit = 0;
926 return (pos);
927 }
928 else /* non-RE case */
929 {
930 #ifdef C_ALLOCA
931 int BM_tab_space[0400];
932 BM_tab = &BM_tab_space[0];
933 #else
934 BM_tab = (int *) alloca (0400 * sizeof (int));
935 #endif
936 {
937 unsigned char *patbuf = (unsigned char *) alloca (len);
938 pat = patbuf;
939 while (--len >= 0)
940 {
941 /* If we got here and the RE flag is set, it's because we're
942 dealing with a regexp known to be trivial, so the backslash
943 just quotes the next character. */
944 if (RE && *base_pat == '\\')
945 {
946 len--;
947 base_pat++;
948 }
949 *pat++ = (trt ? XINT (trt[*base_pat++]) : *base_pat++);
950 }
951 len = pat - patbuf;
952 pat = base_pat = patbuf;
953 }
954 /* The general approach is that we are going to maintain that we know */
955 /* the first (closest to the present position, in whatever direction */
956 /* we're searching) character that could possibly be the last */
957 /* (furthest from present position) character of a valid match. We */
958 /* advance the state of our knowledge by looking at that character */
959 /* and seeing whether it indeed matches the last character of the */
960 /* pattern. If it does, we take a closer look. If it does not, we */
961 /* move our pointer (to putative last characters) as far as is */
962 /* logically possible. This amount of movement, which I call a */
963 /* stride, will be the length of the pattern if the actual character */
964 /* appears nowhere in the pattern, otherwise it will be the distance */
965 /* from the last occurrence of that character to the end of the */
966 /* pattern. */
967 /* As a coding trick, an enormous stride is coded into the table for */
968 /* characters that match the last character. This allows use of only */
969 /* a single test, a test for having gone past the end of the */
970 /* permissible match region, to test for both possible matches (when */
971 /* the stride goes past the end immediately) and failure to */
972 /* match (where you get nudged past the end one stride at a time). */
973
974 /* Here we make a "mickey mouse" BM table. The stride of the search */
975 /* is determined only by the last character of the putative match. */
976 /* If that character does not match, we will stride the proper */
977 /* distance to propose a match that superimposes it on the last */
978 /* instance of a character that matches it (per trt), or misses */
979 /* it entirely if there is none. */
980
981 dirlen = len * direction;
982 infinity = dirlen - (lim + pos + len + len) * direction;
983 if (direction < 0)
984 pat = (base_pat += len - 1);
985 BM_tab_base = BM_tab;
986 BM_tab += 0400;
987 j = dirlen; /* to get it in a register */
988 /* A character that does not appear in the pattern induces a */
989 /* stride equal to the pattern length. */
990 while (BM_tab_base != BM_tab)
991 {
992 *--BM_tab = j;
993 *--BM_tab = j;
994 *--BM_tab = j;
995 *--BM_tab = j;
996 }
997 i = 0;
998 while (i != infinity)
999 {
1000 j = pat[i]; i += direction;
1001 if (i == dirlen) i = infinity;
1002 if (trt != 0)
1003 {
1004 k = (j = XINT (trt[j]));
1005 if (i == infinity)
1006 stride_for_teases = BM_tab[j];
1007 BM_tab[j] = dirlen - i;
1008 /* A translation table is accompanied by its inverse -- see */
1009 /* comment following downcase_table for details */
1010 while ((j = (unsigned char) XINT (inverse_trt[j])) != k)
1011 BM_tab[j] = dirlen - i;
1012 }
1013 else
1014 {
1015 if (i == infinity)
1016 stride_for_teases = BM_tab[j];
1017 BM_tab[j] = dirlen - i;
1018 }
1019 /* stride_for_teases tells how much to stride if we get a */
1020 /* match on the far character but are subsequently */
1021 /* disappointed, by recording what the stride would have been */
1022 /* for that character if the last character had been */
1023 /* different. */
1024 }
1025 infinity = dirlen - infinity;
1026 pos += dirlen - ((direction > 0) ? direction : 0);
1027 /* loop invariant - pos points at where last char (first char if reverse)
1028 of pattern would align in a possible match. */
1029 while (n != 0)
1030 {
1031 /* It's been reported that some (broken) compiler thinks that
1032 Boolean expressions in an arithmetic context are unsigned.
1033 Using an explicit ?1:0 prevents this. */
1034 if ((lim - pos - ((direction > 0) ? 1 : 0)) * direction < 0)
1035 return (n * (0 - direction));
1036 /* First we do the part we can by pointers (maybe nothing) */
1037 QUIT;
1038 pat = base_pat;
1039 limit = pos - dirlen + direction;
1040 limit = ((direction > 0)
1041 ? BUFFER_CEILING_OF (limit)
1042 : BUFFER_FLOOR_OF (limit));
1043 /* LIMIT is now the last (not beyond-last!) value
1044 POS can take on without hitting edge of buffer or the gap. */
1045 limit = ((direction > 0)
1046 ? min (lim - 1, min (limit, pos + 20000))
1047 : max (lim, max (limit, pos - 20000)));
1048 if ((limit - pos) * direction > 20)
1049 {
1050 p_limit = POS_ADDR (limit);
1051 p2 = (cursor = POS_ADDR (pos));
1052 /* In this loop, pos + cursor - p2 is the surrogate for pos */
1053 while (1) /* use one cursor setting as long as i can */
1054 {
1055 if (direction > 0) /* worth duplicating */
1056 {
1057 /* Use signed comparison if appropriate
1058 to make cursor+infinity sure to be > p_limit.
1059 Assuming that the buffer lies in a range of addresses
1060 that are all "positive" (as ints) or all "negative",
1061 either kind of comparison will work as long
1062 as we don't step by infinity. So pick the kind
1063 that works when we do step by infinity. */
1064 if ((EMACS_INT) (p_limit + infinity) > (EMACS_INT) p_limit)
1065 while ((EMACS_INT) cursor <= (EMACS_INT) p_limit)
1066 cursor += BM_tab[*cursor];
1067 else
1068 while ((EMACS_UINT) cursor <= (EMACS_UINT) p_limit)
1069 cursor += BM_tab[*cursor];
1070 }
1071 else
1072 {
1073 if ((EMACS_INT) (p_limit + infinity) < (EMACS_INT) p_limit)
1074 while ((EMACS_INT) cursor >= (EMACS_INT) p_limit)
1075 cursor += BM_tab[*cursor];
1076 else
1077 while ((EMACS_UINT) cursor >= (EMACS_UINT) p_limit)
1078 cursor += BM_tab[*cursor];
1079 }
1080 /* If you are here, cursor is beyond the end of the searched region. */
1081 /* This can happen if you match on the far character of the pattern, */
1082 /* because the "stride" of that character is infinity, a number able */
1083 /* to throw you well beyond the end of the search. It can also */
1084 /* happen if you fail to match within the permitted region and would */
1085 /* otherwise try a character beyond that region */
1086 if ((cursor - p_limit) * direction <= len)
1087 break; /* a small overrun is genuine */
1088 cursor -= infinity; /* large overrun = hit */
1089 i = dirlen - direction;
1090 if (trt != 0)
1091 {
1092 while ((i -= direction) + direction != 0)
1093 if (pat[i] != XINT (trt[*(cursor -= direction)]))
1094 break;
1095 }
1096 else
1097 {
1098 while ((i -= direction) + direction != 0)
1099 if (pat[i] != *(cursor -= direction))
1100 break;
1101 }
1102 cursor += dirlen - i - direction; /* fix cursor */
1103 if (i + direction == 0)
1104 {
1105 cursor -= direction;
1106
1107 set_search_regs (pos + cursor - p2 + ((direction > 0)
1108 ? 1 - len : 0),
1109 len);
1110
1111 if ((n -= direction) != 0)
1112 cursor += dirlen; /* to resume search */
1113 else
1114 return ((direction > 0)
1115 ? search_regs.end[0] : search_regs.start[0]);
1116 }
1117 else
1118 cursor += stride_for_teases; /* <sigh> we lose - */
1119 }
1120 pos += cursor - p2;
1121 }
1122 else
1123 /* Now we'll pick up a clump that has to be done the hard */
1124 /* way because it covers a discontinuity */
1125 {
1126 limit = ((direction > 0)
1127 ? BUFFER_CEILING_OF (pos - dirlen + 1)
1128 : BUFFER_FLOOR_OF (pos - dirlen - 1));
1129 limit = ((direction > 0)
1130 ? min (limit + len, lim - 1)
1131 : max (limit - len, lim));
1132 /* LIMIT is now the last value POS can have
1133 and still be valid for a possible match. */
1134 while (1)
1135 {
1136 /* This loop can be coded for space rather than */
1137 /* speed because it will usually run only once. */
1138 /* (the reach is at most len + 21, and typically */
1139 /* does not exceed len) */
1140 while ((limit - pos) * direction >= 0)
1141 pos += BM_tab[FETCH_BYTE (pos)];
1142 /* now run the same tests to distinguish going off the */
1143 /* end, a match or a phony match. */
1144 if ((pos - limit) * direction <= len)
1145 break; /* ran off the end */
1146 /* Found what might be a match.
1147 Set POS back to last (first if reverse) char pos. */
1148 pos -= infinity;
1149 i = dirlen - direction;
1150 while ((i -= direction) + direction != 0)
1151 {
1152 pos -= direction;
1153 if (pat[i] != (trt != 0
1154 ? XINT (trt[FETCH_BYTE (pos)])
1155 : FETCH_BYTE (pos)))
1156 break;
1157 }
1158 /* Above loop has moved POS part or all the way
1159 back to the first char pos (last char pos if reverse).
1160 Set it once again at the last (first if reverse) char. */
1161 pos += dirlen - i- direction;
1162 if (i + direction == 0)
1163 {
1164 pos -= direction;
1165
1166 set_search_regs (pos + ((direction > 0) ? 1 - len : 0),
1167 len);
1168
1169 if ((n -= direction) != 0)
1170 pos += dirlen; /* to resume search */
1171 else
1172 return ((direction > 0)
1173 ? search_regs.end[0] : search_regs.start[0]);
1174 }
1175 else
1176 pos += stride_for_teases;
1177 }
1178 }
1179 /* We have done one clump. Can we continue? */
1180 if ((lim - pos) * direction < 0)
1181 return ((0 - n) * direction);
1182 }
1183 return pos;
1184 }
1185 }
1186
1187 /* Record beginning BEG and end BEG + LEN
1188 for a match just found in the current buffer. */
1189
1190 static void
1191 set_search_regs (beg, len)
1192 int beg, len;
1193 {
1194 /* Make sure we have registers in which to store
1195 the match position. */
1196 if (search_regs.num_regs == 0)
1197 {
1198 search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1199 search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
1200 search_regs.num_regs = 2;
1201 }
1202
1203 search_regs.start[0] = beg;
1204 search_regs.end[0] = beg + len;
1205 XSETBUFFER (last_thing_searched, current_buffer);
1206 }
1207 \f
1208 /* Given a string of words separated by word delimiters,
1209 compute a regexp that matches those exact words
1210 separated by arbitrary punctuation. */
1211
1212 static Lisp_Object
1213 wordify (string)
1214 Lisp_Object string;
1215 {
1216 register unsigned char *p, *o;
1217 register int i, len, punct_count = 0, word_count = 0;
1218 Lisp_Object val;
1219
1220 CHECK_STRING (string, 0);
1221 p = XSTRING (string)->data;
1222 len = XSTRING (string)->size;
1223
1224 for (i = 0; i < len; i++)
1225 if (SYNTAX (p[i]) != Sword)
1226 {
1227 punct_count++;
1228 if (i > 0 && SYNTAX (p[i-1]) == Sword) word_count++;
1229 }
1230 if (SYNTAX (p[len-1]) == Sword) word_count++;
1231 if (!word_count) return build_string ("");
1232
1233 val = make_string (p, len - punct_count + 5 * (word_count - 1) + 4);
1234
1235 o = XSTRING (val)->data;
1236 *o++ = '\\';
1237 *o++ = 'b';
1238
1239 for (i = 0; i < len; i++)
1240 if (SYNTAX (p[i]) == Sword)
1241 *o++ = p[i];
1242 else if (i > 0 && SYNTAX (p[i-1]) == Sword && --word_count)
1243 {
1244 *o++ = '\\';
1245 *o++ = 'W';
1246 *o++ = '\\';
1247 *o++ = 'W';
1248 *o++ = '*';
1249 }
1250
1251 *o++ = '\\';
1252 *o++ = 'b';
1253
1254 return val;
1255 }
1256 \f
1257 DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
1258 "MSearch backward: ",
1259 "Search backward from point for STRING.\n\
1260 Set point to the beginning of the occurrence found, and return point.\n\
1261 An optional second argument bounds the search; it is a buffer position.\n\
1262 The match found must not extend before that position.\n\
1263 Optional third argument, if t, means if fail just return nil (no error).\n\
1264 If not nil and not t, position at limit of search and return nil.\n\
1265 Optional fourth argument is repeat count--search for successive occurrences.\n\
1266 See also the functions `match-beginning', `match-end' and `replace-match'.")
1267 (string, bound, noerror, count)
1268 Lisp_Object string, bound, noerror, count;
1269 {
1270 return search_command (string, bound, noerror, count, -1, 0, 0);
1271 }
1272
1273 DEFUN ("search-forward", Fsearch_forward, Ssearch_forward, 1, 4, "MSearch: ",
1274 "Search forward from point for STRING.\n\
1275 Set point to the end of the occurrence found, and return point.\n\
1276 An optional second argument bounds the search; it is a buffer position.\n\
1277 The match found must not extend after that position. nil is equivalent\n\
1278 to (point-max).\n\
1279 Optional third argument, if t, means if fail just return nil (no error).\n\
1280 If not nil and not t, move to limit of search and return nil.\n\
1281 Optional fourth argument is repeat count--search for successive occurrences.\n\
1282 See also the functions `match-beginning', `match-end' and `replace-match'.")
1283 (string, bound, noerror, count)
1284 Lisp_Object string, bound, noerror, count;
1285 {
1286 return search_command (string, bound, noerror, count, 1, 0, 0);
1287 }
1288
1289 DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
1290 "sWord search backward: ",
1291 "Search backward from point for STRING, ignoring differences in punctuation.\n\
1292 Set point to the beginning of the occurrence found, and return point.\n\
1293 An optional second argument bounds the search; it is a buffer position.\n\
1294 The match found must not extend before that position.\n\
1295 Optional third argument, if t, means if fail just return nil (no error).\n\
1296 If not nil and not t, move to limit of search and return nil.\n\
1297 Optional fourth argument is repeat count--search for successive occurrences.")
1298 (string, bound, noerror, count)
1299 Lisp_Object string, bound, noerror, count;
1300 {
1301 return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
1302 }
1303
1304 DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
1305 "sWord search: ",
1306 "Search forward from point for STRING, ignoring differences in punctuation.\n\
1307 Set point to the end of the occurrence found, and return point.\n\
1308 An optional second argument bounds the search; it is a buffer position.\n\
1309 The match found must not extend after that position.\n\
1310 Optional third argument, if t, means if fail just return nil (no error).\n\
1311 If not nil and not t, move to limit of search and return nil.\n\
1312 Optional fourth argument is repeat count--search for successive occurrences.")
1313 (string, bound, noerror, count)
1314 Lisp_Object string, bound, noerror, count;
1315 {
1316 return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
1317 }
1318
1319 DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
1320 "sRE search backward: ",
1321 "Search backward from point for match for regular expression REGEXP.\n\
1322 Set point to the beginning of the match, and return point.\n\
1323 The match found is the one starting last in the buffer\n\
1324 and yet ending before the origin of the search.\n\
1325 An optional second argument bounds the search; it is a buffer position.\n\
1326 The match found must start at or after that position.\n\
1327 Optional third argument, if t, means if fail just return nil (no error).\n\
1328 If not nil and not t, move to limit of search and return nil.\n\
1329 Optional fourth argument is repeat count--search for successive occurrences.\n\
1330 See also the functions `match-beginning', `match-end' and `replace-match'.")
1331 (regexp, bound, noerror, count)
1332 Lisp_Object regexp, bound, noerror, count;
1333 {
1334 return search_command (regexp, bound, noerror, count, -1, 1, 0);
1335 }
1336
1337 DEFUN ("re-search-forward", Fre_search_forward, Sre_search_forward, 1, 4,
1338 "sRE search: ",
1339 "Search forward from point for regular expression REGEXP.\n\
1340 Set point to the end of the occurrence found, and return point.\n\
1341 An optional second argument bounds the search; it is a buffer position.\n\
1342 The match found must not extend after that position.\n\
1343 Optional third argument, if t, means if fail just return nil (no error).\n\
1344 If not nil and not t, move to limit of search and return nil.\n\
1345 Optional fourth argument is repeat count--search for successive occurrences.\n\
1346 See also the functions `match-beginning', `match-end' and `replace-match'.")
1347 (regexp, bound, noerror, count)
1348 Lisp_Object regexp, bound, noerror, count;
1349 {
1350 return search_command (regexp, bound, noerror, count, 1, 1, 0);
1351 }
1352
1353 DEFUN ("posix-search-backward", Fposix_search_backward, Sposix_search_backward, 1, 4,
1354 "sPosix search backward: ",
1355 "Search backward from point for match for regular expression REGEXP.\n\
1356 Find the longest match in accord with Posix regular expression rules.\n\
1357 Set point to the beginning of the match, and return point.\n\
1358 The match found is the one starting last in the buffer\n\
1359 and yet ending before the origin of the search.\n\
1360 An optional second argument bounds the search; it is a buffer position.\n\
1361 The match found must start at or after that position.\n\
1362 Optional third argument, if t, means if fail just return nil (no error).\n\
1363 If not nil and not t, move to limit of search and return nil.\n\
1364 Optional fourth argument is repeat count--search for successive occurrences.\n\
1365 See also the functions `match-beginning', `match-end' and `replace-match'.")
1366 (regexp, bound, noerror, count)
1367 Lisp_Object regexp, bound, noerror, count;
1368 {
1369 return search_command (regexp, bound, noerror, count, -1, 1, 1);
1370 }
1371
1372 DEFUN ("posix-search-forward", Fposix_search_forward, Sposix_search_forward, 1, 4,
1373 "sPosix search: ",
1374 "Search forward from point for regular expression REGEXP.\n\
1375 Find the longest match in accord with Posix regular expression rules.\n\
1376 Set point to the end of the occurrence found, and return point.\n\
1377 An optional second argument bounds the search; it is a buffer position.\n\
1378 The match found must not extend after that position.\n\
1379 Optional third argument, if t, means if fail just return nil (no error).\n\
1380 If not nil and not t, move to limit of search and return nil.\n\
1381 Optional fourth argument is repeat count--search for successive occurrences.\n\
1382 See also the functions `match-beginning', `match-end' and `replace-match'.")
1383 (regexp, bound, noerror, count)
1384 Lisp_Object regexp, bound, noerror, count;
1385 {
1386 return search_command (regexp, bound, noerror, count, 1, 1, 1);
1387 }
1388 \f
1389 DEFUN ("replace-match", Freplace_match, Sreplace_match, 1, 5, 0,
1390 "Replace text matched by last search with NEWTEXT.\n\
1391 If second arg FIXEDCASE is non-nil, do not alter case of replacement text.\n\
1392 Otherwise maybe capitalize the whole text, or maybe just word initials,\n\
1393 based on the replaced text.\n\
1394 If the replaced text has only capital letters\n\
1395 and has at least one multiletter word, convert NEWTEXT to all caps.\n\
1396 If the replaced text has at least one word starting with a capital letter,\n\
1397 then capitalize each word in NEWTEXT.\n\n\
1398 If third arg LITERAL is non-nil, insert NEWTEXT literally.\n\
1399 Otherwise treat `\\' as special:\n\
1400 `\\&' in NEWTEXT means substitute original matched text.\n\
1401 `\\N' means substitute what matched the Nth `\\(...\\)'.\n\
1402 If Nth parens didn't match, substitute nothing.\n\
1403 `\\\\' means insert one `\\'.\n\
1404 FIXEDCASE and LITERAL are optional arguments.\n\
1405 Leaves point at end of replacement text.\n\
1406 \n\
1407 The optional fourth argument STRING can be a string to modify.\n\
1408 In that case, this function creates and returns a new string\n\
1409 which is made by replacing the part of STRING that was matched.\n\
1410 \n\
1411 The optional fifth argument SUBEXP specifies a subexpression of the match.\n\
1412 It says to replace just that subexpression instead of the whole match.\n\
1413 This is useful only after a regular expression search or match\n\
1414 since only regular expressions have distinguished subexpressions.")
1415 (newtext, fixedcase, literal, string, subexp)
1416 Lisp_Object newtext, fixedcase, literal, string, subexp;
1417 {
1418 enum { nochange, all_caps, cap_initial } case_action;
1419 register int pos, last;
1420 int some_multiletter_word;
1421 int some_lowercase;
1422 int some_uppercase;
1423 int some_nonuppercase_initial;
1424 register int c, prevc;
1425 int inslen;
1426 int sub;
1427 int opoint, newpoint;
1428
1429 CHECK_STRING (newtext, 0);
1430
1431 if (! NILP (string))
1432 CHECK_STRING (string, 4);
1433
1434 case_action = nochange; /* We tried an initialization */
1435 /* but some C compilers blew it */
1436
1437 if (search_regs.num_regs <= 0)
1438 error ("replace-match called before any match found");
1439
1440 if (NILP (subexp))
1441 sub = 0;
1442 else
1443 {
1444 CHECK_NUMBER (subexp, 3);
1445 sub = XINT (subexp);
1446 if (sub < 0 || sub >= search_regs.num_regs)
1447 args_out_of_range (subexp, make_number (search_regs.num_regs));
1448 }
1449
1450 if (NILP (string))
1451 {
1452 if (search_regs.start[sub] < BEGV
1453 || search_regs.start[sub] > search_regs.end[sub]
1454 || search_regs.end[sub] > ZV)
1455 args_out_of_range (make_number (search_regs.start[sub]),
1456 make_number (search_regs.end[sub]));
1457 }
1458 else
1459 {
1460 if (search_regs.start[sub] < 0
1461 || search_regs.start[sub] > search_regs.end[sub]
1462 || search_regs.end[sub] > XSTRING (string)->size)
1463 args_out_of_range (make_number (search_regs.start[sub]),
1464 make_number (search_regs.end[sub]));
1465 }
1466
1467 if (NILP (fixedcase))
1468 {
1469 /* Decide how to casify by examining the matched text. */
1470
1471 last = search_regs.end[sub];
1472 prevc = '\n';
1473 case_action = all_caps;
1474
1475 /* some_multiletter_word is set nonzero if any original word
1476 is more than one letter long. */
1477 some_multiletter_word = 0;
1478 some_lowercase = 0;
1479 some_nonuppercase_initial = 0;
1480 some_uppercase = 0;
1481
1482 for (pos = search_regs.start[sub]; pos < last; pos++)
1483 {
1484 if (NILP (string))
1485 c = FETCH_BYTE (pos);
1486 else
1487 c = XSTRING (string)->data[pos];
1488
1489 if (LOWERCASEP (c))
1490 {
1491 /* Cannot be all caps if any original char is lower case */
1492
1493 some_lowercase = 1;
1494 if (SYNTAX (prevc) != Sword)
1495 some_nonuppercase_initial = 1;
1496 else
1497 some_multiletter_word = 1;
1498 }
1499 else if (!NOCASEP (c))
1500 {
1501 some_uppercase = 1;
1502 if (SYNTAX (prevc) != Sword)
1503 ;
1504 else
1505 some_multiletter_word = 1;
1506 }
1507 else
1508 {
1509 /* If the initial is a caseless word constituent,
1510 treat that like a lowercase initial. */
1511 if (SYNTAX (prevc) != Sword)
1512 some_nonuppercase_initial = 1;
1513 }
1514
1515 prevc = c;
1516 }
1517
1518 /* Convert to all caps if the old text is all caps
1519 and has at least one multiletter word. */
1520 if (! some_lowercase && some_multiletter_word)
1521 case_action = all_caps;
1522 /* Capitalize each word, if the old text has all capitalized words. */
1523 else if (!some_nonuppercase_initial && some_multiletter_word)
1524 case_action = cap_initial;
1525 else if (!some_nonuppercase_initial && some_uppercase)
1526 /* Should x -> yz, operating on X, give Yz or YZ?
1527 We'll assume the latter. */
1528 case_action = all_caps;
1529 else
1530 case_action = nochange;
1531 }
1532
1533 /* Do replacement in a string. */
1534 if (!NILP (string))
1535 {
1536 Lisp_Object before, after;
1537
1538 before = Fsubstring (string, make_number (0),
1539 make_number (search_regs.start[sub]));
1540 after = Fsubstring (string, make_number (search_regs.end[sub]), Qnil);
1541
1542 /* Substitute parts of the match into NEWTEXT
1543 if desired. */
1544 if (NILP (literal))
1545 {
1546 int lastpos = -1;
1547 /* We build up the substituted string in ACCUM. */
1548 Lisp_Object accum;
1549 Lisp_Object middle;
1550
1551 accum = Qnil;
1552
1553 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1554 {
1555 int substart = -1;
1556 int subend;
1557 int delbackslash = 0;
1558
1559 c = XSTRING (newtext)->data[pos];
1560 if (c == '\\')
1561 {
1562 c = XSTRING (newtext)->data[++pos];
1563 if (c == '&')
1564 {
1565 substart = search_regs.start[sub];
1566 subend = search_regs.end[sub];
1567 }
1568 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1569 {
1570 if (search_regs.start[c - '0'] >= 0)
1571 {
1572 substart = search_regs.start[c - '0'];
1573 subend = search_regs.end[c - '0'];
1574 }
1575 }
1576 else if (c == '\\')
1577 delbackslash = 1;
1578 else
1579 error ("Invalid use of `\\' in replacement text");
1580 }
1581 if (substart >= 0)
1582 {
1583 if (pos - 1 != lastpos + 1)
1584 middle = Fsubstring (newtext,
1585 make_number (lastpos + 1),
1586 make_number (pos - 1));
1587 else
1588 middle = Qnil;
1589 accum = concat3 (accum, middle,
1590 Fsubstring (string, make_number (substart),
1591 make_number (subend)));
1592 lastpos = pos;
1593 }
1594 else if (delbackslash)
1595 {
1596 middle = Fsubstring (newtext, make_number (lastpos + 1),
1597 make_number (pos));
1598 accum = concat2 (accum, middle);
1599 lastpos = pos;
1600 }
1601 }
1602
1603 if (pos != lastpos + 1)
1604 middle = Fsubstring (newtext, make_number (lastpos + 1),
1605 make_number (pos));
1606 else
1607 middle = Qnil;
1608
1609 newtext = concat2 (accum, middle);
1610 }
1611
1612 /* Do case substitution in NEWTEXT if desired. */
1613 if (case_action == all_caps)
1614 newtext = Fupcase (newtext);
1615 else if (case_action == cap_initial)
1616 newtext = Fupcase_initials (newtext);
1617
1618 return concat3 (before, newtext, after);
1619 }
1620
1621 /* Record point, the move (quietly) to the start of the match. */
1622 if (PT > search_regs.start[sub])
1623 opoint = PT - ZV;
1624 else
1625 opoint = PT;
1626
1627 temp_set_point (search_regs.start[sub], current_buffer);
1628
1629 /* We insert the replacement text before the old text, and then
1630 delete the original text. This means that markers at the
1631 beginning or end of the original will float to the corresponding
1632 position in the replacement. */
1633 if (!NILP (literal))
1634 Finsert_and_inherit (1, &newtext);
1635 else
1636 {
1637 struct gcpro gcpro1;
1638 GCPRO1 (newtext);
1639
1640 for (pos = 0; pos < XSTRING (newtext)->size; pos++)
1641 {
1642 int offset = PT - search_regs.start[sub];
1643
1644 c = XSTRING (newtext)->data[pos];
1645 if (c == '\\')
1646 {
1647 c = XSTRING (newtext)->data[++pos];
1648 if (c == '&')
1649 Finsert_buffer_substring
1650 (Fcurrent_buffer (),
1651 make_number (search_regs.start[sub] + offset),
1652 make_number (search_regs.end[sub] + offset));
1653 else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
1654 {
1655 if (search_regs.start[c - '0'] >= 1)
1656 Finsert_buffer_substring
1657 (Fcurrent_buffer (),
1658 make_number (search_regs.start[c - '0'] + offset),
1659 make_number (search_regs.end[c - '0'] + offset));
1660 }
1661 else if (c == '\\')
1662 insert_char (c);
1663 else
1664 error ("Invalid use of `\\' in replacement text");
1665 }
1666 else
1667 insert_char (c);
1668 }
1669 UNGCPRO;
1670 }
1671
1672 inslen = PT - (search_regs.start[sub]);
1673 del_range (search_regs.start[sub] + inslen, search_regs.end[sub] + inslen);
1674
1675 if (case_action == all_caps)
1676 Fupcase_region (make_number (PT - inslen), make_number (PT));
1677 else if (case_action == cap_initial)
1678 Fupcase_initials_region (make_number (PT - inslen), make_number (PT));
1679
1680 newpoint = PT;
1681
1682 /* Put point back where it was in the text. */
1683 if (opoint <= 0)
1684 temp_set_point (opoint + ZV, current_buffer);
1685 else
1686 temp_set_point (opoint, current_buffer);
1687
1688 /* Now move point "officially" to the start of the inserted replacement. */
1689 move_if_not_intangible (newpoint);
1690
1691 return Qnil;
1692 }
1693 \f
1694 static Lisp_Object
1695 match_limit (num, beginningp)
1696 Lisp_Object num;
1697 int beginningp;
1698 {
1699 register int n;
1700
1701 CHECK_NUMBER (num, 0);
1702 n = XINT (num);
1703 if (n < 0 || n >= search_regs.num_regs)
1704 args_out_of_range (num, make_number (search_regs.num_regs));
1705 if (search_regs.num_regs <= 0
1706 || search_regs.start[n] < 0)
1707 return Qnil;
1708 return (make_number ((beginningp) ? search_regs.start[n]
1709 : search_regs.end[n]));
1710 }
1711
1712 DEFUN ("match-beginning", Fmatch_beginning, Smatch_beginning, 1, 1, 0,
1713 "Return position of start of text matched by last search.\n\
1714 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1715 regexp.\n\
1716 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1717 SUBEXP pairs.\n\
1718 Zero means the entire text matched by the whole regexp or whole string.")
1719 (subexp)
1720 Lisp_Object subexp;
1721 {
1722 return match_limit (subexp, 1);
1723 }
1724
1725 DEFUN ("match-end", Fmatch_end, Smatch_end, 1, 1, 0,
1726 "Return position of end of text matched by last search.\n\
1727 SUBEXP, a number, specifies which parenthesized expression in the last\n\
1728 regexp.\n\
1729 Value is nil if SUBEXPth pair didn't match, or there were less than\n\
1730 SUBEXP pairs.\n\
1731 Zero means the entire text matched by the whole regexp or whole string.")
1732 (subexp)
1733 Lisp_Object subexp;
1734 {
1735 return match_limit (subexp, 0);
1736 }
1737
1738 DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
1739 "Return a list containing all info on what the last search matched.\n\
1740 Element 2N is `(match-beginning N)'; element 2N + 1 is `(match-end N)'.\n\
1741 All the elements are markers or nil (nil if the Nth pair didn't match)\n\
1742 if the last match was on a buffer; integers or nil if a string was matched.\n\
1743 Use `store-match-data' to reinstate the data in this list.\n\
1744 \n\
1745 If INTEGERS (the optional first argument) is non-nil, always use integers\n\
1746 \(rather than markers) to represent buffer positions.\n\
1747 If REUSE is a list, reuse it as part of the value. If REUSE is long enough\n\
1748 to hold all the values, and if INTEGERS is non-nil, no consing is done.")
1749 (integers, reuse)
1750 Lisp_Object integers, reuse;
1751 {
1752 Lisp_Object tail, prev;
1753 Lisp_Object *data;
1754 int i, len;
1755
1756 if (NILP (last_thing_searched))
1757 return Qnil;
1758
1759 data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
1760 * sizeof (Lisp_Object));
1761
1762 len = -1;
1763 for (i = 0; i < search_regs.num_regs; i++)
1764 {
1765 int start = search_regs.start[i];
1766 if (start >= 0)
1767 {
1768 if (EQ (last_thing_searched, Qt)
1769 || ! NILP (integers))
1770 {
1771 XSETFASTINT (data[2 * i], start);
1772 XSETFASTINT (data[2 * i + 1], search_regs.end[i]);
1773 }
1774 else if (BUFFERP (last_thing_searched))
1775 {
1776 data[2 * i] = Fmake_marker ();
1777 Fset_marker (data[2 * i],
1778 make_number (start),
1779 last_thing_searched);
1780 data[2 * i + 1] = Fmake_marker ();
1781 Fset_marker (data[2 * i + 1],
1782 make_number (search_regs.end[i]),
1783 last_thing_searched);
1784 }
1785 else
1786 /* last_thing_searched must always be Qt, a buffer, or Qnil. */
1787 abort ();
1788
1789 len = i;
1790 }
1791 else
1792 data[2 * i] = data [2 * i + 1] = Qnil;
1793 }
1794
1795 /* If REUSE is not usable, cons up the values and return them. */
1796 if (! CONSP (reuse))
1797 return Flist (2 * len + 2, data);
1798
1799 /* If REUSE is a list, store as many value elements as will fit
1800 into the elements of REUSE. */
1801 for (i = 0, tail = reuse; CONSP (tail);
1802 i++, tail = XCONS (tail)->cdr)
1803 {
1804 if (i < 2 * len + 2)
1805 XCONS (tail)->car = data[i];
1806 else
1807 XCONS (tail)->car = Qnil;
1808 prev = tail;
1809 }
1810
1811 /* If we couldn't fit all value elements into REUSE,
1812 cons up the rest of them and add them to the end of REUSE. */
1813 if (i < 2 * len + 2)
1814 XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
1815
1816 return reuse;
1817 }
1818
1819
1820 DEFUN ("store-match-data", Fstore_match_data, Sstore_match_data, 1, 1, 0,
1821 "Set internal data on last search match from elements of LIST.\n\
1822 LIST should have been created by calling `match-data' previously.")
1823 (list)
1824 register Lisp_Object list;
1825 {
1826 register int i;
1827 register Lisp_Object marker;
1828
1829 if (running_asynch_code)
1830 save_search_regs ();
1831
1832 if (!CONSP (list) && !NILP (list))
1833 list = wrong_type_argument (Qconsp, list);
1834
1835 /* Unless we find a marker with a buffer in LIST, assume that this
1836 match data came from a string. */
1837 last_thing_searched = Qt;
1838
1839 /* Allocate registers if they don't already exist. */
1840 {
1841 int length = XFASTINT (Flength (list)) / 2;
1842
1843 if (length > search_regs.num_regs)
1844 {
1845 if (search_regs.num_regs == 0)
1846 {
1847 search_regs.start
1848 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1849 search_regs.end
1850 = (regoff_t *) xmalloc (length * sizeof (regoff_t));
1851 }
1852 else
1853 {
1854 search_regs.start
1855 = (regoff_t *) xrealloc (search_regs.start,
1856 length * sizeof (regoff_t));
1857 search_regs.end
1858 = (regoff_t *) xrealloc (search_regs.end,
1859 length * sizeof (regoff_t));
1860 }
1861
1862 search_regs.num_regs = length;
1863 }
1864 }
1865
1866 for (i = 0; i < search_regs.num_regs; i++)
1867 {
1868 marker = Fcar (list);
1869 if (NILP (marker))
1870 {
1871 search_regs.start[i] = -1;
1872 list = Fcdr (list);
1873 }
1874 else
1875 {
1876 if (MARKERP (marker))
1877 {
1878 if (XMARKER (marker)->buffer == 0)
1879 XSETFASTINT (marker, 0);
1880 else
1881 XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
1882 }
1883
1884 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1885 search_regs.start[i] = XINT (marker);
1886 list = Fcdr (list);
1887
1888 marker = Fcar (list);
1889 if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
1890 XSETFASTINT (marker, 0);
1891
1892 CHECK_NUMBER_COERCE_MARKER (marker, 0);
1893 search_regs.end[i] = XINT (marker);
1894 }
1895 list = Fcdr (list);
1896 }
1897
1898 return Qnil;
1899 }
1900
1901 /* If non-zero the match data have been saved in saved_search_regs
1902 during the execution of a sentinel or filter. */
1903 static int search_regs_saved;
1904 static struct re_registers saved_search_regs;
1905
1906 /* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
1907 if asynchronous code (filter or sentinel) is running. */
1908 static void
1909 save_search_regs ()
1910 {
1911 if (!search_regs_saved)
1912 {
1913 saved_search_regs.num_regs = search_regs.num_regs;
1914 saved_search_regs.start = search_regs.start;
1915 saved_search_regs.end = search_regs.end;
1916 search_regs.num_regs = 0;
1917 search_regs.start = 0;
1918 search_regs.end = 0;
1919
1920 search_regs_saved = 1;
1921 }
1922 }
1923
1924 /* Called upon exit from filters and sentinels. */
1925 void
1926 restore_match_data ()
1927 {
1928 if (search_regs_saved)
1929 {
1930 if (search_regs.num_regs > 0)
1931 {
1932 xfree (search_regs.start);
1933 xfree (search_regs.end);
1934 }
1935 search_regs.num_regs = saved_search_regs.num_regs;
1936 search_regs.start = saved_search_regs.start;
1937 search_regs.end = saved_search_regs.end;
1938
1939 search_regs_saved = 0;
1940 }
1941 }
1942
1943 /* Quote a string to inactivate reg-expr chars */
1944
1945 DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
1946 "Return a regexp string which matches exactly STRING and nothing else.")
1947 (string)
1948 Lisp_Object string;
1949 {
1950 register unsigned char *in, *out, *end;
1951 register unsigned char *temp;
1952
1953 CHECK_STRING (string, 0);
1954
1955 temp = (unsigned char *) alloca (XSTRING (string)->size * 2);
1956
1957 /* Now copy the data into the new string, inserting escapes. */
1958
1959 in = XSTRING (string)->data;
1960 end = in + XSTRING (string)->size;
1961 out = temp;
1962
1963 for (; in != end; in++)
1964 {
1965 if (*in == '[' || *in == ']'
1966 || *in == '*' || *in == '.' || *in == '\\'
1967 || *in == '?' || *in == '+'
1968 || *in == '^' || *in == '$')
1969 *out++ = '\\';
1970 *out++ = *in;
1971 }
1972
1973 return make_string (temp, out - temp);
1974 }
1975 \f
1976 syms_of_search ()
1977 {
1978 register int i;
1979
1980 for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
1981 {
1982 searchbufs[i].buf.allocated = 100;
1983 searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
1984 searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
1985 searchbufs[i].regexp = Qnil;
1986 staticpro (&searchbufs[i].regexp);
1987 searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
1988 }
1989 searchbuf_head = &searchbufs[0];
1990
1991 Qsearch_failed = intern ("search-failed");
1992 staticpro (&Qsearch_failed);
1993 Qinvalid_regexp = intern ("invalid-regexp");
1994 staticpro (&Qinvalid_regexp);
1995
1996 Fput (Qsearch_failed, Qerror_conditions,
1997 Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
1998 Fput (Qsearch_failed, Qerror_message,
1999 build_string ("Search failed"));
2000
2001 Fput (Qinvalid_regexp, Qerror_conditions,
2002 Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
2003 Fput (Qinvalid_regexp, Qerror_message,
2004 build_string ("Invalid regexp"));
2005
2006 last_thing_searched = Qnil;
2007 staticpro (&last_thing_searched);
2008
2009 defsubr (&Slooking_at);
2010 defsubr (&Sposix_looking_at);
2011 defsubr (&Sstring_match);
2012 defsubr (&Sposix_string_match);
2013 defsubr (&Ssearch_forward);
2014 defsubr (&Ssearch_backward);
2015 defsubr (&Sword_search_forward);
2016 defsubr (&Sword_search_backward);
2017 defsubr (&Sre_search_forward);
2018 defsubr (&Sre_search_backward);
2019 defsubr (&Sposix_search_forward);
2020 defsubr (&Sposix_search_backward);
2021 defsubr (&Sreplace_match);
2022 defsubr (&Smatch_beginning);
2023 defsubr (&Smatch_end);
2024 defsubr (&Smatch_data);
2025 defsubr (&Sstore_match_data);
2026 defsubr (&Sregexp_quote);
2027 }