/* String search routines for GNU Emacs.
- Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
+ Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
+ Free Software Foundation, Inc.
This file is part of GNU Emacs.
struct regexp_cache
{
struct regexp_cache *next;
- Lisp_Object regexp;
+ Lisp_Object regexp, whitespace_regexp;
struct re_pattern_buffer buf;
char fastmap[0400];
/* Nonzero means regexp was compiled to do full POSIX backtracking. */
Lisp_Object Qinvalid_regexp;
+Lisp_Object Vsearch_spaces_regexp;
+
static void set_search_regs ();
static void save_search_regs ();
static int simple_search ();
for this pattern. 0 means backtrack only enough to get a valid match.
MULTIBYTE is nonzero if we want to handle multibyte characters in
PATTERN. 0 means all multibyte characters are recognized just as
- sequences of binary data. */
+ sequences of binary data.
+
+ The behavior also depends on Vsearch_spaces_regexp. */
static void
compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
cp->buf.translate = (! NILP (translate) ? translate : make_number (0));
cp->posix = posix;
cp->buf.multibyte = multibyte;
+ cp->whitespace_regexp = Vsearch_spaces_regexp;
BLOCK_INPUT;
old = re_set_syntax (RE_SYNTAX_EMACS
| (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
+
+ re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
+ : SDATA (Vsearch_spaces_regexp));
+
val = (char *) re_compile_pattern ((char *)raw_pattern,
raw_pattern_size, &cp->buf);
+
+ re_set_whitespace_regexp (NULL);
+
re_set_syntax (old);
UNBLOCK_INPUT;
if (val)
{
cp->buf.allocated = cp->buf.used;
cp->buf.buffer
- = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
+ = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
}
}
&& !NILP (Fstring_equal (cp->regexp, pattern))
&& EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
&& cp->posix == posix
- && cp->buf.multibyte == multibyte)
+ && cp->buf.multibyte == multibyte
+ && !NILP (Fequal (cp->whitespace_regexp, Vsearch_spaces_regexp)))
break;
/* If we're at the end of the cache, compile into the nil cell
}
re_match_object = Qnil;
-
+
i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
PT_BYTE - BEGV_BYTE, &search_regs,
ZV_BYTE - BEGV_BYTE);
immediate_quit = 0;
-
+
if (i == -2)
matcher_overflow ();
STRING_MULTIBYTE (string));
immediate_quit = 1;
re_match_object = string;
-
+
val = re_search (bufp, (char *) SDATA (string),
SBYTES (string), pos_byte,
SBYTES (string) - pos_byte,
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
-matched by parenthesis constructs in the pattern. */)
+matched by parenthesis constructs in the pattern.
+
+You can use the function `match-string' to extract the substrings
+matched by the parenthesis constructions in REGEXP. */)
(regexp, string, start)
Lisp_Object regexp, string, start;
{
0, STRING_MULTIBYTE (string));
immediate_quit = 1;
re_match_object = string;
-
+
val = re_search (bufp, (char *) SDATA (string),
SBYTES (string), 0,
SBYTES (string), 0);
immediate_quit = 0;
return val;
}
+
+/* Like fast_string_match but ignore case.
+
+   REGEXP is compiled through compile_pattern with Vascii_downcase_table
+   as its third argument (NOTE(review): presumably the translation
+   table that folds case -- confirm against compile_pattern's parameter
+   list) and with the multibyteness of STRING.  The compiled pattern is
+   then run with re_search over STRING using start 0 and range
+   SBYTES (string); the last argument is 0 (the register block in the
+   GNU regex API, so presumably no match data is recorded -- confirm).
+
+   Returns the value of re_search unchanged.  */
+
+int
+fast_string_match_ignore_case (regexp, string)
+ Lisp_Object regexp, string;
+{
+ int val;
+ struct re_pattern_buffer *bufp;
+
+ bufp = compile_pattern (regexp, 0, Vascii_downcase_table,
+ 0, STRING_MULTIBYTE (string));
+ immediate_quit = 1; /* Set only after compilation and cleared right after re_search.  */
+ re_match_object = string; /* Global; NOTE(review): presumably read by the regex matcher -- confirm.  */
+
+ val = re_search (bufp, (char *) SDATA (string),
+ SBYTES (string), 0,
+ SBYTES (string), 0);
+ immediate_quit = 0;
+ return val;
+}
\f
/* The newline cache: remembering which sections of text have no newlines. */
direction indicated by COUNT.
If we find COUNT instances, set *SHORTAGE to zero, and return the
- position after the COUNTth match. Note that for reverse motion
+ position past the COUNTth match. Note that for reverse motion
this is not the same as the usual convention for Emacs motion commands.
If we don't find COUNT instances before reaching END, set *SHORTAGE
int allow_quit;
{
struct region_cache *newline_cache;
- int direction;
+ int direction;
if (count > 0)
{
ceiling_byte = min (tem, ceiling_byte);
{
- /* The termination address of the dumb loop. */
+ /* The termination address of the dumb loop. */
register unsigned char *ceiling_addr
= BYTE_POS_ADDR (ceiling_byte) + 1;
register unsigned char *cursor
if (shortage == 0)
pos--;
-
+
return pos;
}
\f
{
case '|': case '(': case ')': case '`': case '\'': case 'b':
case 'B': case '<': case '>': case 'w': case 'W': case 's':
- case 'S': case '=': case '{': case '}':
+ case 'S': case '=': case '{': case '}': case '_':
case 'c': case 'C': /* for categoryspec and notcategoryspec */
case '1': case '2': case '3': case '4': case '5':
case '6': case '7': case '8': case '9':
return pos;
}
- if (RE && !trivial_regexp_p (string))
+ if (RE && !(trivial_regexp_p (string) && NILP (Vsearch_spaces_regexp)))
{
unsigned char *p1, *p2;
int s1, s2;
s2 = 0;
}
re_match_object = Qnil;
-
+
while (n < 0)
{
int val;
int infinity, limit, stride_for_teases = 0;
register int *BM_tab;
int *BM_tab_base;
- register unsigned char *cursor, *p_limit;
+ register unsigned char *cursor, *p_limit;
register int i, j;
unsigned char *pat, *pat_end;
int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
/* a single test, a test for having gone past the end of the */
/* permissible match region, to test for both possible matches (when */
/* the stride goes past the end immediately) and failure to */
- /* match (where you get nudged past the end one stride at a time). */
+ /* match (where you get nudged past the end one stride at a time). */
/* Here we make a "mickey mouse" BM table. The stride of the search */
/* is determined only by the last character of the putative match. */
/* If that character does not match, we will stride the proper */
/* distance to propose a match that superimposes it on the last */
/* instance of a character that matches it (per trt), or misses */
- /* it entirely if there is none. */
+ /* it entirely if there is none. */
dirlen = len_byte * direction;
infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
BM_tab[j] = dirlen - i;
/* A translation table is accompanied by its inverse -- see */
- /* comment following downcase_table for details */
+ /* comment following downcase_table for details */
if (this_translated)
{
int starting_ch = ch;
/* This loop can be coded for space rather than */
/* speed because it will usually run only once. */
/* (the reach is at most len + 21, and typically */
- /* does not exceed len) */
+ /* does not exceed len) */
while ((limit - pos_byte) * direction >= 0)
pos_byte += BM_tab[FETCH_BYTE (pos_byte)];
/* now run the same tests to distinguish going off the */
for (i = 0, i_byte = 0; i < len; )
{
int c;
-
+
FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
if (SYNTAX (c) != Sword)
{
int c;
int i_byte_orig = i_byte;
-
+
FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
if (SYNTAX (c) == Sword)
if (c == '\\')
{
FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
-
+
if (c == '&')
{
substart = search_regs.start[sub];
subend = search_regs.end[sub];
}
- else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
+ else if (c >= '1' && c <= '9')
{
- if (search_regs.start[c - '0'] >= 0)
+ if (search_regs.start[c - '0'] >= 0
+ && c <= search_regs.num_regs + '0')
{
substart = search_regs.start[c - '0'];
subend = search_regs.end[c - '0'];
}
+ else
+ {
+ /* If that subexp did not match,
+ replace \\N with nothing. */
+ substart = 0;
+ subend = 0;
+ }
}
else if (c == '\\')
delbackslash = 1;
}
if (really_changed)
- newtext = make_string (substed, substed_len);
+ {
+ if (buf_multibyte)
+ {
+ int nchars = multibyte_chars_in_text (substed, substed_len);
+ newtext = make_multibyte_string (substed, nchars, substed_len);
+ }
+ else
+ newtext = make_unibyte_string (substed, substed_len);
+ }
xfree (substed);
}
/* Adjust search data for this change. */
{
+ int oldend = search_regs.end[sub];
+ int oldstart = search_regs.start[sub];
int change = newpoint - search_regs.end[sub];
int i;
for (i = 0; i < search_regs.num_regs; i++)
{
- if (search_regs.start[i] > newpoint)
+ if (search_regs.start[i] >= oldend)
search_regs.start[i] += change;
- if (search_regs.end[i] > newpoint)
+ else if (search_regs.start[i] > oldstart)
+ search_regs.start[i] = oldstart;
+ if (search_regs.end[i] >= oldend)
search_regs.end[i] += change;
+ else if (search_regs.end[i] > oldstart)
+ search_regs.end[i] = oldstart;
}
}
/* Now move point "officially" to the start of the inserted replacement. */
move_if_not_intangible (newpoint);
-
+
return Qnil;
}
\f
CHECK_NUMBER (num);
n = XINT (num);
- if (n < 0 || n >= search_regs.num_regs)
- args_out_of_range (num, make_number (search_regs.num_regs));
- if (search_regs.num_regs <= 0
+ if (n < 0)
+ args_out_of_range (num, make_number (0));
+ if (search_regs.num_regs <= 0)
+ error ("No match data, because no search succeeded");
+ if (n >= search_regs.num_regs
|| search_regs.start[n] < 0)
return Qnil;
return (make_number ((beginningp) ? search_regs.start[n]
Lisp_Object subexp;
{
return match_limit (subexp, 0);
-}
+}
DEFUN ("match-data", Fmatch_data, Smatch_data, 0, 2, 0,
doc: /* Return a list containing all info on what the last search matched.
if the last match was on a buffer; integers or nil if a string was matched.
Use `store-match-data' to reinstate the data in this list.
-If INTEGERS (the optional first argument) is non-nil, always use integers
-\(rather than markers) to represent buffer positions.
+If INTEGERS (the optional first argument) is non-nil, always use
+integers \(rather than markers) to represent buffer positions. In
+this case, and if the last match was in a buffer, the buffer will get
+stored as one additional element at the end of the list.
+
If REUSE is a list, reuse it as part of the value. If REUSE is long enough
-to hold all the values, and if INTEGERS is non-nil, no consing is done. */)
+to hold all the values, and if INTEGERS is non-nil, no consing is done.
+
+Return value is undefined if the last search failed. */)
(integers, reuse)
Lisp_Object integers, reuse;
{
prev = Qnil;
- data = (Lisp_Object *) alloca ((2 * search_regs.num_regs)
+ data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
* sizeof (Lisp_Object));
- len = -1;
+ len = 0;
for (i = 0; i < search_regs.num_regs; i++)
{
int start = search_regs.start[i];
last_thing_searched);
data[2 * i + 1] = Fmake_marker ();
Fset_marker (data[2 * i + 1],
- make_number (search_regs.end[i]),
+ make_number (search_regs.end[i]),
last_thing_searched);
}
else
/* last_thing_searched must always be Qt, a buffer, or Qnil. */
abort ();
- len = i;
+ len = 2*(i+1);
}
else
data[2 * i] = data [2 * i + 1] = Qnil;
}
+ if (BUFFERP (last_thing_searched) && !NILP (integers))
+ {
+ data[len] = last_thing_searched;
+ len++;
+ }
+
/* If REUSE is not usable, cons up the values and return them. */
if (! CONSP (reuse))
- return Flist (2 * len + 2, data);
+ return Flist (len, data);
/* If REUSE is a list, store as many value elements as will fit
into the elements of REUSE. */
for (i = 0, tail = reuse; CONSP (tail);
i++, tail = XCDR (tail))
{
- if (i < 2 * len + 2)
+ if (i < len)
XSETCAR (tail, data[i]);
else
XSETCAR (tail, Qnil);
/* If we couldn't fit all value elements into REUSE,
cons up the rest of them and add them to the end of REUSE. */
- if (i < 2 * len + 2)
- XSETCDR (prev, Flist (2 * len + 2 - i, data + i));
+ if (i < len)
+ XSETCDR (prev, Flist (len - i, data + i));
return reuse;
}
if (!CONSP (list) && !NILP (list))
list = wrong_type_argument (Qconsp, list);
- /* Unless we find a marker with a buffer in LIST, assume that this
- match data came from a string. */
+ /* Unless we find a marker with a buffer or an explicit buffer
+ in LIST, assume that this match data came from a string. */
last_thing_searched = Qt;
/* Allocate registers if they don't already exist. */
search_regs.num_regs = length;
}
- }
- for (i = 0; i < search_regs.num_regs; i++)
- {
- marker = Fcar (list);
- if (NILP (marker))
- {
- search_regs.start[i] = -1;
- list = Fcdr (list);
- }
- else
- {
- int from;
+ for (i = 0;; i++)
+ {
+ marker = Fcar (list);
+ if (BUFFERP (marker))
+ {
+ last_thing_searched = marker;
+ break;
+ }
+ if (i >= length)
+ break;
+ if (NILP (marker))
+ {
+ search_regs.start[i] = -1;
+ list = Fcdr (list);
+ }
+ else
+ {
+ int from;
- if (MARKERP (marker))
- {
- if (XMARKER (marker)->buffer == 0)
- XSETFASTINT (marker, 0);
- else
- XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
- }
+ if (MARKERP (marker))
+ {
+ if (XMARKER (marker)->buffer == 0)
+ XSETFASTINT (marker, 0);
+ else
+ XSETBUFFER (last_thing_searched, XMARKER (marker)->buffer);
+ }
- CHECK_NUMBER_COERCE_MARKER (marker);
- from = XINT (marker);
- list = Fcdr (list);
+ CHECK_NUMBER_COERCE_MARKER (marker);
+ from = XINT (marker);
+ list = Fcdr (list);
- marker = Fcar (list);
- if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
- XSETFASTINT (marker, 0);
+ marker = Fcar (list);
+ if (MARKERP (marker) && XMARKER (marker)->buffer == 0)
+ XSETFASTINT (marker, 0);
- CHECK_NUMBER_COERCE_MARKER (marker);
- search_regs.start[i] = from;
- search_regs.end[i] = XINT (marker);
- }
- list = Fcdr (list);
- }
+ CHECK_NUMBER_COERCE_MARKER (marker);
+ search_regs.start[i] = from;
+ search_regs.end[i] = XINT (marker);
+ }
+ list = Fcdr (list);
+ }
- return Qnil;
+ for (; i < search_regs.num_regs; i++)
+ search_regs.start[i] = -1;
+ }
+
+ return Qnil;
}
/* If non-zero the match data have been saved in saved_search_regs
during the execution of a sentinel or filter. */
static int search_regs_saved;
static struct re_registers saved_search_regs;
+static Lisp_Object saved_last_thing_searched;
/* Called from Flooking_at, Fstring_match, search_buffer, Fstore_match_data
if asynchronous code (filter or sentinel) is running. */
saved_search_regs.num_regs = search_regs.num_regs;
saved_search_regs.start = search_regs.start;
saved_search_regs.end = search_regs.end;
+ saved_last_thing_searched = last_thing_searched;
+ last_thing_searched = Qnil;
search_regs.num_regs = 0;
search_regs.start = 0;
search_regs.end = 0;
search_regs.num_regs = saved_search_regs.num_regs;
search_regs.start = saved_search_regs.start;
search_regs.end = saved_search_regs.end;
-
+ last_thing_searched = saved_last_thing_searched;
+ saved_last_thing_searched = Qnil;
search_regs_saved = 0;
}
}
in = SDATA (string);
end = in + SBYTES (string);
- out = temp;
+ out = temp;
for (; in != end; in++)
{
out - temp,
STRING_MULTIBYTE (string));
}
-\f
+\f
void
syms_of_search ()
{
for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
{
searchbufs[i].buf.allocated = 100;
- searchbufs[i].buf.buffer = (unsigned char *) malloc (100);
+ searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
searchbufs[i].regexp = Qnil;
+ searchbufs[i].whitespace_regexp = Qnil;
staticpro (&searchbufs[i].regexp);
searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
}
last_thing_searched = Qnil;
staticpro (&last_thing_searched);
+ saved_last_thing_searched = Qnil;
+ staticpro (&saved_last_thing_searched);
+
+ DEFVAR_LISP ("search-spaces-regexp", &Vsearch_spaces_regexp,
+ doc: /* Regexp to substitute for bunches of spaces in regexp search.
+Some commands use this for user-specified regexps.
+Spaces that occur inside character classes or repetition operators
+or other such regexp constructs are not replaced with this.
+A value of nil (which is the normal value) means treat spaces literally. */);
+ Vsearch_spaces_regexp = Qnil;
+
defsubr (&Slooking_at);
defsubr (&Sposix_looking_at);
defsubr (&Sstring_match);
defsubr (&Sset_match_data);
defsubr (&Sregexp_quote);
}
+
+/* arch-tag: a6059d79-0552-4f14-a2cb-d379a4e3c78f
+ (do not change this comment) */