/* String search routines for GNU Emacs.
Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
- 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
+ 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ Free Software Foundation, Inc.
This file is part of GNU Emacs.
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
GNU Emacs is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA. */
+along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
#include <config.h>
cp->posix = posix;
cp->buf.multibyte = STRING_MULTIBYTE (pattern);
cp->buf.charset_unibyte = charset_unibyte;
- cp->whitespace_regexp = Vsearch_spaces_regexp;
+ if (STRINGP (Vsearch_spaces_regexp))
+ cp->whitespace_regexp = Vsearch_spaces_regexp;
+ else
+ cp->whitespace_regexp = Qnil;
+
/* rms: I think BLOCK_INPUT is not needed here any more,
because regex.c defines malloc to call xmalloc.
Using BLOCK_INPUT here means the debugger won't run if an error occurs.
/* BLOCK_INPUT; */
old = re_set_syntax (RE_SYNTAX_EMACS
| (posix ? 0 : RE_NO_POSIX_BACKTRACKING));
- re_set_whitespace_regexp (NILP (Vsearch_spaces_regexp) ? NULL
- : SDATA (Vsearch_spaces_regexp));
+
+ if (STRINGP (Vsearch_spaces_regexp))
+ re_set_whitespace_regexp (SDATA (Vsearch_spaces_regexp));
+ else
+ re_set_whitespace_regexp (NULL);
val = (char *) re_compile_pattern ((char *) SDATA (pattern),
SBYTES (pattern), &cp->buf);
immediate_quit = 0;
return val;
}
+\f
+/* Match REGEXP against the characters after POS to LIMIT, and return
+ the number of matched characters. If STRING is non-nil, match
+ against the characters in it. In that case, POS and LIMIT are
+ indices into the string. This function doesn't modify the match
+ data.
+
+ POS_BYTE and LIMIT_BYTE are the byte positions corresponding to POS
+ and LIMIT; if either is negative, it is computed here from the
+ character position. When STRING is not a string, the text matched
+ is the current buffer's text between BEGV and ZV. The value
+ returned is the result of re_match_2, passed through unchanged. */
+
+EMACS_INT
+fast_looking_at (regexp, pos, pos_byte, limit, limit_byte, string)
+ Lisp_Object regexp;
+ EMACS_INT pos, pos_byte, limit, limit_byte;
+ Lisp_Object string;
+{
+ int multibyte;
+ struct re_pattern_buffer *buf;
+ unsigned char *p1, *p2; /* The two text segments handed to re_match_2. */
+ int s1, s2; /* Sizes in bytes of the P1 and P2 segments. */
+ EMACS_INT len;
+
+ if (STRINGP (string))
+ {
+ if (pos_byte < 0) /* Caller did not supply the byte index. */
+ pos_byte = string_char_to_byte (string, pos);
+ if (limit_byte < 0)
+ limit_byte = string_char_to_byte (string, limit);
+ p1 = NULL; /* No first segment: all of STRING goes in P2/S2. */
+ s1 = 0;
+ p2 = SDATA (string);
+ s2 = SBYTES (string);
+ re_match_object = string;
+ multibyte = STRING_MULTIBYTE (string);
+ }
+ else
+ {
+ if (pos_byte < 0) /* Caller did not supply the byte position. */
+ pos_byte = CHAR_TO_BYTE (pos);
+ if (limit_byte < 0)
+ limit_byte = CHAR_TO_BYTE (limit);
+ pos_byte -= BEGV_BYTE; /* Make both offsets relative to BEGV. */
+ limit_byte -= BEGV_BYTE;
+ p1 = BEGV_ADDR; /* First segment: from BEGV up to GPT. */
+ s1 = GPT_BYTE - BEGV_BYTE;
+ p2 = GAP_END_ADDR; /* Second segment: from GAP_END_ADDR up to ZV. */
+ s2 = ZV_BYTE - GPT_BYTE;
+ if (s1 < 0) /* GPT is before BEGV: all the text is in the second segment. */
+ {
+ p2 = p1;
+ s2 = ZV_BYTE - BEGV_BYTE;
+ s1 = 0;
+ }
+ if (s2 < 0) /* GPT is after ZV: all the text is in the first segment. */
+ {
+ s1 = ZV_BYTE - BEGV_BYTE;
+ s2 = 0;
+ }
+ re_match_object = Qnil; /* Matching buffer text, not a string. */
+ multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+ }
+
+ buf = compile_pattern (regexp, 0, Qnil, 0, multibyte); /* NOTE(review): the 0, Qnil, 0 arguments are presumably "no registers, no translation, non-POSIX" -- confirm against compile_pattern. */
+ immediate_quit = 1; /* Set only for the duration of the match; cleared again below. */
+ len = re_match_2 (buf, (char *) p1, s1, (char *) p2, s2,
+ pos_byte, NULL, limit_byte); /* NOTE(review): NULL is presumably the registers argument, i.e. no match positions are recorded -- confirm. */
+ immediate_quit = 0;
+
+ return len;
+}
+
\f
/* The newline cache: remembering which sections of text have no newlines. */
if (this_char_base > 0)
boyer_moore_ok = 0;
else
- {
- this_char_base = 0;
- if (char_base < 0)
- char_base = this_char_base;
- }
+ this_char_base = 0;
}
else if (CHAR_BYTE8_P (inverse))
/* Boyer-moore search can't handle a
this_char_base = inverse & ~0x3F;
if (char_base < 0)
char_base = this_char_base;
- else if (char_base > 0
- && this_char_base != char_base)
+ else if (this_char_base != char_base)
boyer_moore_ok = 0;
}
else if ((inverse & ~0x3F) != this_char_base)
}
}
}
- if (char_base < 0)
- char_base = 0;
/* Store this character into the translated pattern. */
bcopy (str, pat, charlen);
base_pat += in_charlen;
len_byte -= in_charlen;
}
+
+ /* If char_base is still negative we didn't find any translated
+ non-ASCII characters. */
+ if (char_base < 0)
+ char_base = 0;
}
else
{
ch = -1;
}
- if (ch >= 0400)
+ if (ch >= 0200)
j = (ch & 0x3F) | 0200;
else
j = *ptr;
while (1)
{
TRANSLATE (ch, inverse_trt, ch);
- if (ch >= 0400)
+ if (ch >= 0200)
j = (ch & 0x3F) | 0200;
else
j = ch;
XSETBUFFER (last_thing_searched, current_buffer);
}
\f
-/* Given a string of words separated by word delimiters,
- compute a regexp that matches those exact words
- separated by arbitrary punctuation. */
+/* Given STRING, a string of words separated by word delimiters,
+ compute a regexp that matches those exact words separated by
+ arbitrary punctuation. If LAX is nonzero, the end of the string
+ need not match a word boundary unless it ends in whitespace. */
static Lisp_Object
-wordify (string)
+wordify (string, lax)
Lisp_Object string;
+ int lax;
{
register unsigned char *p, *o;
register int i, i_byte, len, punct_count = 0, word_count = 0;
Lisp_Object val;
int prev_c = 0;
- int adjust;
+ int adjust, whitespace_at_end;
CHECK_STRING (string);
p = SDATA (string);
}
if (SYNTAX (prev_c) == Sword)
- word_count++;
+ {
+ word_count++;
+ whitespace_at_end = 0;
+ }
+ else
+ whitespace_at_end = 1;
+
if (!word_count)
return empty_unibyte_string;
- adjust = - punct_count + 5 * (word_count - 1) + 4;
+ adjust = - punct_count + 5 * (word_count - 1)
+ + ((lax && !whitespace_at_end) ? 2 : 4);
if (STRING_MULTIBYTE (string))
val = make_uninit_multibyte_string (len + adjust,
SBYTES (string)
prev_c = c;
}
- *o++ = '\\';
- *o++ = 'b';
+ if (!lax || whitespace_at_end)
+ {
+ *o++ = '\\';
+ *o++ = 'b';
+ }
return val;
}
(string, bound, noerror, count)
Lisp_Object string, bound, noerror, count;
{
- return search_command (wordify (string), bound, noerror, count, -1, 1, 0);
+ return search_command (wordify (string, 0), bound, noerror, count, -1, 1, 0);
}
DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
(string, bound, noerror, count)
Lisp_Object string, bound, noerror, count;
{
- return search_command (wordify (string), bound, noerror, count, 1, 1, 0);
+ return search_command (wordify (string, 0), bound, noerror, count, 1, 1, 0);
+}
+
+DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
+ "sWord search backward: ",
+ doc: /* Search backward from point for STRING, ignoring differences in punctuation.
+Set point to the beginning of the occurrence found, and return point.
+
+Unlike `word-search-backward', the end of STRING need not match a word
+boundary unless it ends in whitespace.
+
+An optional second argument bounds the search; it is a buffer position.
+The match found must not extend before that position.
+Optional third argument, if t, means if fail just return nil (no error).
+ If not nil and not t, move to limit of search and return nil.
+Optional fourth argument is repeat count--search for successive occurrences. */)
+ (string, bound, noerror, count)
+ Lisp_Object string, bound, noerror, count;
+{ /* Converts STRING to a regexp with wordify, passing LAX = 1 (the non-lax word search commands pass 0), then delegates to search_command. */
+ return search_command (wordify (string, 1), bound, noerror, count, -1, 1, 0); /* Fifth argument is -1 here and 1 in the forward variant; NOTE(review): the trailing 1, 0 presumably select regexp, non-POSIX matching -- confirm against search_command. */
+}
+
+DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
+ "sWord search: ",
+ doc: /* Search forward from point for STRING, ignoring differences in punctuation.
+Set point to the end of the occurrence found, and return point.
+
+Unlike `word-search-forward', the end of STRING need not match a word
+boundary unless it ends in whitespace.
+
+An optional second argument bounds the search; it is a buffer position.
+The match found must not extend after that position.
+Optional third argument, if t, means if fail just return nil (no error).
+ If not nil and not t, move to limit of search and return nil.
+Optional fourth argument is repeat count--search for successive occurrences. */)
+ (string, bound, noerror, count)
+ Lisp_Object string, bound, noerror, count;
+{ /* Converts STRING to a regexp with wordify, passing LAX = 1 (the non-lax word search commands pass 0), then delegates to search_command. */
+ return search_command (wordify (string, 1), bound, noerror, count, 1, 1, 0); /* Fifth argument is 1 here and -1 in the backward variant; NOTE(review): the trailing 1, 0 presumably select regexp, non-POSIX matching -- confirm against search_command. */
}
DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
return reuse;
}
-/* Internal usage only:
- If RESEAT is `evaporate', put the markers back on the free list
- immediately. No other references to the markers must exist in this case,
- so it is used only internally on the unwind stack and save-match-data from
- Lisp. */
+/* We used to have an internal use variant of `reseat' described as:
+
+ If RESEAT is `evaporate', put the markers back on the free list
+ immediately. No other references to the markers must exist in this
+ case, so it is used only internally on the unwind stack and
+ save-match-data from Lisp.
+
+ But it was ill-conceived: those supposedly-internal markers get exposed via
+ the undo-list, so freeing them here is unsafe. */
DEFUN ("set-match-data", Fset_match_data, Sset_match_data, 1, 2, 0,
doc: /* Set internal data on last search match from elements of LIST.
if (!NILP (reseat) && MARKERP (m))
{
- if (EQ (reseat, Qevaporate))
- free_marker (m);
- else
- unchain_marker (XMARKER (m));
+ unchain_marker (XMARKER (m));
XSETCAR (list, Qnil);
}
if (!NILP (reseat) && MARKERP (m))
{
- if (EQ (reseat, Qevaporate))
- free_marker (m);
- else
- unchain_marker (XMARKER (m));
+ unchain_marker (XMARKER (m));
XSETCAR (list, Qnil);
}
}
unwind_set_match_data (list)
Lisp_Object list;
{
- /* It is safe to free (evaporate) the markers immediately. */
- return Fset_match_data (list, Qevaporate);
+ /* It is NOT ALWAYS safe to free (evaporate) the markers immediately. */
+ return Fset_match_data (list, Qt);
}
/* Called to unwind protect the match data. */
DEFVAR_LISP ("inhibit-changing-match-data", &Vinhibit_changing_match_data,
doc: /* Internal use only.
-If non-nil, the match data will not be changed during call to searching or
-matching functions, such as `looking-at', `string-match', `re-search-forward'
-etc. */);
+If non-nil, the primitive searching and matching functions
+such as `looking-at', `string-match', `re-search-forward', etc.,
+do not set the match data. The proper way to use this variable
+is to bind it with `let' around a small expression. */);
Vinhibit_changing_match_data = Qnil;
defsubr (&Slooking_at);
defsubr (&Ssearch_backward);
defsubr (&Sword_search_forward);
defsubr (&Sword_search_backward);
+ defsubr (&Sword_search_forward_lax);
+ defsubr (&Sword_search_backward_lax);
defsubr (&Sre_search_forward);
defsubr (&Sre_search_backward);
defsubr (&Sposix_search_forward);