/* String search routines for GNU Emacs.
- Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2012
- Free Software Foundation, Inc.
+
+Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2012
+ Free Software Foundation, Inc.
This file is part of GNU Emacs.
#include <config.h>
-#include <setjmp.h>
+
#include "lisp.h"
#include "syntax.h"
#include "category.h"
-#include "buffer.h"
#include "character.h"
+#include "buffer.h"
#include "charset.h"
#include "region-cache.h"
#include "commands.h"
static EMACS_INT search_buffer (Lisp_Object, ptrdiff_t, ptrdiff_t,
ptrdiff_t, ptrdiff_t, EMACS_INT, int,
Lisp_Object, Lisp_Object, int);
-static void matcher_overflow (void) NO_RETURN;
-static void
+static _Noreturn void
matcher_overflow (void)
{
error ("Stack overflow in regexp matcher");
re_set_whitespace_regexp (NULL);
re_set_syntax (old);
- /* UNBLOCK_INPUT; */
+ /* unblock_input (); */
if (val)
xsignal1 (Qinvalid_regexp, build_string (val));
for (cp = searchbuf_head; cp != 0; cp = cp->next)
{
cp->buf.allocated = cp->buf.used;
- cp->buf.buffer
- = (unsigned char *) xrealloc (cp->buf.buffer, cp->buf.used);
+ cp->buf.buffer = xrealloc (cp->buf.buffer, cp->buf.used);
}
}
save_search_regs ();
/* This is so set_image_of_range_1 in regex.c can find the EQV table. */
- XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
- = BVAR (current_buffer, case_eqv_table);
+ set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
+ BVAR (current_buffer, case_eqv_table));
CHECK_STRING (string);
bufp = compile_pattern (string,
}
/* This is so set_image_of_range_1 in regex.c can find the EQV table. */
- XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
- = BVAR (current_buffer, case_eqv_table);
+ set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
+ BVAR (current_buffer, case_eqv_table));
bufp = compile_pattern (regexp,
(NILP (Vinhibit_changing_match_data)
We assume that STRING contains single-byte characters. */
ptrdiff_t
-fast_c_string_match_ignore_case (Lisp_Object regexp, const char *string)
+fast_c_string_match_ignore_case (Lisp_Object regexp,
+ const char *string, ptrdiff_t len)
{
ptrdiff_t val;
struct re_pattern_buffer *bufp;
- size_t len = strlen (string);
regexp = string_make_unibyte (regexp);
re_match_object = Qt;
obstacle --- the last character the dumb search loop should
examine. */
ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end) - 1;
- ptrdiff_t start_byte = CHAR_TO_BYTE (start);
+ ptrdiff_t start_byte;
ptrdiff_t tem;
/* If we're looking for a newline, consult the newline cache
ptrdiff_t next_change;
immediate_quit = 0;
while (region_cache_forward
- (current_buffer, newline_cache, start_byte, &next_change))
- start_byte = next_change;
+ (current_buffer, newline_cache, start, &next_change))
+ start = next_change;
immediate_quit = allow_quit;
+ start_byte = CHAR_TO_BYTE (start);
+
/* START should never be after END. */
if (start_byte > ceiling_byte)
start_byte = ceiling_byte;
/* Now the text after start is an unknown region, and
next_change is the position of the next known region. */
- ceiling_byte = min (next_change - 1, ceiling_byte);
+ ceiling_byte = min (CHAR_TO_BYTE (next_change) - 1, ceiling_byte);
}
+ else
+ start_byte = CHAR_TO_BYTE (start);
/* The dumb loop can only scan text stored in contiguous
bytes. BUFFER_CEILING_OF returns the last character
{
/* The last character to check before the next obstacle. */
ptrdiff_t ceiling_byte = CHAR_TO_BYTE (end);
- ptrdiff_t start_byte = CHAR_TO_BYTE (start);
+ ptrdiff_t start_byte;
ptrdiff_t tem;
/* Consult the newline cache, if appropriate. */
ptrdiff_t next_change;
immediate_quit = 0;
while (region_cache_backward
- (current_buffer, newline_cache, start_byte, &next_change))
- start_byte = next_change;
+ (current_buffer, newline_cache, start, &next_change))
+ start = next_change;
immediate_quit = allow_quit;
+ start_byte = CHAR_TO_BYTE (start);
+
/* Start should never be at or before end. */
if (start_byte <= ceiling_byte)
start_byte = ceiling_byte + 1;
/* Now the text before start is an unknown region, and
next_change is the position of the next known region. */
- ceiling_byte = max (next_change, ceiling_byte);
+ ceiling_byte = max (CHAR_TO_BYTE (next_change), ceiling_byte);
}
+ else
+ start_byte = CHAR_TO_BYTE (start);
/* Stop scanning before the gap. */
tem = BUFFER_FLOOR_OF (start_byte - 1);
}
/* This is so set_image_of_range_1 in regex.c can find the EQV table. */
- XCHAR_TABLE (BVAR (current_buffer, case_canon_table))->extras[2]
- = BVAR (current_buffer, case_eqv_table);
+ set_char_table_extras (BVAR (current_buffer, case_canon_table), 2,
+ BVAR (current_buffer, case_eqv_table));
np = search_buffer (string, PT, PT_BYTE, lim, lim_byte, n, RE,
(!NILP (BVAR (current_buffer, case_fold_search))
if (!EQ (noerror, Qt))
{
if (lim < BEGV || lim > ZV)
- abort ();
+ emacs_abort ();
SET_PT_BOTH (lim, lim_byte);
return Qnil;
#if 0 /* This would be clean, but maybe programs depend on
}
if (np < BEGV || np > ZV)
- abort ();
+ emacs_abort ();
SET_PT (np);
while (n < 0)
{
ptrdiff_t val;
+
val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
pos_byte - BEGV_BYTE, lim_byte - pos_byte,
(NILP (Vinhibit_changing_match_data)
while (n > 0)
{
ptrdiff_t val;
+
val = re_search_2 (bufp, (char *) p1, s1, (char *) p2, s2,
pos_byte - BEGV_BYTE, lim_byte - pos_byte,
(NILP (Vinhibit_changing_match_data)
raw_pattern_size_byte
= count_size_as_multibyte (SDATA (string),
raw_pattern_size);
- raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
+ raw_pattern = alloca (raw_pattern_size_byte + 1);
copy_text (SDATA (string), raw_pattern,
SCHARS (string), 0, 1);
}
the chosen single-byte character set can possibly match. */
raw_pattern_size = SCHARS (string);
raw_pattern_size_byte = SCHARS (string);
- raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
+ raw_pattern = alloca (raw_pattern_size + 1);
copy_text (SDATA (string), raw_pattern,
SBYTES (string), 1, 0);
}
/* Copy and optionally translate the pattern. */
len = raw_pattern_size;
len_byte = raw_pattern_size_byte;
- patbuf = (unsigned char *) alloca (len * MAX_MULTIBYTE_LENGTH);
+ patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
pat = patbuf;
base_pat = raw_pattern;
if (multibyte)
int forward = n > 0;
/* Number of buffer bytes matched. Note that this may be different
from len_byte in a multibyte buffer. */
- ptrdiff_t match_byte;
+ ptrdiff_t match_byte = PTRDIFF_MIN;
if (lim > pos && multibyte)
while (n > 0)
stop:
if (n == 0)
{
+ eassert (match_byte != PTRDIFF_MIN);
if (forward)
set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
else
the match position. */
if (search_regs.num_regs == 0)
{
- search_regs.start = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
- search_regs.end = (regoff_t *) xmalloc (2 * sizeof (regoff_t));
+ search_regs.start = xmalloc (2 * sizeof (regoff_t));
+ search_regs.end = xmalloc (2 * sizeof (regoff_t));
search_regs.num_regs = 2;
}
XSETBUFFER (last_thing_searched, current_buffer);
}
\f
-DEFUN ("word-search-regexp", Fword_search_regexp, Sword_search_regexp, 1, 2, 0,
- doc: /* Return a regexp which matches words, ignoring punctuation.
-Given STRING, a string of words separated by word delimiters,
-compute a regexp that matches those exact words separated by
-arbitrary punctuation. If LAX is non-nil, the end of the string
-need not match a word boundary unless it ends in whitespace.
-
-Used in `word-search-forward', `word-search-backward',
-`word-search-forward-lax', `word-search-backward-lax'. */)
- (Lisp_Object string, Lisp_Object lax)
-{
- register unsigned char *o;
- register ptrdiff_t i, i_byte, len, punct_count = 0, word_count = 0;
- Lisp_Object val;
- int prev_c = 0;
- EMACS_INT adjust;
- int whitespace_at_end;
-
- CHECK_STRING (string);
- len = SCHARS (string);
-
- for (i = 0, i_byte = 0; i < len; )
- {
- int c;
-
- FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
-
- if (SYNTAX (c) != Sword)
- {
- punct_count++;
- if (SYNTAX (prev_c) == Sword)
- word_count++;
- }
-
- prev_c = c;
- }
-
- if (SYNTAX (prev_c) == Sword)
- {
- word_count++;
- whitespace_at_end = 0;
- }
- else
- {
- whitespace_at_end = 1;
- if (!word_count)
- return empty_unibyte_string;
- }
-
- adjust = word_count - 1;
- if (TYPE_MAXIMUM (EMACS_INT) / 5 < adjust)
- memory_full (SIZE_MAX);
- adjust = - punct_count + 5 * adjust
- + ((!NILP (lax) && !whitespace_at_end) ? 2 : 4);
- if (STRING_MULTIBYTE (string))
- {
- if (INT_ADD_OVERFLOW (SBYTES (string), adjust))
- memory_full (SIZE_MAX);
- val = make_uninit_multibyte_string (len + adjust,
- SBYTES (string) + adjust);
- }
- else
- {
- if (INT_ADD_OVERFLOW (len, adjust))
- memory_full (SIZE_MAX);
- val = make_uninit_string (len + adjust);
- }
-
- o = SDATA (val);
- *o++ = '\\';
- *o++ = 'b';
- prev_c = 0;
-
- for (i = 0, i_byte = 0; i < len; )
- {
- int c;
- ptrdiff_t i_byte_orig = i_byte;
-
- FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE (c, string, i, i_byte);
-
- if (SYNTAX (c) == Sword)
- {
- memcpy (o, SDATA (string) + i_byte_orig, i_byte - i_byte_orig);
- o += i_byte - i_byte_orig;
- }
- else if (SYNTAX (prev_c) == Sword && --word_count)
- {
- *o++ = '\\';
- *o++ = 'W';
- *o++ = '\\';
- *o++ = 'W';
- *o++ = '*';
- }
-
- prev_c = c;
- }
-
- if (NILP (lax) || whitespace_at_end)
- {
- *o++ = '\\';
- *o++ = 'b';
- }
-
- return val;
-}
-\f
DEFUN ("search-backward", Fsearch_backward, Ssearch_backward, 1, 4,
"MSearch backward: ",
doc: /* Search backward from point for STRING.
return search_command (string, bound, noerror, count, 1, 0, 0);
}
-DEFUN ("word-search-backward", Fword_search_backward, Sword_search_backward, 1, 4,
- "sWord search backward: ",
- doc: /* Search backward from point for STRING, ignoring differences in punctuation.
-Set point to the beginning of the occurrence found, and return point.
-An optional second argument bounds the search; it is a buffer position.
-The match found must not extend before that position.
-Optional third argument, if t, means if fail just return nil (no error).
- If not nil and not t, move to limit of search and return nil.
-Optional fourth argument is repeat count--search for successive occurrences.
-
-Relies on the function `word-search-regexp' to convert a sequence
-of words in STRING to a regexp used to search words without regard
-to punctuation. */)
- (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
-{
- return search_command (Fword_search_regexp (string, Qnil), bound, noerror, count, -1, 1, 0);
-}
-
-DEFUN ("word-search-forward", Fword_search_forward, Sword_search_forward, 1, 4,
- "sWord search: ",
- doc: /* Search forward from point for STRING, ignoring differences in punctuation.
-Set point to the end of the occurrence found, and return point.
-An optional second argument bounds the search; it is a buffer position.
-The match found must not extend after that position.
-Optional third argument, if t, means if fail just return nil (no error).
- If not nil and not t, move to limit of search and return nil.
-Optional fourth argument is repeat count--search for successive occurrences.
-
-Relies on the function `word-search-regexp' to convert a sequence
-of words in STRING to a regexp used to search words without regard
-to punctuation. */)
- (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
-{
- return search_command (Fword_search_regexp (string, Qnil), bound, noerror, count, 1, 1, 0);
-}
-
-DEFUN ("word-search-backward-lax", Fword_search_backward_lax, Sword_search_backward_lax, 1, 4,
- "sWord search backward: ",
- doc: /* Search backward from point for STRING, ignoring differences in punctuation.
-Set point to the beginning of the occurrence found, and return point.
-
-Unlike `word-search-backward', the end of STRING need not match a word
-boundary unless it ends in whitespace.
-
-An optional second argument bounds the search; it is a buffer position.
-The match found must not extend before that position.
-Optional third argument, if t, means if fail just return nil (no error).
- If not nil and not t, move to limit of search and return nil.
-Optional fourth argument is repeat count--search for successive occurrences.
-
-Relies on the function `word-search-regexp' to convert a sequence
-of words in STRING to a regexp used to search words without regard
-to punctuation. */)
- (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
-{
- return search_command (Fword_search_regexp (string, Qt), bound, noerror, count, -1, 1, 0);
-}
-
-DEFUN ("word-search-forward-lax", Fword_search_forward_lax, Sword_search_forward_lax, 1, 4,
- "sWord search: ",
- doc: /* Search forward from point for STRING, ignoring differences in punctuation.
-Set point to the end of the occurrence found, and return point.
-
-Unlike `word-search-forward', the end of STRING need not match a word
-boundary unless it ends in whitespace.
-
-An optional second argument bounds the search; it is a buffer position.
-The match found must not extend after that position.
-Optional third argument, if t, means if fail just return nil (no error).
- If not nil and not t, move to limit of search and return nil.
-Optional fourth argument is repeat count--search for successive occurrences.
-
-Relies on the function `word-search-regexp' to convert a sequence
-of words in STRING to a regexp used to search words without regard
-to punctuation. */)
- (Lisp_Object string, Lisp_Object bound, Lisp_Object noerror, Lisp_Object count)
-{
- return search_command (Fword_search_regexp (string, Qt), bound, noerror, count, 1, 1, 0);
-}
-
DEFUN ("re-search-backward", Fre_search_backward, Sre_search_backward, 1, 4,
"sRE search backward: ",
doc: /* Search backward from point for match for regular expression REGEXP.
doc: /* Replace text matched by last search with NEWTEXT.
Leave point at the end of the replacement text.
-If second arg FIXEDCASE is non-nil, do not alter case of replacement text.
-Otherwise maybe capitalize the whole text, or maybe just word initials,
-based on the replaced text.
-If the replaced text has only capital letters
-and has at least one multiletter word, convert NEWTEXT to all caps.
-Otherwise if all words are capitalized in the replaced text,
-capitalize each word in NEWTEXT.
+If optional second arg FIXEDCASE is non-nil, do not alter the case of
+the replacement text. Otherwise, maybe capitalize the whole text, or
+maybe just word initials, based on the replaced text. If the replaced
+text has only capital letters and has at least one multiletter word,
+convert NEWTEXT to all caps. Otherwise if all words are capitalized
+in the replaced text, capitalize each word in NEWTEXT.
-If third arg LITERAL is non-nil, insert NEWTEXT literally.
+If optional third arg LITERAL is non-nil, insert NEWTEXT literally.
Otherwise treat `\\' as special:
`\\&' in NEWTEXT means substitute original matched text.
`\\N' means substitute what matched the Nth `\\(...\\)'.
If Nth parens didn't match, substitute nothing.
`\\\\' means insert one `\\'.
+ `\\?' is treated literally
+ (for compatibility with `query-replace-regexp').
+ Any other character following `\\' signals an error.
Case conversion does not apply to these substitutions.
-FIXEDCASE and LITERAL are optional arguments.
-
-The optional fourth argument STRING can be a string to modify.
-This is meaningful when the previous match was done against STRING,
-using `string-match'. When used this way, `replace-match'
-creates and returns a new string made by copying STRING and replacing
-the part of STRING that was matched.
+If optional fourth argument STRING is non-nil, it should be a string
+to act on; this should be the string on which the previous match was
+done via `string-match'. In this case, `replace-match' creates and
+returns a new string, made by copying STRING and replacing the part of
+STRING that was matched (the original STRING itself is not altered).
The optional fifth argument SUBEXP specifies a subexpression;
it says to replace just that subexpression with NEWTEXT,
}
else if (c == '\\')
delbackslash = 1;
- else
+ else if (c != '?')
error ("Invalid use of `\\' in replacement text");
}
if (substart >= 0)
substed_alloc_size = ((STRING_BYTES_BOUND - 100) / 2 < length
? STRING_BYTES_BOUND
: length * 2 + 100);
- substed = (unsigned char *) xmalloc (substed_alloc_size);
+ substed = xmalloc (substed_alloc_size);
substed_len = 0;
/* Go thru NEWTEXT, producing the actual text to insert in
prev = Qnil;
- data = (Lisp_Object *) alloca ((2 * search_regs.num_regs + 1)
- * sizeof (Lisp_Object));
+ data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
len = 0;
for (i = 0; i < search_regs.num_regs; i++)
}
else
/* last_thing_searched must always be Qt, a buffer, or Qnil. */
- abort ();
+ emacs_abort ();
len = 2 * i + 2;
}
CHECK_STRING (string);
- temp = (char *) alloca (SBYTES (string) * 2);
+ temp = alloca (SBYTES (string) * 2);
/* Now copy the data into the new string, inserting escapes. */
for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
{
searchbufs[i].buf.allocated = 100;
- searchbufs[i].buf.buffer = (unsigned char *) xmalloc (100);
+ searchbufs[i].buf.buffer = xmalloc (100);
searchbufs[i].buf.fastmap = searchbufs[i].fastmap;
searchbufs[i].regexp = Qnil;
searchbufs[i].whitespace_regexp = Qnil;
DEFSYM (Qinvalid_regexp, "invalid-regexp");
Fput (Qsearch_failed, Qerror_conditions,
- pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
+ listn (CONSTYPE_PURE, 2, Qsearch_failed, Qerror));
Fput (Qsearch_failed, Qerror_message,
- make_pure_c_string ("Search failed"));
+ build_pure_c_string ("Search failed"));
Fput (Qinvalid_regexp, Qerror_conditions,
- pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
+ listn (CONSTYPE_PURE, 2, Qinvalid_regexp, Qerror));
Fput (Qinvalid_regexp, Qerror_message,
- make_pure_c_string ("Invalid regexp"));
+ build_pure_c_string ("Invalid regexp"));
last_thing_searched = Qnil;
staticpro (&last_thing_searched);
defsubr (&Sposix_string_match);
defsubr (&Ssearch_forward);
defsubr (&Ssearch_backward);
- defsubr (&Sword_search_regexp);
- defsubr (&Sword_search_forward);
- defsubr (&Sword_search_backward);
- defsubr (&Sword_search_forward_lax);
- defsubr (&Sword_search_backward_lax);
defsubr (&Sre_search_forward);
defsubr (&Sre_search_backward);
defsubr (&Sposix_search_forward);