/* String search routines for GNU Emacs.
Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2001, 2002,
- 2003, 2004, 2005, 2006, 2007, 2008, 2009
+ 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
Free Software Foundation, Inc.
This file is part of GNU Emacs.
#include <config.h>
+#include <setjmp.h>
#include "lisp.h"
#include "syntax.h"
#include "category.h"
only. */
Lisp_Object Vinhibit_changing_match_data;
-static void set_search_regs ();
-static void save_search_regs ();
-static int simple_search ();
-static int boyer_moore ();
-static int search_buffer ();
+static void set_search_regs P_ ((EMACS_INT, EMACS_INT));
+static void save_search_regs P_ ((void));
+static EMACS_INT simple_search P_ ((int, unsigned char *, int, int,
+ Lisp_Object, EMACS_INT, EMACS_INT,
+ EMACS_INT, EMACS_INT));
+static EMACS_INT boyer_moore P_ ((int, unsigned char *, int, int,
+ Lisp_Object, Lisp_Object,
+ EMACS_INT, EMACS_INT,
+ EMACS_INT, EMACS_INT, int));
+static EMACS_INT search_buffer P_ ((Lisp_Object, EMACS_INT, EMACS_INT,
+ EMACS_INT, EMACS_INT, int, int,
+ Lisp_Object, Lisp_Object, int));
static void matcher_overflow () NO_RETURN;
static void
int i;
for (i = 0; i < REGEXP_CACHE_SIZE; ++i)
- /* It's tempting to compare with the syntax-table we've actually changd,
- but it's not sufficient because char-table inheritance mewans that
+ /* It's tempting to compare with the syntax-table we've actually changed,
+ but it's not sufficient because char-table inheritance means that
modifying one syntax-table can change others at the same time. */
if (!EQ (searchbufs[i].syntax_table, Qt))
searchbufs[i].regexp = Qnil;
if (regp)
re_set_registers (&cp->buf, regp, regp->num_regs, regp->start, regp->end);
- /* The compiled pattern can be used both for mulitbyte and unibyte
+ /* The compiled pattern can be used both for multibyte and unibyte
target. But, we have to tell which the pattern is used for. */
cp->buf.target_multibyte = multibyte;
{
Lisp_Object val;
unsigned char *p1, *p2;
- int s1, s2;
+ EMACS_INT s1, s2;
register int i;
struct re_pattern_buffer *bufp;
{
int val;
struct re_pattern_buffer *bufp;
- int pos, pos_byte;
+ EMACS_INT pos, pos_byte;
int i;
if (running_asynch_code)
return val;
}
\f
-/* Match REGEXP atainst the characters after POS to LIMIT, and return
+/* Match REGEXP against the characters after POS to LIMIT, and return
the number of matched characters. If STRING is non-nil, match
against the characters in it. In that case, POS and LIMIT are
indices into the string. This function doesn't modify the match
int multibyte;
struct re_pattern_buffer *buf;
unsigned char *p1, *p2;
- int s1, s2;
+ EMACS_INT s1, s2;
EMACS_INT len;
-
+
if (STRINGP (string))
{
if (pos_byte < 0)
int
scan_buffer (target, start, end, count, shortage, allow_quit)
register int target;
- int start, end;
+ EMACS_INT start, end;
int count;
int *shortage;
int allow_quit;
the position of the last character before the next such
obstacle --- the last character the dumb search loop should
examine. */
- int ceiling_byte = CHAR_TO_BYTE (end) - 1;
- int start_byte = CHAR_TO_BYTE (start);
- int tem;
+ EMACS_INT ceiling_byte = CHAR_TO_BYTE (end) - 1;
+ EMACS_INT start_byte = CHAR_TO_BYTE (start);
+ EMACS_INT tem;
/* If we're looking for a newline, consult the newline cache
to see where we can avoid some scanning. */
while (start > end)
{
/* The last character to check before the next obstacle. */
- int ceiling_byte = CHAR_TO_BYTE (end);
- int start_byte = CHAR_TO_BYTE (start);
- int tem;
+ EMACS_INT ceiling_byte = CHAR_TO_BYTE (end);
+ EMACS_INT start_byte = CHAR_TO_BYTE (start);
+ EMACS_INT tem;
/* Consult the newline cache, if appropriate. */
if (target == '\n' && newline_cache)
int
scan_newline (start, start_byte, limit, limit_byte, count, allow_quit)
- int start, start_byte;
- int limit, limit_byte;
+ EMACS_INT start, start_byte;
+ EMACS_INT limit, limit_byte;
register int count;
int allow_quit;
{
register unsigned char *cursor;
unsigned char *base;
- register int ceiling;
+ EMACS_INT ceiling;
register unsigned char *ceiling_addr;
int old_immediate_quit = immediate_quit;
int
find_next_newline_no_quit (from, cnt)
- register int from, cnt;
+ EMACS_INT from;
+ int cnt;
{
return scan_buffer ('\n', from, 0, cnt, (int *) 0, 0);
}
int
find_before_next_newline (from, to, cnt)
- int from, to, cnt;
+ EMACS_INT from, to;
+ int cnt;
{
int shortage;
int pos = scan_buffer ('\n', from, to, cnt, &shortage, 1);
(i.e. Vinhibit_changing_match_data is non-nil). */
static struct re_registers search_regs_1;
-static int
+static EMACS_INT
search_buffer (string, pos, pos_byte, lim, lim_byte, n,
RE, trt, inverse_trt, posix)
Lisp_Object string;
- int pos;
- int pos_byte;
- int lim;
- int lim_byte;
+ EMACS_INT pos;
+ EMACS_INT pos_byte;
+ EMACS_INT lim;
+ EMACS_INT lim_byte;
int n;
int RE;
Lisp_Object trt;
base_pat++;
}
- c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
+ c = STRING_CHAR_AND_LENGTH (base_pat, in_charlen);
if (NILP (trt))
{
regardless of what is in TRT. It is used in cases where
boyer_moore cannot work. */
-static int
+static EMACS_INT
simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
int n;
unsigned char *pat;
int len, len_byte;
Lisp_Object trt;
- int pos, pos_byte;
- int lim, lim_byte;
+ EMACS_INT pos, pos_byte;
+ EMACS_INT lim, lim_byte;
{
int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
int forward = n > 0;
while (1)
{
/* Try matching at position POS. */
- int this_pos = pos;
- int this_pos_byte = pos_byte;
+ EMACS_INT this_pos = pos;
+ EMACS_INT this_pos_byte = pos_byte;
int this_len = len;
- int this_len_byte = len_byte;
unsigned char *p = pat;
if (pos + len > lim || pos_byte + len_byte > lim_byte)
goto stop;
int charlen, buf_charlen;
int pat_ch, buf_ch;
- pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
+ pat_ch = STRING_CHAR_AND_LENGTH (p, charlen);
buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
- ZV_BYTE - this_pos_byte,
buf_charlen);
TRANSLATE (buf_ch, trt, buf_ch);
if (buf_ch != pat_ch)
break;
- this_len_byte -= charlen;
this_len--;
p += charlen;
while (1)
{
/* Try matching at position POS. */
- int this_pos = pos;
+ EMACS_INT this_pos = pos;
int this_len = len;
unsigned char *p = pat;
while (1)
{
/* Try matching at position POS. */
- int this_pos = pos - len;
- int this_pos_byte;
+ EMACS_INT this_pos = pos;
+ EMACS_INT this_pos_byte = pos_byte;
int this_len = len;
- int this_len_byte = len_byte;
- unsigned char *p = pat;
+ const unsigned char *p = pat + len_byte;
- if (this_pos < lim || (pos_byte - len_byte) < lim_byte)
+ if (this_pos - len < lim || (pos_byte - len_byte) < lim_byte)
goto stop;
- this_pos_byte = CHAR_TO_BYTE (this_pos);
- match_byte = pos_byte - this_pos_byte;
while (this_len > 0)
{
- int charlen, buf_charlen;
+ int charlen;
int pat_ch, buf_ch;
- pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
- buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
- ZV_BYTE - this_pos_byte,
- buf_charlen);
+ DEC_BOTH (this_pos, this_pos_byte);
+ PREV_CHAR_BOUNDARY (p, pat);
+ pat_ch = STRING_CHAR (p);
+ buf_ch = STRING_CHAR (BYTE_POS_ADDR (this_pos_byte));
TRANSLATE (buf_ch, trt, buf_ch);
if (buf_ch != pat_ch)
break;
- this_len_byte -= charlen;
this_len--;
- p += charlen;
- this_pos_byte += buf_charlen;
- this_pos++;
}
if (this_len == 0)
{
- pos -= len;
- pos_byte -= match_byte;
+ match_byte = pos_byte - this_pos_byte;
+ pos = this_pos;
+ pos_byte = this_pos_byte;
break;
}
while (1)
{
/* Try matching at position POS. */
- int this_pos = pos - len;
+ EMACS_INT this_pos = pos - len;
int this_len = len;
unsigned char *p = pat;
If that criterion is not satisfied, do not call this function. */
-static int
+static EMACS_INT
boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
pos, pos_byte, lim, lim_byte, char_base)
int n;
int len, len_byte;
Lisp_Object trt;
Lisp_Object inverse_trt;
- int pos, pos_byte;
- int lim, lim_byte;
+ EMACS_INT pos, pos_byte;
+ EMACS_INT lim, lim_byte;
int char_base;
{
int direction = ((n > 0) ? 1 : -1);
register int dirlen;
- int limit, stride_for_teases = 0;
+ EMACS_INT limit;
+ int stride_for_teases = 0;
int BM_tab[0400];
register unsigned char *cursor, *p_limit;
register int i, j;
while (! (CHAR_HEAD_P (*charstart)))
charstart--;
- ch = STRING_CHAR (charstart, ptr - charstart + 1);
+ ch = STRING_CHAR (charstart);
if (char_base != (ch & ~0x3F))
ch = -1;
}
char if reverse) of pattern would align in a possible match. */
while (n != 0)
{
- int tail_end;
+ EMACS_INT tail_end;
unsigned char *tail_end_ptr;
/* It's been reported that some (broken) compiler thinks that
cursor += dirlen - i - direction; /* fix cursor */
if (i + direction == 0)
{
- int position, start, end;
+ EMACS_INT position, start, end;
cursor -= direction;
pos_byte += dirlen - i - direction;
if (i + direction == 0)
{
- int position, start, end;
+ EMACS_INT position, start, end;
pos_byte -= direction;
position = pos_byte + ((direction > 0) ? 1 - len_byte : 0);
static void
set_search_regs (beg_byte, nbytes)
- int beg_byte, nbytes;
+ EMACS_INT beg_byte, nbytes;
{
int i;
int some_nonuppercase_initial;
register int c, prevc;
int sub;
- int opoint, newpoint;
+ EMACS_INT opoint, newpoint;
CHECK_STRING (newtext);
if (NILP (fixedcase))
{
/* Decide how to casify by examining the matched text. */
- int last;
+ EMACS_INT last;
pos = search_regs.start[sub];
last = search_regs.end[sub];
if desired. */
if (NILP (literal))
{
- int lastpos = 0;
- int lastpos_byte = 0;
+ EMACS_INT lastpos = 0;
+ EMACS_INT lastpos_byte = 0;
/* We build up the substituted string in ACCUM. */
Lisp_Object accum;
Lisp_Object middle;
/* Note that we don't have to increment POS. */
c = SREF (newtext, pos_byte++);
if (buf_multibyte)
- c = unibyte_char_to_multibyte (c);
+ MAKE_CHAR_MULTIBYTE (c);
}
/* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
{
c = SREF (newtext, pos_byte++);
if (buf_multibyte)
- c = unibyte_char_to_multibyte (c);
+ MAKE_CHAR_MULTIBYTE (c);
}
if (c == '&')
set up ADD_STUFF and ADD_LEN to point to it. */
if (idx >= 0)
{
- int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
+ EMACS_INT begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
move_gap (search_regs.start[idx]);
/* Adjust search data for this change. */
{
- int oldend = search_regs.end[sub];
- int oldstart = search_regs.start[sub];
- int change = newpoint - search_regs.end[sub];
+ EMACS_INT oldend = search_regs.end[sub];
+ EMACS_INT oldstart = search_regs.start[sub];
+ EMACS_INT change = newpoint - search_regs.end[sub];
int i;
for (i = 0; i < search_regs.num_regs; i++)
}
else
{
- int from;
+ EMACS_INT from;
Lisp_Object m;
m = marker;
}
searchbuf_head = &searchbufs[0];
- Qsearch_failed = intern ("search-failed");
+ Qsearch_failed = intern_c_string ("search-failed");
staticpro (&Qsearch_failed);
- Qinvalid_regexp = intern ("invalid-regexp");
+ Qinvalid_regexp = intern_c_string ("invalid-regexp");
staticpro (&Qinvalid_regexp);
Fput (Qsearch_failed, Qerror_conditions,
- Fcons (Qsearch_failed, Fcons (Qerror, Qnil)));
+ pure_cons (Qsearch_failed, pure_cons (Qerror, Qnil)));
Fput (Qsearch_failed, Qerror_message,
- build_string ("Search failed"));
+ make_pure_c_string ("Search failed"));
Fput (Qinvalid_regexp, Qerror_conditions,
- Fcons (Qinvalid_regexp, Fcons (Qerror, Qnil)));
+ pure_cons (Qinvalid_regexp, pure_cons (Qerror, Qnil)));
Fput (Qinvalid_regexp, Qerror_message,
- build_string ("Invalid regexp"));
+ make_pure_c_string ("Invalid regexp"));
last_thing_searched = Qnil;
staticpro (&last_thing_searched);