/* String search routines for GNU Emacs.
- Copyright (C) 1985, 86,87,93,94,97,98, 1999, 2004
- Free Software Foundation, Inc.
+ Copyright (C) 1985, 1986, 1987, 1993, 1994, 1997, 1998, 1999, 2002, 2003,
+ 2004, 2005 Free Software Foundation, Inc.
This file is part of GNU Emacs.
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
doc: /* Return index of start of first match for REGEXP in STRING, or nil.
-Case is ignored if `case-fold-search' is non-nil in the current buffer.
+Matching ignores case if `case-fold-search' is non-nil.
If third arg START is non-nil, start search at that index in STRING.
For index of first char beyond the match, do (match-end 0).
`match-end' and `match-beginning' also give indices of substrings
int raw_pattern_size_byte;
unsigned char *patbuf;
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
- unsigned char *base_pat = SDATA (string);
- /* Set to nozero if we find a non-ASCII char that need
- translation. */
- int char_base = 0;
+ unsigned char *base_pat;
+ /* Set to positive if we find a non-ASCII char that needs
+ translation. Otherwise set to zero later. */
+ int char_base = -1;
int boyer_moore_ok = 1;
/* MULTIBYTE says whether the text to be searched is multibyte.
{
/* Check if all equivalents belong to the same
group of characters. Note that the check of C
- itself is done by the last iteration. Note
- also that we don't have to check ASCII
- characters because boyer-moore search can
- always handle their translation. */
- while (1)
+ itself is done by the last iteration. */
+ int this_char_base = -1;
+
+ while (boyer_moore_ok)
{
- if (! ASCII_BYTE_P (inverse))
+ if (ASCII_BYTE_P (inverse))
{
- if (CHAR_BYTE8_P (inverse))
- {
- /* Boyer-moore search can't handle a
- translation of an eight-bit
- character. */
- boyer_moore_ok = 0;
- break;
- }
- else if (char_base == 0)
- char_base = inverse & ~0x3F;
- else if ((inverse & ~0x3F)
- != char_base)
+ if (this_char_base > 0)
+ boyer_moore_ok = 0;
+ else
{
- boyer_moore_ok = 0;
- break;
+ this_char_base = 0;
+ if (char_base < 0)
+ char_base = this_char_base;
}
}
+ else if (CHAR_BYTE8_P (inverse))
+ /* Boyer-moore search can't handle a
+ translation of an eight-bit
+ character. */
+ boyer_moore_ok = 0;
+ else if (this_char_base < 0)
+ {
+ this_char_base = inverse & ~0x3F;
+ if (char_base < 0)
+ char_base = this_char_base;
+ else if (char_base > 0
+ && this_char_base != char_base)
+ boyer_moore_ok = 0;
+ }
+ else if ((inverse & ~0x3F) != this_char_base)
+ boyer_moore_ok = 0;
if (c == inverse)
break;
TRANSLATE (inverse, inverse_trt, inverse);
}
}
}
+ if (char_base < 0)
+ char_base = 0;
/* Store this character into the translated pattern. */
bcopy (str, pat, charlen);
{
int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
int forward = n > 0;
+ /* Number of buffer bytes matched. Note that this may be different
+ from len_byte in a multibyte buffer. */
+ int match_byte;
if (lim > pos && multibyte)
while (n > 0)
if (this_len == 0)
{
+ match_byte = this_pos_byte - pos_byte;
pos += len;
- pos_byte += len_byte;
+ pos_byte += match_byte;
break;
}
if (this_len == 0)
{
+ match_byte = len;
pos += len;
break;
}
if (pos - len < lim)
goto stop;
this_pos_byte = CHAR_TO_BYTE (this_pos);
+ match_byte = pos_byte - this_pos_byte;
while (this_len > 0)
{
if (this_len == 0)
{
pos -= len;
- pos_byte -= len_byte;
+ pos_byte -= match_byte;
break;
}
if (this_len == 0)
{
+ match_byte = len;
pos -= len;
break;
}
if (n == 0)
{
if (forward)
- set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
+ set_search_regs ((multibyte ? pos_byte : pos) - match_byte, match_byte);
else
- set_search_regs (multibyte ? pos_byte : pos, len_byte);
+ set_search_regs (multibyte ? pos_byte : pos, match_byte);
return pos;
}
unsigned char simple_translate[0400];
/* These are set to the preceding bytes of a byte to be translated
- if charset_base is nonzero. As the maximum byte length of a
+ if char_base is nonzero. As the maximum byte length of a
multibyte character is 5, we have to check at most four previous
bytes. */
int translate_prev_byte1 = 0;
i = infinity;
if (! NILP (trt))
{
- /* If the byte currently looking at is a head of a character
- to check case-equivalents, set CH to that character. An
- ASCII character and a non-ASCII character matching with
- CHAR_BASE are to be checked. */
+ /* If the byte we are currently looking at is the last byte of
+ a character whose case-equivalents must be checked, set CH
+ to that character. ASCII characters and non-ASCII characters
+ matching CHAR_BASE are the ones to be checked. */
int ch = -1;
if (ASCII_BYTE_P (*ptr) || ! multibyte)
ch = *ptr;
- else if (char_base && CHAR_HEAD_P (*ptr))
+ else if (char_base
+ && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
{
- ch = STRING_CHAR (ptr, pat_end - ptr);
+ unsigned char *charstart = ptr - 1;
+
+ while (! (CHAR_HEAD_P (*charstart)))
+ charstart--;
+ ch = STRING_CHAR (charstart, ptr - charstart + 1);
if (char_base != (ch & ~0x3F))
ch = -1;
}
- j = *ptr;
+ if (ch > 0400)
+ j = (ch & 0x3F) | 0200;
+ else
+ j = *ptr;
+
if (i == infinity)
stride_for_teases = BM_tab[j];
if (ch >= 0)
{
int starting_ch = ch;
- int starting_j;
+ int starting_j = j;
- if (ch > 0400)
- starting_j = (ch & ~0x3F) | 0200;
- else
- starting_j = ch;
while (1)
{
TRANSLATE (ch, inverse_trt, ch);
if (ch > 0400)
- j = (ch & ~0x3F) | 0200;
+ j = (ch & 0x3F) | 0200;
else
j = ch;
else
some_multiletter_word = 1;
}
- else if (!NOCASEP (c))
+ else if (UPPERCASEP (c))
{
some_uppercase = 1;
if (SYNTAX (prevc) != Sword)
searchbufs[i].regexp = Qnil;
searchbufs[i].whitespace_regexp = Qnil;
staticpro (&searchbufs[i].regexp);
+ staticpro (&searchbufs[i].whitespace_regexp);
searchbufs[i].next = (i == REGEXP_CACHE_SIZE-1 ? 0 : &searchbufs[i+1]);
}
searchbuf_head = &searchbufs[0];