/* String search routines for GNU Emacs.
- Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
+ Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
This file is part of GNU Emacs.
#include <config.h>
-#ifdef STDC_HEADERS
-#include <stdlib.h>
-#endif
#include "lisp.h"
#include "syntax.h"
#include "category.h"
error ("Stack overflow in regexp matcher");
}
-#ifdef __STDC__
-#define CONST const
-#else
-#define CONST
-#endif
-
/* Compile a regexp and signal a Lisp error if anything goes wrong.
PATTERN is the pattern to compile.
CP is the place to put the result.
cp->regexp = Fcopy_sequence (pattern);
}
+/* Shrink each compiled regexp buffer in the cache
+   to the size actually used right now.
+   This is called from garbage collection.
+
+   Shrinking is only an optimization: if realloc cannot resize a
+   buffer that is in use, that cache entry is left untouched, so its
+   buffer pointer and recorded allocation size stay consistent.  */
+
+void
+shrink_regexp_cache ()
+{
+  struct regexp_cache *cp;
+
+  for (cp = searchbuf_head; cp != 0; cp = cp->next)
+    {
+      unsigned char *shrunk
+        = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
+
+      /* A null result for a nonzero size means realloc failed and the
+         old block still belongs to this entry; keep it rather than
+         overwriting the only pointer to it.  For a zero-size request
+         the result is stored as before, whatever realloc returned.  */
+      if (shrunk == 0 && cp->buf.used != 0)
+        continue;
+
+      cp->buf.buffer = shrunk;
+      cp->buf.allocated = cp->buf.used;
+    }
+}
+
/* Compile a regexp if necessary, but first check to see if there's one in
the cache.
PATTERN is the pattern to compile.
for (cpp = &searchbuf_head; ; cpp = &cp->next)
{
cp = *cpp;
+ /* Entries are initialized to nil, and may be set to nil by
+ compile_pattern_1 if the pattern isn't valid. Don't apply
+ XSTRING in those cases. However, compile_pattern_1 is only
+ applied to the cache entry we pick here to reuse. So nil
+ should never appear before a non-nil entry. */
+ if (NILP (cp->regexp))
+ goto compile_it;
if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
&& !NILP (Fstring_equal (cp->regexp, pattern))
&& EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
&& cp->buf.multibyte == multibyte)
break;
- /* If we're at the end of the cache, compile into the last cell. */
+ /* If we're at the end of the cache, compile into the nil cell
+ we found, or the last (least recently used) cell with a
+ string value. */
if (cp->next == 0)
{
+ compile_it:
compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
break;
}
i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
PT_BYTE - BEGV_BYTE, &search_regs,
ZV_BYTE - BEGV_BYTE);
+ immediate_quit = 0;
+
if (i == -2)
matcher_overflow ();
= BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
}
XSETBUFFER (last_thing_searched, current_buffer);
- immediate_quit = 0;
return val;
}
DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
"Return index of start of first match for REGEXP in STRING, or nil.\n\
+Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
If third arg START is non-nil, start search at that index in STRING.\n\
For index of first char beyond the match, do (match-end 0).\n\
`match-end' and `match-beginning' also give indices of substrings\n\
DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
"Return index of start of first match for REGEXP in STRING, or nil.\n\
Find the longest match, in accord with Posix regular expression rules.\n\
+Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
If third arg START is non-nil, start search at that index in STRING.\n\
For index of first char beyond the match, do (match-end 0).\n\
`match-end' and `match-beginning' also give indices of substrings\n\
if (running_asynch_code)
save_search_regs ();
+ /* Searching 0 times means don't move. */
/* Null string is found at starting position. */
- if (len == 0)
+ if (len == 0 || n == 0)
{
set_search_regs (pos, 0);
return pos;
}
- /* Searching 0 times means don't move. */
- if (n == 0)
- return pos;
-
if (RE && !trivial_regexp_p (string))
{
unsigned char *p1, *p2;
int multibyte = !NILP (current_buffer->enable_multibyte_characters);
unsigned char *base_pat = XSTRING (string)->data;
int charset_base = -1;
- int simple = 1;
+ int boyer_moore_ok = 1;
/* MULTIBYTE says whether the text to be searched is multibyte.
We must convert PATTERN to match that, or we will not really
{
while (--len >= 0)
{
- unsigned char workbuf[4], *str;
+ unsigned char str[MAX_MULTIBYTE_LENGTH];
int c, translated, inverse;
int in_charlen, charlen;
}
c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
+
/* Translate the character, if requested. */
TRANSLATE (translated, trt, c);
/* If translation changed the byte-length, go back
to the original character. */
- charlen = CHAR_STRING (translated, workbuf, str);
+ charlen = CHAR_STRING (translated, str);
if (in_charlen != charlen)
{
translated = c;
- charlen = CHAR_STRING (c, workbuf, str);
+ charlen = CHAR_STRING (c, str);
}
+ /* If we are searching for something strange,
+ an invalid multibyte code, don't use boyer-moore. */
+ if (! ASCII_BYTE_P (translated)
+ && (charlen == 1 /* 8bit code */
+ || charlen != in_charlen /* invalid multibyte code */
+ ))
+ boyer_moore_ok = 0;
+
TRANSLATE (inverse, inverse_trt, c);
/* Did this char actually get translated?
{
/* Keep track of which character set row
contains the characters that need translation. */
- int charset_base_code = c & ~0xff;
+ int charset_base_code = c & ~CHAR_FIELD3_MASK;
if (charset_base == -1)
charset_base = charset_base_code;
else if (charset_base != charset_base_code)
/* If two different rows appear, needing translation,
then we cannot use boyer_moore search. */
- simple = 0;
- /* ??? Handa: this must do simple = 0
- if c is a composite character. */
+ boyer_moore_ok = 0;
}
/* Store this character into the translated pattern. */
}
else
{
+ /* Unibyte buffer. */
+ charset_base = 0;
while (--len >= 0)
{
- int c, translated, inverse;
+ int c, translated;
/* If we got here and the RE flag is set, it's because we're
dealing with a regexp known to be trivial, so the backslash
}
c = *base_pat++;
TRANSLATE (translated, trt, c);
- TRANSLATE (inverse, inverse_trt, c);
-
- /* Did this char actually get translated?
- Would any other char get translated into it? */
- if (translated != c || inverse != c)
- {
- /* Keep track of which character set row
- contains the characters that need translation. */
- int charset_base_code = c & ~0xff;
- if (charset_base == -1)
- charset_base = charset_base_code;
- else if (charset_base != charset_base_code)
- /* If two different rows appear, needing translation,
- then we cannot use boyer_moore search. */
- simple = 0;
- }
*pat++ = translated;
}
}
len = raw_pattern_size;
pat = base_pat = patbuf;
- if (simple)
+ if (boyer_moore_ok)
return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
pos, pos_byte, lim, lim_byte,
charset_base);
while (! CHAR_HEAD_P (*charstart))
charstart--;
untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
- if (charset_base == (untranslated & ~0xff))
+ if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
{
TRANSLATE (ch, trt, untranslated);
if (! CHAR_HEAD_P (*ptr))
}
/* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
- for a match just found in the current buffer. */
+ for the overall match just found in the current buffer.
+ Also clear out the match data for registers 1 and up. */
static void
set_search_regs (beg_byte, nbytes)
int beg_byte, nbytes;
{
+ int i;
+
/* Make sure we have registers in which to store
the match position. */
if (search_regs.num_regs == 0)
search_regs.num_regs = 2;
}
+ /* Clear out the other registers. */
+ for (i = 1; i < search_regs.num_regs; i++)
+ {
+ search_regs.start[i] = -1;
+ search_regs.end[i] = -1;
+ }
+
search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
XSETBUFFER (last_thing_searched, current_buffer);
return build_string ("");
adjust = - punct_count + 5 * (word_count - 1) + 4;
- val = make_uninit_multibyte_string (len + adjust,
- STRING_BYTES (XSTRING (string)) + adjust);
+ if (STRING_MULTIBYTE (string))
+ val = make_uninit_multibyte_string (len + adjust,
+ STRING_BYTES (XSTRING (string))
+ + adjust);
+ else
+ val = make_uninit_string (len + adjust);
o = XSTRING (val)->data;
*o++ = '\\';
if (STRING_MULTIBYTE (string))
FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
else
- c = XSTRING (string)->data[i++];
+ {
+ c = XSTRING (string)->data[i++];
+ i_byte++;
+ }
if (SYNTAX (c) == Sword)
{
Lisp_Object newtext, fixedcase, literal, string, subexp;
{
enum { nochange, all_caps, cap_initial } case_action;
- register int pos, last;
+ register int pos, pos_byte;
int some_multiletter_word;
int some_lowercase;
int some_uppercase;
if (NILP (fixedcase))
{
- int beg;
/* Decide how to casify by examining the matched text. */
+ int last;
- if (NILP (string))
- last = CHAR_TO_BYTE (search_regs.end[sub]);
- else
- last = search_regs.end[sub];
+ pos = search_regs.start[sub];
+ last = search_regs.end[sub];
if (NILP (string))
- beg = CHAR_TO_BYTE (search_regs.start[sub]);
+ pos_byte = CHAR_TO_BYTE (pos);
else
- beg = search_regs.start[sub];
+ pos_byte = string_char_to_byte (string, pos);
prevc = '\n';
case_action = all_caps;
some_nonuppercase_initial = 0;
some_uppercase = 0;
- for (pos = beg; pos < last; pos++)
+ while (pos < last)
{
if (NILP (string))
- c = FETCH_BYTE (pos);
+ {
+ c = FETCH_CHAR (pos_byte);
+ INC_BOTH (pos, pos_byte);
+ }
else
- c = XSTRING (string)->data[pos];
+ FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
if (LOWERCASEP (c))
{
if desired. */
if (NILP (literal))
{
- int lastpos = -1;
- int lastpos_byte = -1;
+ int lastpos = 0;
+ int lastpos_byte = 0;
/* We build up the substituted string in ACCUM. */
Lisp_Object accum;
Lisp_Object middle;
- int pos_byte;
+ int length = STRING_BYTES (XSTRING (newtext));
accum = Qnil;
- for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
+ for (pos_byte = 0, pos = 0; pos_byte < length;)
{
int substart = -1;
int subend;
}
if (substart >= 0)
{
- if (pos - 1 != lastpos + 1)
- middle = substring_both (newtext, lastpos + 1,
- lastpos_byte + 1,
- pos - 1, pos_byte - 1);
+ if (pos - 2 != lastpos)
+ middle = substring_both (newtext, lastpos,
+ lastpos_byte,
+ pos - 2, pos_byte - 2);
else
middle = Qnil;
accum = concat3 (accum, middle,
}
else if (delbackslash)
{
- middle = substring_both (newtext, lastpos + 1,
- lastpos_byte + 1,
- pos, pos_byte);
+ middle = substring_both (newtext, lastpos,
+ lastpos_byte,
+ pos - 1, pos_byte - 1);
accum = concat2 (accum, middle);
lastpos = pos;
}
}
- if (pos != lastpos + 1)
- middle = substring_both (newtext, lastpos + 1,
- lastpos_byte + 1,
+ if (pos != lastpos)
+ middle = substring_both (newtext, lastpos,
+ lastpos_byte,
pos, pos_byte);
else
middle = Qnil;
}
/* Record point, the move (quietly) to the start of the match. */
- if (PT > search_regs.start[sub])
+ if (PT >= search_regs.end[sub])
opoint = PT - ZV;
+ else if (PT > search_regs.start[sub])
+ opoint = search_regs.end[sub] - ZV;
else
opoint = PT;
Finsert_and_inherit (1, &newtext);
else
{
- struct gcpro gcpro1;
- GCPRO1 (newtext);
-
- for (pos = 0; pos < XSTRING (newtext)->size; pos++)
+ int length = STRING_BYTES (XSTRING (newtext));
+ unsigned char *substed;
+ int substed_alloc_size, substed_len;
+ int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
+ int str_multibyte = STRING_MULTIBYTE (newtext);
+ Lisp_Object rev_tbl;
+
+ rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
+ ? Fchar_table_extra_slot (Vnonascii_translation_table,
+ make_number (0))
+ : Qnil);
+
+ substed_alloc_size = length * 2 + 100;
+ substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
+ substed_len = 0;
+
+ /* Go thru NEWTEXT, producing the actual text to insert in
+ SUBSTED while adjusting multibyteness to that of the current
+ buffer. */
+
+ for (pos_byte = 0, pos = 0; pos_byte < length;)
{
- int offset = PT - search_regs.start[sub];
+ unsigned char str[MAX_MULTIBYTE_LENGTH];
+ unsigned char *add_stuff;
+ int add_len;
+ int idx = -1;
+
+ if (str_multibyte)
+ {
+ FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
+ if (!buf_multibyte)
+ c = multibyte_char_to_unibyte (c, rev_tbl);
+ }
+ else
+ {
+ /* Note that we don't have to increment POS. */
+ c = XSTRING (newtext)->data[pos_byte++];
+ if (buf_multibyte)
+ c = unibyte_char_to_multibyte (c);
+ }
+
+ /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
+ or set IDX to a match index, which means put that part
+ of the buffer text into SUBSTED. */
- c = XSTRING (newtext)->data[pos];
if (c == '\\')
{
- c = XSTRING (newtext)->data[++pos];
+ if (str_multibyte)
+ {
+ FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
+ if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
+ c = multibyte_char_to_unibyte (c, rev_tbl);
+ }
+ else
+ {
+ c = XSTRING (newtext)->data[pos_byte++];
+ if (buf_multibyte)
+ c = unibyte_char_to_multibyte (c);
+ }
+
if (c == '&')
- Finsert_buffer_substring
- (Fcurrent_buffer (),
- make_number (search_regs.start[sub] + offset),
- make_number (search_regs.end[sub] + offset));
+ idx = sub;
else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
{
if (search_regs.start[c - '0'] >= 1)
- Finsert_buffer_substring
- (Fcurrent_buffer (),
- make_number (search_regs.start[c - '0'] + offset),
- make_number (search_regs.end[c - '0'] + offset));
+ idx = c - '0';
}
else if (c == '\\')
- insert_char (c);
+ add_len = 1, add_stuff = "\\";
else
- error ("Invalid use of `\\' in replacement text");
+ {
+ xfree (substed);
+ error ("Invalid use of `\\' in replacement text");
+ }
}
else
- insert_char (c);
+ {
+ add_len = CHAR_STRING (c, str);
+ add_stuff = str;
+ }
+
+ /* If we want to copy part of a previous match,
+ set up ADD_STUFF and ADD_LEN to point to it. */
+ if (idx >= 0)
+ {
+ int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
+ add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
+ if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
+ move_gap (search_regs.start[idx]);
+ add_stuff = BYTE_POS_ADDR (begbyte);
+ }
+
+ /* Now the stuff we want to add to SUBSTED
+ is invariably ADD_LEN bytes starting at ADD_STUFF. */
+
+ /* Make sure SUBSTED is big enough. */
+ if (substed_len + add_len >= substed_alloc_size)
+ {
+ substed_alloc_size = substed_len + add_len + 500;
+ substed = (unsigned char *) xrealloc (substed,
+ substed_alloc_size + 1);
+ }
+
+ /* Now add to the end of SUBSTED. */
+ bcopy (add_stuff, substed + substed_len, add_len);
+ substed_len += add_len;
}
- UNGCPRO;
+
+ /* Now insert what we accumulated. */
+ insert_and_inherit (substed, substed_len);
+
+ xfree (substed);
}
inslen = PT - (search_regs.start[sub]);
/* If REUSE is a list, store as many value elements as will fit
into the elements of REUSE. */
for (i = 0, tail = reuse; CONSP (tail);
- i++, tail = XCONS (tail)->cdr)
+ i++, tail = XCDR (tail))
{
if (i < 2 * len + 2)
- XCONS (tail)->car = data[i];
+ XCAR (tail) = data[i];
else
- XCONS (tail)->car = Qnil;
+ XCAR (tail) = Qnil;
prev = tail;
}
/* If we couldn't fit all value elements into REUSE,
cons up the rest of them and add them to the end of REUSE. */
if (i < 2 * len + 2)
- XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
+ XCDR (prev) = Flist (2 * len + 2 - i, data + i);
return reuse;
}