+ if (val >= 0)
+ {
+ pos_byte = search_regs.end[0] + BEGV_BYTE;
+ for (i = 0; i < search_regs.num_regs; i++)
+ if (search_regs.start[i] >= 0)
+ {
+ search_regs.start[i]
+ = BYTE_TO_CHAR (search_regs.start[i] + BEGV_BYTE);
+ search_regs.end[i]
+ = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
+ }
+ XSETBUFFER (last_thing_searched, current_buffer);
+ pos = search_regs.end[0];
+ }
+ else
+ {
+ immediate_quit = 0;
+ return (0 - n);
+ }
+ n--;
+ }
+ immediate_quit = 0;
+ return (pos);
+ }
+ else /* non-RE case */
+ {
+ unsigned char *raw_pattern, *pat;
+ int raw_pattern_size;
+ int raw_pattern_size_byte;
+ unsigned char *patbuf;
+ int multibyte = !NILP (current_buffer->enable_multibyte_characters);
+ unsigned char *base_pat = XSTRING (string)->data;
+ int charset_base = -1;
+ int boyer_moore_ok = 1;
+
+ /* MULTIBYTE says whether the text to be searched is multibyte.
+ We must convert PATTERN to match that, or we will not really
+ find things right. */
+
+ if (multibyte == STRING_MULTIBYTE (string))
+ {
+ raw_pattern = (unsigned char *) XSTRING (string)->data;
+ raw_pattern_size = XSTRING (string)->size;
+ raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
+ }
+ else if (multibyte)
+ {
+ raw_pattern_size = XSTRING (string)->size;
+ raw_pattern_size_byte
+ = count_size_as_multibyte (XSTRING (string)->data,
+ raw_pattern_size);
+ raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
+ copy_text (XSTRING (string)->data, raw_pattern,
+ XSTRING (string)->size, 0, 1);
+ }
+ else
+ {
+ /* Converting multibyte to single-byte.
+
+ ??? Perhaps this conversion should be done in a special way
+ by subtracting nonascii-insert-offset from each non-ASCII char,
+ so that only the multibyte chars which really correspond to
+ the chosen single-byte character set can possibly match. */
+ raw_pattern_size = XSTRING (string)->size;
+ raw_pattern_size_byte = XSTRING (string)->size;
+ raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
+ copy_text (XSTRING (string)->data, raw_pattern,
+ STRING_BYTES (XSTRING (string)), 1, 0);
+ }
+
+ /* Copy and optionally translate the pattern. */
+ len = raw_pattern_size;
+ len_byte = raw_pattern_size_byte;
+ patbuf = (unsigned char *) alloca (len_byte);
+ pat = patbuf;
+ base_pat = raw_pattern;
+ if (multibyte)
+ {
+ while (--len >= 0)
+ {
+ unsigned char str[MAX_MULTIBYTE_LENGTH];
+ int c, translated, inverse;
+ int in_charlen, charlen;
+
+ /* If we got here and the RE flag is set, it's because we're
+ dealing with a regexp known to be trivial, so the backslash
+ just quotes the next character. */
+ if (RE && *base_pat == '\\')
+ {
+ len--;
+ len_byte--;
+ base_pat++;
+ }
+
+ c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
+
+ /* Translate the character, if requested. */
+ TRANSLATE (translated, trt, c);
+ /* If translation changed the byte-length, go back
+ to the original character. */
+ charlen = CHAR_STRING (translated, str);
+ if (in_charlen != charlen)
+ {
+ translated = c;
+ charlen = CHAR_STRING (c, str);
+ }
+
+ /* If we are searching for something strange,
+ an invalid multibyte code, don't use boyer-moore. */
+ if (! ASCII_BYTE_P (translated)
+ && (charlen == 1 /* 8bit code */
+ || charlen != in_charlen /* invalid multibyte code */
+ ))
+ boyer_moore_ok = 0;
+
+ TRANSLATE (inverse, inverse_trt, c);
+
+ /* Did this char actually get translated?
+ Would any other char get translated into it? */
+ if (translated != c || inverse != c)
+ {
+ /* Keep track of which character set row
+ contains the characters that need translation. */
+ int charset_base_code = c & ~CHAR_FIELD3_MASK;
+ if (charset_base == -1)
+ charset_base = charset_base_code;
+ else if (charset_base != charset_base_code)
+ /* If two different rows appear, needing translation,
+ then we cannot use boyer_moore search. */
+ boyer_moore_ok = 0;
+ }
+
+ /* Store this character into the translated pattern. */
+ bcopy (str, pat, charlen);
+ pat += charlen;
+ base_pat += in_charlen;
+ len_byte -= in_charlen;
+ }
+ }
+ else
+ {
+ /* Unibyte buffer. */
+ charset_base = 0;
+ while (--len >= 0)
+ {
+ int c, translated;
+
+ /* If we got here and the RE flag is set, it's because we're
+ dealing with a regexp known to be trivial, so the backslash
+ just quotes the next character. */
+ if (RE && *base_pat == '\\')
+ {
+ len--;
+ base_pat++;
+ }
+ c = *base_pat++;
+ TRANSLATE (translated, trt, c);
+ *pat++ = translated;
+ }
+ }
+
+ len_byte = pat - patbuf;
+ len = raw_pattern_size;
+ pat = base_pat = patbuf;
+
+ if (boyer_moore_ok)
+ return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
+ pos, pos_byte, lim, lim_byte,
+ charset_base);
+ else
+ return simple_search (n, pat, len, len_byte, trt,
+ pos, pos_byte, lim, lim_byte);
+ }
+}
+\f
+/* Do a simple string search N times for the string PAT,
+ whose length is LEN/LEN_BYTE,
+ from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
+ TRT is the translation table.
+
+ Return the character position where the match is found.
+ Otherwise, if M matches remained to be found, return -M.
+
+ This kind of search works regardless of what is in PAT and
+ regardless of what is in TRT. It is used in cases where
+ boyer_moore cannot work.
+
+ The forward loops run only when LIM > POS and N > 0, the backward
+ loops only when LIM < POS and N < 0. In any other combination no
+ searching is done and all of N is reported as still outstanding.
+
+ Only characters fetched from the buffer are passed through TRT;
+ PAT is compared as given (search_buffer translates the pattern
+ before calling this function).
+
+ On success the value returned is the position just after the last
+ match when searching forward, and the position of its start when
+ searching backward; set_search_regs is called for that match.
+ LIM_BYTE is accepted but is not referenced in the body. */
+
+static int
+simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
+ int n; /* Number of matches wanted; the sign is tested for direction. */
+ unsigned char *pat; /* Pattern bytes, compared without translation. */
+ int len, len_byte; /* Pattern length in characters / in bytes. */
+ Lisp_Object trt; /* Translation applied to each buffer character. */
+ int pos, pos_byte; /* Starting position, as character / byte. */
+ int lim, lim_byte; /* Search bound; only LIM is consulted here. */
+{
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+ int forward = n > 0; /* Saved now because N is counted to zero below. */
+
+ if (lim > pos && multibyte) /* Forward search, multibyte buffer. */
+ while (n > 0)
+ {
+ while (1)
+ {
+ /* Try matching at position POS. The THIS_* variables walk
+ the candidate text and the pattern in parallel; POS and
+ POS_BYTE themselves move only once the attempt is decided. */
+ int this_pos = pos;
+ int this_pos_byte = pos_byte;
+ int this_len = len;
+ int this_len_byte = len_byte;
+ unsigned char *p = pat;
+ if (pos + len > lim) /* Fewer than LEN characters left before LIM. */
+ goto stop; /* N stays nonzero, so failure is reported. */
+
+ while (this_len > 0)
+ {
+ int charlen, buf_charlen;
+ int pat_ch, buf_ch;
+
+ pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
+ ZV_BYTE - this_pos_byte,
+ buf_charlen);
+ TRANSLATE (buf_ch, trt, buf_ch); /* Buffer side only; PAT_CH is used as is. */
+
+ if (buf_ch != pat_ch)
+ break; /* Mismatch: THIS_LEN is left nonzero. */
+
+ this_len_byte -= charlen;
+ this_len--;
+ p += charlen;
+
+ this_pos_byte += buf_charlen;
+ this_pos++;
+ }
+
+ if (this_len == 0) /* Every pattern character matched. */
+ {
+ pos += len;
+ pos_byte += len_byte; /* NOTE(review): advances by the pattern's byte
+ length, not by the bytes consumed in the buffer
+ (THIS_POS_BYTE - POS_BYTE). Confirm TRT can never
+ equate characters of different byte lengths. */
+ break;
+ }
+
+ INC_BOTH (pos, pos_byte); /* No match here; retry at the next position
+ (presumably one character further on). */
+ }
+
+ n--; /* One more occurrence found. */
+ }
+ else if (lim > pos) /* Forward search, unibyte buffer. */
+ while (n > 0)
+ {
+ while (1)
+ {
+ /* Try matching at position POS. Here one pattern byte is
+ compared against one translated buffer byte per step. */
+ int this_pos = pos;
+ int this_len = len;
+ unsigned char *p = pat;
+
+ if (pos + len > lim) /* Fewer than LEN characters left before LIM. */
+ goto stop;
+
+ while (this_len > 0)
+ {
+ int pat_ch = *p++;
+ int buf_ch = FETCH_BYTE (this_pos);
+ TRANSLATE (buf_ch, trt, buf_ch); /* Buffer side only. */
+
+ if (buf_ch != pat_ch)
+ break;
+
+ this_len--;
+ this_pos++;
+ }
+
+ if (this_len == 0) /* Full match: leave POS just after it. */
+ {
+ pos += len;
+ break;
+ }
+
+ pos++; /* No match here; retry one position further on. */
+ }
+
+ n--;
+ }
+ /* Backwards search. The two branches below mirror the two above:
+ each candidate is the text that ends at POS, it is still compared
+ front to back, and N is counted up towards zero. */
+ else if (lim < pos && multibyte)
+ while (n < 0)
+ {
+ while (1)
+ {
+ /* Try matching at position POS, i.e. the LEN characters
+ that end there.
+ NOTE(review): the candidate's starting byte is taken to be
+ POS_BYTE - LEN_BYTE, which assumes the buffer text has the
+ same byte length as PAT -- confirm. */
+ int this_pos = pos - len;
+ int this_pos_byte = pos_byte - len_byte;
+ int this_len = len;
+ int this_len_byte = len_byte;
+ unsigned char *p = pat;
+
+ if (pos - len < lim) /* Candidate would begin before LIM. */
+ goto stop;
+
+ while (this_len > 0)
+ {
+ int charlen, buf_charlen;
+ int pat_ch, buf_ch;
+
+ pat_ch = STRING_CHAR_AND_LENGTH (p, this_len_byte, charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (BYTE_POS_ADDR (this_pos_byte),
+ ZV_BYTE - this_pos_byte,
+ buf_charlen);
+ TRANSLATE (buf_ch, trt, buf_ch); /* Buffer side only. */
+
+ if (buf_ch != pat_ch)
+ break;
+
+ this_len_byte -= charlen;
+ this_len--;
+ p += charlen;
+ this_pos_byte += buf_charlen;
+ this_pos++;
+ }
+
+ if (this_len == 0) /* Full match: leave POS at its start. */
+ {
+ pos -= len;
+ pos_byte -= len_byte;
+ break;
+ }
+
+ DEC_BOTH (pos, pos_byte); /* No match here; retry at the previous position
+ (presumably one character back). */
+ }
+
+ n++; /* One more occurrence found. */
+ }
+ else if (lim < pos) /* Backward search, unibyte buffer. */
+ while (n < 0)
+ {
+ while (1)
+ {
+ /* Try matching at position POS, i.e. the LEN bytes that
+ end there. */
+ int this_pos = pos - len;
+ int this_len = len;
+ unsigned char *p = pat;
+
+ if (pos - len < lim) /* Candidate would begin before LIM. */
+ goto stop;
+
+ while (this_len > 0)
+ {
+ int pat_ch = *p++;
+ int buf_ch = FETCH_BYTE (this_pos);
+ TRANSLATE (buf_ch, trt, buf_ch); /* Buffer side only. */
+
+ if (buf_ch != pat_ch)
+ break;
+ this_len--;
+ this_pos++;
+ }
+
+ if (this_len == 0) /* Full match: leave POS at its start. */
+ {
+ pos -= len;
+ break;
+ }
+
+ pos--; /* No match here; retry one position back. */
+ }
+
+ n++;
+ }
+
+ stop: /* Reached by falling out of a loop (N == 0) or by an early exit. */
+ if (n == 0)
+ {
+ /* All requested matches were found. The first argument below is
+ the start of the last match: after a forward search POS is just
+ past it, so LEN_BYTE is subtracted; after a backward search POS
+ is already at its start. In a unibyte buffer POS stands in for
+ the byte position (presumably characters and bytes coincide
+ there -- confirm against set_search_regs). */
+ if (forward)
+ set_search_regs ((multibyte ? pos_byte : pos) - len_byte, len_byte);
+ else
+ set_search_regs (multibyte ? pos_byte : pos, len_byte);
+
+ return pos;
+ }
+ else if (n > 0)
+ return -n; /* Forward: N matches were still missing. */
+ else
+ return n; /* Backward: N is already the negated count of missing matches. */
+}
+\f
+/* Do Boyer-Moore search N times for the string PAT,
+ whose length is LEN/LEN_BYTE,
+ from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
+ The sign of N says which direction we search in.
+ TRT and INVERSE_TRT are translation tables.
+
+ This kind of search works if all the characters in PAT that have
+ nontrivial translation are the same aside from the last byte. This
+ makes it possible to translate just the last byte of a character,
+ and do so after just a simple test of the context.
+
+ If that criterion is not satisfied, do not call this function. */
+
+static int
+boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
+ pos, pos_byte, lim, lim_byte, charset_base)
+ int n;
+ unsigned char *base_pat;
+ int len, len_byte;
+ Lisp_Object trt;
+ Lisp_Object inverse_trt;
+ int pos, pos_byte;
+ int lim, lim_byte;
+ int charset_base;
+{
+ int direction = ((n > 0) ? 1 : -1);
+ register int dirlen;
+ int infinity, limit, stride_for_teases = 0;
+ register int *BM_tab;
+ int *BM_tab_base;
+ register unsigned char *cursor, *p_limit;
+ register int i, j;
+ unsigned char *pat, *pat_end;
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+
+ unsigned char simple_translate[0400];
+ int translate_prev_byte = 0;
+ int translate_anteprev_byte = 0;
+
+#ifdef C_ALLOCA
+ int BM_tab_space[0400];
+ BM_tab = &BM_tab_space[0];
+#else
+ BM_tab = (int *) alloca (0400 * sizeof (int));
+#endif
+ /* The general approach is that we are going to maintain that we know */
+ /* the first (closest to the present position, in whatever direction */
+ /* we're searching) character that could possibly be the last */
+ /* (furthest from present position) character of a valid match. We */
+ /* advance the state of our knowledge by looking at that character */
+ /* and seeing whether it indeed matches the last character of the */
+ /* pattern. If it does, we take a closer look. If it does not, we */
+ /* move our pointer (to putative last characters) as far as is */
+ /* logically possible. This amount of movement, which I call a */
+ /* stride, will be the length of the pattern if the actual character */
+ /* appears nowhere in the pattern, otherwise it will be the distance */
+ /* from the last occurrence of that character to the end of the */
+ /* pattern. */
+ /* As a coding trick, an enormous stride is coded into the table for */
+ /* characters that match the last character. This allows use of only */
+ /* a single test, a test for having gone past the end of the */
+ /* permissible match region, to test for both possible matches (when */
+ /* the stride goes past the end immediately) and failure to */
+ /* match (where you get nudged past the end one stride at a time). */
+
+ /* Here we make a "mickey mouse" BM table. The stride of the search */
+ /* is determined only by the last character of the putative match. */
+ /* If that character does not match, we will stride the proper */
+ /* distance to propose a match that superimposes it on the last */
+ /* instance of a character that matches it (per trt), or misses */
+ /* it entirely if there is none. */
+
+ dirlen = len_byte * direction;
+ infinity = dirlen - (lim_byte + pos_byte + len_byte + len_byte) * direction;
+
+ /* Record position after the end of the pattern. */
+ pat_end = base_pat + len_byte;
+ /* BASE_PAT points to a character that we start scanning from.
+ It is the first character in a forward search,
+ the last character in a backward search. */
+ if (direction < 0)
+ base_pat = pat_end - 1;
+
+ BM_tab_base = BM_tab;
+ BM_tab += 0400;
+ j = dirlen; /* to get it in a register */
+ /* A character that does not appear in the pattern induces a */
+ /* stride equal to the pattern length. */
+ while (BM_tab_base != BM_tab)
+ {
+ *--BM_tab = j;
+ *--BM_tab = j;
+ *--BM_tab = j;
+ *--BM_tab = j;
+ }
+
+ /* We use this for translation, instead of TRT itself.
+ We fill this in to handle the characters that actually
+ occur in the pattern. Others don't matter anyway! */
+ bzero (simple_translate, sizeof simple_translate);
+ for (i = 0; i < 0400; i++)
+ simple_translate[i] = i;
+
+ i = 0;
+ while (i != infinity)
+ {
+ unsigned char *ptr = base_pat + i;
+ i += direction;
+ if (i == dirlen)
+ i = infinity;
+ if (! NILP (trt))
+ {
+ int ch;
+ int untranslated;
+ int this_translated = 1;
+
+ if (multibyte
+ /* Is *PTR the last byte of a character? */
+ && (pat_end - ptr == 1 || CHAR_HEAD_P (ptr[1])))
+ {
+ unsigned char *charstart = ptr;
+ while (! CHAR_HEAD_P (*charstart))
+ charstart--;
+ untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
+ if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
+ {
+ TRANSLATE (ch, trt, untranslated);
+ if (! CHAR_HEAD_P (*ptr))
+ {
+ translate_prev_byte = ptr[-1];
+ if (! CHAR_HEAD_P (translate_prev_byte))
+ translate_anteprev_byte = ptr[-2];
+ }
+ }
+ else
+ {
+ this_translated = 0;
+ ch = *ptr;
+ }
+ }
+ else if (!multibyte)
+ TRANSLATE (ch, trt, *ptr);
+ else