+ unsigned char *raw_pattern, *pat;
+ int raw_pattern_size;
+ int raw_pattern_size_byte;
+ unsigned char *patbuf;
+ int multibyte = !NILP (current_buffer->enable_multibyte_characters);
+ unsigned char *base_pat = XSTRING (string)->data;
+ int charset_base = -1;
+ int boyer_moore_ok = 1;
+
+ /* MULTIBYTE says whether the text to be searched is multibyte.
+ We must convert PATTERN to match that, or we will not really
+ find things right. */
+
+ if (multibyte == STRING_MULTIBYTE (string))
+ {
+ raw_pattern = (unsigned char *) XSTRING (string)->data;
+ raw_pattern_size = XSTRING (string)->size;
+ raw_pattern_size_byte = STRING_BYTES (XSTRING (string));
+ }
+ else if (multibyte)
+ {
+ raw_pattern_size = XSTRING (string)->size;
+ raw_pattern_size_byte
+ = count_size_as_multibyte (XSTRING (string)->data,
+ raw_pattern_size);
+ raw_pattern = (unsigned char *) alloca (raw_pattern_size_byte + 1);
+ copy_text (XSTRING (string)->data, raw_pattern,
+ XSTRING (string)->size, 0, 1);
+ }
+ else
+ {
+ /* Converting multibyte to single-byte.
+
+ ??? Perhaps this conversion should be done in a special way
+ by subtracting nonascii-insert-offset from each non-ASCII char,
+ so that only the multibyte chars which really correspond to
+ the chosen single-byte character set can possibly match. */
+ raw_pattern_size = XSTRING (string)->size;
+ raw_pattern_size_byte = XSTRING (string)->size;
+ raw_pattern = (unsigned char *) alloca (raw_pattern_size + 1);
+ copy_text (XSTRING (string)->data, raw_pattern,
+ STRING_BYTES (XSTRING (string)), 1, 0);
+ }
+
+ /* Copy and optionally translate the pattern. */
+ len = raw_pattern_size;
+ len_byte = raw_pattern_size_byte;
+ patbuf = (unsigned char *) alloca (len_byte);
+ pat = patbuf;
+ base_pat = raw_pattern;
+ if (multibyte)
+ {
+ while (--len >= 0)
+ {
+ unsigned char str[MAX_MULTIBYTE_LENGTH];
+ int c, translated, inverse;
+ int in_charlen, charlen;
+
+ /* If we got here and the RE flag is set, it's because we're
+ dealing with a regexp known to be trivial, so the backslash
+ just quotes the next character. */
+ if (RE && *base_pat == '\\')
+ {
+ len--;
+ len_byte--;
+ base_pat++;
+ }
+
+ c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
+
+ /* Translate the character, if requested. */
+ TRANSLATE (translated, trt, c);
+ /* If translation changed the byte-length, go back
+ to the original character. */
+ charlen = CHAR_STRING (translated, str);
+ if (in_charlen != charlen)
+ {
+ translated = c;
+ charlen = CHAR_STRING (c, str);
+ }
+
+ /* If we are searching for something strange,
+ an invalid multibyte code, don't use boyer-moore. */
+ if (! ASCII_BYTE_P (translated)
+ && (charlen == 1 /* 8bit code */
+ || charlen != in_charlen /* invalid multibyte code */
+ ))
+ boyer_moore_ok = 0;
+
+ TRANSLATE (inverse, inverse_trt, c);
+
+ /* Did this char actually get translated?
+ Would any other char get translated into it? */
+ if (translated != c || inverse != c)
+ {
+ /* Keep track of which character set row
+ contains the characters that need translation. */
+ int charset_base_code = c & ~CHAR_FIELD3_MASK;
+ if (charset_base == -1)
+ charset_base = charset_base_code;
+ else if (charset_base != charset_base_code)
+ /* If two different rows appear, needing translation,
+ then we cannot use boyer_moore search. */
+ boyer_moore_ok = 0;
+ }
+
+ /* Store this character into the translated pattern. */
+ bcopy (str, pat, charlen);
+ pat += charlen;
+ base_pat += in_charlen;
+ len_byte -= in_charlen;
+ }
+ }
+ else
+ {
+ /* Unibyte buffer. */
+ charset_base = 0;
+ while (--len >= 0)
+ {
+ int c, translated;
+
+ /* If we got here and the RE flag is set, it's because we're
+ dealing with a regexp known to be trivial, so the backslash
+ just quotes the next character. */
+ if (RE && *base_pat == '\\')
+ {
+ len--;
+ base_pat++;
+ }
+ c = *base_pat++;
+ TRANSLATE (translated, trt, c);
+ *pat++ = translated;
+ }
+ }
+
+ len_byte = pat - patbuf;
+ len = raw_pattern_size;
+ pat = base_pat = patbuf;
+
+ if (boyer_moore_ok)
+ return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
+ pos, pos_byte, lim, lim_byte,
+ charset_base);
+ else
+ return simple_search (n, pat, len, len_byte, trt,
+ pos, pos_byte, lim, lim_byte);
+ }
+}
+\f
+/* Do a simple string search N times for the string PAT,
+ whose length is LEN/LEN_BYTE,
+ from buffer position POS/POS_BYTE until LIM/LIM_BYTE.
+ TRT is the translation table.
+
+ Return the character position where the match is found.
+ Otherwise, if M matches remain to be found, return -M.
+
+ This kind of search works regardless of what is in PAT and
+ regardless of what is in TRT. It is used in cases where
+ boyer_moore cannot work. */
+
+static int
+simple_search (n, pat, len, len_byte, trt, pos, pos_byte, lim, lim_byte)
+ int n;
+ unsigned char *pat;
+ int len, len_byte;
+ Lisp_Object trt;
+ int pos, pos_byte;
+ int lim, lim_byte;
+{
+ int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
+ int forward = n > 0;
+
+ if (lim > pos && multibyte)
+ while (n > 0)