X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/aaf34461ff5804e5cebe163b31e535da72e81d87..b4b2c2cafcf7d9d441be9d0930c69709a995420e:/src/regex.c

diff --git a/src/regex.c b/src/regex.c
index 66b84c9dfc..91259dc779 100644
--- a/src/regex.c
+++ b/src/regex.c
@@ -3,7 +3,7 @@
    internationalization features.)
 
    Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
-                 2002, 2003, 2004, 2005, 2006, 2007
+                 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
                  Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
@@ -124,7 +124,7 @@
 # define SYNTAX_ENTRY_VIA_PROPERTY
 
 # include "syntax.h"
-# include "charset.h"
+# include "character.h"
 # include "category.h"
 
 # ifdef malloc
@@ -145,28 +145,48 @@
 # define POS_AS_IN_BUFFER(p) ((p) + (NILP (re_match_object) || BUFFERP (re_match_object)))
 
 # define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
-# define RE_STRING_CHAR(p, s) \
+# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
+# define RE_STRING_CHAR(p, s, multibyte) \
   (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
-# define RE_STRING_CHAR_AND_LENGTH(p, s, len) \
+# define RE_STRING_CHAR_AND_LENGTH(p, s, len, multibyte) \
   (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
 
-/* Set C a (possibly multibyte) character before P.  P points into a
-   string which is the virtual concatenation of STR1 (which ends at
-   END1) or STR2 (which ends at END2).  */
-# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)		\
-  do {									\
-    if (multibyte)							\
-       {						    		\
-	 re_char *dtemp = (p) == (str2) ? (end1) : (p);		    	\
-	 re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
-	 re_char *d0 = dtemp;						\
-	 PREV_CHAR_BOUNDARY (d0, dlimit);				\
-	 c = STRING_CHAR (d0, dtemp - d0);				\
-       }						    		\
-     else						    		\
-       (c = ((p) == (str2) ? (end1) : (p))[-1]);			\
+# define RE_CHAR_TO_MULTIBYTE(c) unibyte_to_multibyte_table[(c)]
+
+# define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
+
+/* Set C a (possibly converted to multibyte) character before P.  P
+   points into a string which is the virtual concatenation of STR1
+   (which ends at END1) or STR2 (which ends at END2).  */
+# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2)		     \
+  do {									     \
+    if (target_multibyte)						     \
+      {									     \
+	re_char *dtemp = (p) == (str2) ? (end1) : (p);			     \
+	re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
+	while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp));		     \
+	c = STRING_CHAR (dtemp, (p) - dtemp);				     \
+      }									     \
+    else								     \
+      {									     \
+	(c = ((p) == (str2) ? (end1) : (p))[-1]);			     \
+	(c) = RE_CHAR_TO_MULTIBYTE (c);					     \
+      }									     \
   } while (0)
 
+/* Set C a (possibly converted to multibyte) character at P, and set
+   LEN to the byte length of that character.  */
+# define GET_CHAR_AFTER(c, p, len)		\
+  do {						\
+    if (target_multibyte)			\
+      (c) = STRING_CHAR_AND_LENGTH (p, 0, len);	\
+    else					\
+      {						\
+	(c) = *p;				\
+	len = 1;				\
+	(c) = RE_CHAR_TO_MULTIBYTE (c);		\
+      }						\
+   } while (0)
 
 #else  /* not emacs */
 
@@ -266,11 +286,7 @@ xrealloc (block, size)
 /* Sword must be nonzero for the wordchar pattern commands in re_match_2.  */
 enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
 
-# ifdef SWITCH_ENUM_BUG
-#  define SWITCH_ENUM_CAST(x) ((int)(x))
-# else
 #  define SWITCH_ENUM_CAST(x) (x)
-# endif
 
 /* Dummy macros for non-Emacs environments.  */
 # define BASE_LEADING_CODE_P(c) (0)
@@ -278,6 +294,7 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
 # define CHARSET_LEADING_CODE_BASE(c) 0
 # define MAX_MULTIBYTE_LENGTH 1
 # define RE_MULTIBYTE_P(x) 0
+# define RE_TARGET_MULTIBYTE_P(x) 0
 # define WORD_BOUNDARY_P(c1, c2) (0)
 # define CHAR_HEAD_P(p) (1)
 # define SINGLE_BYTE_CHAR_P(c) (1)
@@ -285,13 +302,21 @@ enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
 # define MULTIBYTE_FORM_LENGTH(p, s) (1)
 # define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
 # define STRING_CHAR(p, s) (*(p))
-# define RE_STRING_CHAR STRING_CHAR
+# define RE_STRING_CHAR(p, s, multibyte) STRING_CHAR ((p), (s))
 # define CHAR_STRING(c, s) (*(s) = (c), 1)
 # define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
-# define RE_STRING_CHAR_AND_LENGTH STRING_CHAR_AND_LENGTH
+# define RE_STRING_CHAR_AND_LENGTH(p, s, len, multibyte) STRING_CHAR_AND_LENGTH ((p), (s), (len))
+# define RE_CHAR_TO_MULTIBYTE(c) (c)
+# define RE_CHAR_TO_UNIBYTE(c) (c)
 # define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
   (c = ((p) == (str2) ? *((end1) - 1) : *((p) - 1)))
+# define GET_CHAR_AFTER(c, p, len)	\
+  (c = *p, len = 1)
 # define MAKE_CHAR(charset, c1, c2) (c1)
+# define BYTE8_TO_CHAR(c) (c)
+# define CHAR_BYTE8_P(c) (0)
+# define CHAR_LEADING_CODE(c) (c)
+
 #endif /* not emacs */
 
 #ifndef RE_TRANSLATE
@@ -497,7 +522,7 @@ init_syntax_once ()
 #  ifdef __GNUC__
 #   define alloca __builtin_alloca
 #  else /* not __GNUC__ */
-#   if HAVE_ALLOCA_H
+#   ifdef HAVE_ALLOCA_H
 #    include <alloca.h>
 #   endif /* HAVE_ALLOCA_H */
 #  endif /* not __GNUC__ */
@@ -1366,18 +1391,12 @@ static const char *re_error_msgid[] =
 /* Normally, this is fine.  */
 #define MATCH_MAY_ALLOCATE
 
-/* When using GNU C, we are not REALLY using the C alloca, no matter
-   what config.h may say.  So don't take precautions for it.  */
-#ifdef __GNUC__
-# undef C_ALLOCA
-#endif
-
 /* The match routines may not allocate if (1) they would do it with malloc
    and (2) it's not safe for them to use malloc.
    Note that if REL_ALLOC is defined, matching would not use malloc for the
    failure stack, but we would still use it for the register vectors;
    so REL_ALLOC should not affect this.  */
-#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
+#if defined REGEX_MALLOC && defined emacs
 # undef MATCH_MAY_ALLOCATE
 #endif
 
@@ -1737,7 +1756,7 @@ static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
   do {									\
     int len;								\
     if (p == pend) return REG_EEND;					\
-    c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len);			\
+    c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len, multibyte);	\
     p += len;								\
   } while (0)
 
@@ -1832,8 +1851,10 @@ static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
    being larger than MAX_BUF_SIZE, then flag memory exhausted.  */
 #if __BOUNDED_POINTERS__
 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
-# define MOVE_BUFFER_POINTER(P) \
-  (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
+# define MOVE_BUFFER_POINTER(P)					\
+  (__ptrlow (P) = new_buffer + (__ptrlow (P) - old_buffer),	\
+   SET_HIGH_BOUND (P),						\
+   __ptrvalue (P) = new_buffer + (__ptrvalue (P) - old_buffer))
 # define ELSE_EXTEND_BUFFER_HIGH_BOUND		\
   else						\
     {						\
@@ -1847,12 +1868,12 @@ static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
 	SET_HIGH_BOUND (pending_exact);		\
     }
 #else
-# define MOVE_BUFFER_POINTER(P) (P) += incr
+# define MOVE_BUFFER_POINTER(P) ((P) = new_buffer + ((P) - old_buffer))
 # define ELSE_EXTEND_BUFFER_HIGH_BOUND
 #endif
 #define EXTEND_BUFFER()							\
   do {									\
-    re_char *old_buffer = bufp->buffer;					\
+    unsigned char *old_buffer = bufp->buffer;				\
     if (bufp->allocated == MAX_BUF_SIZE)				\
       return REG_ESIZE;							\
     bufp->allocated <<= 1;						\
@@ -1864,7 +1885,7 @@ static int analyse_first _RE_ARGS ((re_char *p, re_char *pend,
     /* If the buffer moved, move all the pointers into it.  */		\
     if (old_buffer != bufp->buffer)					\
       {									\
-	int incr = bufp->buffer - old_buffer;				\
+	unsigned char *new_buffer = bufp->buffer;			\
 	MOVE_BUFFER_POINTER (b);					\
 	MOVE_BUFFER_POINTER (begalt);					\
 	if (fixup_alt_jump)						\
@@ -1948,10 +1969,10 @@ struct range_table_work_area
 
 #define EXTEND_RANGE_TABLE(work_area, n)				\
   do {									\
-    if (((work_area)->used + (n)) * sizeof (int) > (work_area)->allocated) \
+    if (((work_area).used + (n)) * sizeof (int) > (work_area).allocated) \
       {									\
-        extend_range_table_work_area (work_area);			\
-        if ((work_area)->table == 0)					\
+        extend_range_table_work_area (&work_area);			\
+        if ((work_area).table == 0)					\
           return (REG_ESPACE);						\
       }									\
   } while (0)
@@ -1968,15 +1989,12 @@ struct range_table_work_area
 #define BIT_UPPER	0x10
 #define BIT_MULTIBYTE	0x20
 
-/* Set a range START..END to WORK_AREA.
-   The range is passed through TRANSLATE, so START and END
-   should be untranslated.  */
-#define SET_RANGE_TABLE_WORK_AREA(work_area, start, end)		\
+/* Set a range (RANGE_START, RANGE_END) to WORK_AREA.  */
+#define SET_RANGE_TABLE_WORK_AREA(work_area, range_start, range_end)	\
   do {									\
-    int tem;								\
-    tem = set_image_of_range (&work_area, start, end, translate);	\
-    if (tem > 0)							\
-      FREE_STACK_RETURN (tem);						\
+    EXTEND_RANGE_TABLE ((work_area), 2);				\
+    (work_area).table[(work_area).used++] = (range_start);		\
+    (work_area).table[(work_area).used++] = (range_end);		\
   } while (0)
 
 /* Free allocated memory for WORK_AREA.  */
@@ -1996,6 +2014,113 @@ struct range_table_work_area
 #define SET_LIST_BIT(c) (b[((c)) / BYTEWIDTH] |= 1 << ((c) % BYTEWIDTH))
 
 
+#ifdef emacs
+
+/* Store characters in the range FROM to TO in the bitmap at B (for
+   ASCII and unibyte characters) and WORK_AREA (for multibyte
+   characters) while translating them and paying attention to the
+   continuity of translated characters.
+
+   Implementation note: It is better to implement these fairly big
+   macros by a function, but it's not that easy because macros called
+   in this macro assume various local variables already declared.  */
+
+/* Both FROM and TO are ASCII characters.  */
+
+#define SETUP_ASCII_RANGE(work_area, FROM, TO)			\
+  do {								\
+    int C0, C1;							\
+    								\
+    for (C0 = (FROM); C0 <= (TO); C0++)				\
+      {								\
+	C1 = TRANSLATE (C0);					\
+	if (! ASCII_CHAR_P (C1))				\
+	  {							\
+	    SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);	\
+	    if ((C1 = RE_CHAR_TO_UNIBYTE (C1)) < 0)		\
+	      C1 = C0;						\
+	  }							\
+	SET_LIST_BIT (C1);					\
+      }								\
+  } while (0)
+
+
+/* Both FROM and TO are unibyte characters (0x80..0xFF).  */
+
+#define SETUP_UNIBYTE_RANGE(work_area, FROM, TO)			       \
+  do {									       \
+    int C0, C1, C2, I;							       \
+    int USED = RANGE_TABLE_WORK_USED (work_area);			       \
+    									       \
+    for (C0 = (FROM); C0 <= (TO); C0++)					       \
+      {									       \
+	C1 = RE_CHAR_TO_MULTIBYTE (C0);					       \
+	if (CHAR_BYTE8_P (C1))						       \
+	  SET_LIST_BIT (C0);						       \
+	else								       \
+	  {								       \
+	    C2 = TRANSLATE (C1);					       \
+	    if (C2 == C1						       \
+		|| (C1 = RE_CHAR_TO_UNIBYTE (C2)) < 0)			       \
+	      C1 = C0;							       \
+	    SET_LIST_BIT (C1);						       \
+	    for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
+	      {								       \
+		int from = RANGE_TABLE_WORK_ELT (work_area, I);		       \
+		int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);	       \
+									       \
+		if (C2 >= from - 1 && C2 <= to + 1)			       \
+		  {							       \
+		    if (C2 == from - 1)					       \
+		      RANGE_TABLE_WORK_ELT (work_area, I)--;		       \
+		    else if (C2 == to + 1)				       \
+		      RANGE_TABLE_WORK_ELT (work_area, I + 1)++;	       \
+		    break;						       \
+		  }							       \
+	      }								       \
+	    if (I < USED)						       \
+	      SET_RANGE_TABLE_WORK_AREA ((work_area), C2, C2);		       \
+	  }								       \
+      }									       \
+  } while (0)
+
+
+/* Both FROM and TO are mulitbyte characters.  */
+
+#define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO)			   \
+  do {									   \
+    int C0, C1, C2, I, USED = RANGE_TABLE_WORK_USED (work_area);	   \
+    									   \
+    SET_RANGE_TABLE_WORK_AREA ((work_area), (FROM), (TO));		   \
+    for (C0 = (FROM); C0 <= (TO); C0++)					   \
+      {									   \
+	C1 = TRANSLATE (C0);						   \
+	if ((C2 = RE_CHAR_TO_UNIBYTE (C1)) >= 0				   \
+	    || (C1 != C0 && (C2 = RE_CHAR_TO_UNIBYTE (C0)) >= 0))	   \
+	  SET_LIST_BIT (C2);						   \
+	if (C1 >= (FROM) && C1 <= (TO))					   \
+	  continue;							   \
+	for (I = RANGE_TABLE_WORK_USED (work_area) - 2; I >= USED; I -= 2) \
+	  {								   \
+	    int from = RANGE_TABLE_WORK_ELT (work_area, I);		   \
+	    int to = RANGE_TABLE_WORK_ELT (work_area, I + 1);		   \
+	    								   \
+	    if (C1 >= from - 1 && C1 <= to + 1)				   \
+	      {								   \
+		if (C1 == from - 1)					   \
+		  RANGE_TABLE_WORK_ELT (work_area, I)--;		   \
+		else if (C1 == to + 1)					   \
+		  RANGE_TABLE_WORK_ELT (work_area, I + 1)++;		   \
+		break;							   \
+	      }								   \
+	  }								   \
+	if (I < USED)							   \
+	  SET_RANGE_TABLE_WORK_AREA ((work_area), C1, C1);		   \
+      }									   \
+  } while (0)
+
+#endif /* emacs */
+
 /* Get the next unsigned number in the uncompiled pattern.  */
 #define GET_UNSIGNED_NUMBER(num)					\
   do {									\
@@ -2119,6 +2244,7 @@ extend_range_table_work_area (work_area)
       = (int *) malloc (work_area->allocated);
 }
 
+#if 0
 #ifdef emacs
 
 /* Carefully find the ranges of codes that are equivalent
@@ -2351,6 +2477,7 @@ set_image_of_range (work_area, start, end, translate)
 
   return -1;
 }
+#endif	/* 0 */
 
 #ifndef MATCH_MAY_ALLOCATE
 
@@ -2489,6 +2616,9 @@ regex_compile (pattern, size, syntax, bufp)
   /* If the object matched can contain multibyte characters.  */
   const boolean multibyte = RE_MULTIBYTE_P (bufp);
 
+  /* If a target of matching can contain multibyte characters.  */
+  const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
+
   /* Nonzero if we have pushed down into a subpattern.  */
   int in_subpattern = 0;
 
@@ -2841,6 +2971,7 @@ regex_compile (pattern, size, syntax, bufp)
 	      {
 		boolean escaped_char = false;
 		const unsigned char *p2 = p;
+		re_wchar_t ch, c2;
 
 		if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
@@ -2867,10 +2998,6 @@ regex_compile (pattern, size, syntax, bufp)
 		      break;
 		  }
 
-		/* What should we do for the character which is
-		   greater than 0x7F, but not BASE_LEADING_CODE_P?
-		   XXX */
-
 		/* See if we're at the beginning of a possible character
 		   class.  */
 
@@ -2907,8 +3034,8 @@ regex_compile (pattern, size, syntax, bufp)
 		       them).  */
 		    if (c == ':' && *p == ']')
 		      {
-			re_wchar_t ch;
 			re_wctype_t cc;
+			int limit;
 
 			cc = re_wctype (str);
 
@@ -2921,6 +3048,15 @@ regex_compile (pattern, size, syntax, bufp)
 
                         if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
 
+#ifndef emacs
+			for (ch = 0; ch < (1 << BYTEWIDTH); ++ch)
+			  if (re_iswctype (btowc (ch), cc))
+			    {
+			      c = TRANSLATE (ch);
+			      if (c < (1 << BYTEWIDTH))
+				SET_LIST_BIT (c);
+			    }
+#else  /* emacs */
 			/* Most character classes in a multibyte match
 			   just set a flag.  Exceptions are is_blank,
 			   is_digit, is_cntrl, and is_xdigit, since
@@ -2928,18 +3064,25 @@ regex_compile (pattern, size, syntax, bufp)
 			   don't need to handle them for multibyte.
 			   They are distinguished by a negative wctype.  */
 
-			if (multibyte)
-			  SET_RANGE_TABLE_WORK_AREA_BIT (range_table_work,
-							 re_wctype_to_bit (cc));
-
-                        for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
+			for (ch = 0; ch < 256; ++ch)
 			  {
-			    int translated = TRANSLATE (ch);
-			    if (translated < (1 << BYTEWIDTH)
-				&& re_iswctype (btowc (ch), cc))
-			      SET_LIST_BIT (translated);
+			    c = RE_CHAR_TO_MULTIBYTE (ch);
+			    if (! CHAR_BYTE8_P (c)
+				&& re_iswctype (c, cc))
+			      {
+				SET_LIST_BIT (ch);
+				c1 = TRANSLATE (c);
+				if (c1 == c)
+				  continue;
+				if (ASCII_CHAR_P (c1))
+				  SET_LIST_BIT (c1);
+				else if ((c1 = RE_CHAR_TO_UNIBYTE (c1)) >= 0)
+				  SET_LIST_BIT (c1);
+			      }
 			  }
-
+			SET_RANGE_TABLE_WORK_AREA_BIT
+			  (range_table_work, re_wctype_to_bit (cc));
+#endif	/* emacs */
 			/* In most cases the matching rule for char classes
 			   only uses the syntax table for multibyte chars,
 			   so that the content of the syntax-table it is not
@@ -2972,64 +3115,63 @@ regex_compile (pattern, size, syntax, bufp)
 
 		    /* Fetch the character which ends the range. */
 		    PATFETCH (c1);
-
-		    if (SINGLE_BYTE_CHAR_P (c))
-		      {
-			if (! SINGLE_BYTE_CHAR_P (c1))
-			  {
-			    /* Handle a range starting with a
-			       character of less than 256, and ending
-			       with a character of not less than 256.
-			       Split that into two ranges, the low one
-			       ending at 0377, and the high one
-			       starting at the smallest character in
-			       the charset of C1 and ending at C1.  */
-			    int charset = CHAR_CHARSET (c1);
-			    re_wchar_t c2 = MAKE_CHAR (charset, 0, 0);
-
-			    SET_RANGE_TABLE_WORK_AREA (range_table_work,
-						       c2, c1);
-			    c1 = 0377;
-			  }
-		      }
-		    else if (!SAME_CHARSET_P (c, c1))
-		      FREE_STACK_RETURN (REG_ERANGEX);
+#ifdef emacs
+		    if (CHAR_BYTE8_P (c1)
+			&& ! ASCII_CHAR_P (c) && ! CHAR_BYTE8_P (c))
+		      /* Treat the range from a multibyte character to
+			 raw-byte character as empty.  */
+		      c = c1 + 1;
+#endif	/* emacs */
 		  }
 		else
 		  /* Range from C to C. */
 		  c1 = c;
 
-		/* Set the range ... */
-		if (SINGLE_BYTE_CHAR_P (c))
-		  /* ... into bitmap.  */
+		if (c > c1)
 		  {
-		    re_wchar_t this_char;
-		    re_wchar_t range_start = c, range_end = c1;
-
-		    /* If the start is after the end, the range is empty.  */
-		    if (range_start > range_end)
+		    if (syntax & RE_NO_EMPTY_RANGES)
+		      FREE_STACK_RETURN (REG_ERANGEX);
+		    /* Else, repeat the loop.  */
+		  }
+		else
+		  {
+#ifndef emacs
+		    /* Set the range into bitmap */
+		    for (; c <= c1; c++)
 		      {
-			if (syntax & RE_NO_EMPTY_RANGES)
-			  FREE_STACK_RETURN (REG_ERANGE);
-			/* Else, repeat the loop.  */
+			ch = TRANSLATE (c);
+			if (ch < (1 << BYTEWIDTH))
+			  SET_LIST_BIT (ch);
 		      }
-		    else
+#else  /* emacs */
+		    if (c < 128)
 		      {
-			for (this_char = range_start; this_char <= range_end;
-			     this_char++)
+			ch = MIN (127, c1);
+			SETUP_ASCII_RANGE (range_table_work, c, ch);
+			c = ch + 1;
+			if (CHAR_BYTE8_P (c1))
+			  c = BYTE8_TO_CHAR (128);
+		      }
+		    if (c <= c1)
+		      {
+			if (CHAR_BYTE8_P (c))
+			  {
+			    c = CHAR_TO_BYTE8 (c);
+			    c1 = CHAR_TO_BYTE8 (c1);
+			    for (; c <= c1; c++)
+			      SET_LIST_BIT (c);
+			  }
+			else if (multibyte)
+			  {
+			    SETUP_MULTIBYTE_RANGE (range_table_work, c, c1);
+			  }
+			else
 			  {
-			    int translated = TRANSLATE (this_char);
-			    if (translated < (1 << BYTEWIDTH))
-			      SET_LIST_BIT (translated);
-			    else
-			      SET_RANGE_TABLE_WORK_AREA
-				(range_table_work, translated, translated);
+			    SETUP_UNIBYTE_RANGE (range_table_work, c, c1);
 			  }
 		      }
+#endif /* emacs */
 		  }
-		else
-		  /* ... into range table.  */
-		  SET_RANGE_TABLE_WORK_AREA (range_table_work, c, c1);
 	      }
 
 	    /* Discard any (non)matching list bytes that are all 0 at the
@@ -3640,12 +3782,25 @@ regex_compile (pattern, size, syntax, bufp)
 	  {
 	    int len;
 
-	    c = TRANSLATE (c);
 	    if (multibyte)
-	      len = CHAR_STRING (c, b);
+	      {
+		c = TRANSLATE (c);
+		len = CHAR_STRING (c, b);
+		b += len;
+	      }
 	    else
-	      *b = c, len = 1;
-	    b += len;
+	      {
+		c1 = RE_CHAR_TO_MULTIBYTE (c);
+		if (! CHAR_BYTE8_P (c1))
+		  {
+		    re_wchar_t c2 = TRANSLATE (c1);
+
+		    if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
+		      c = c1;
+		  }		      
+		*b++ = c;
+		len = 1;
+	      }
 	    (*pending_exact) += len;
 	  }
 
@@ -3915,14 +4070,21 @@ analyse_first (p, pend, fastmap, multibyte)
 	case exactn:
 	  if (fastmap)
 	    {
-	      int c = RE_STRING_CHAR (p + 1, pend - p);
-	      /* When fast-scanning, the fastmap can be indexed either with
-		 a char (smaller than 256) or with the first byte of
-		 a char's byte sequence.  So we have to conservatively add
-		 both to the table.  */
-	      if (SINGLE_BYTE_CHAR_P (c))
-		fastmap[c] = 1;
+	      /* If multibyte is nonzero, the first byte of each
+		 character is an ASCII or a leading code.  Otherwise,
+		 each byte is a character.  Thus, this works in both
+		 cases. */
 	      fastmap[p[1]] = 1;
+	      if (! multibyte)
+		{
+		  /* For the case of matching this unibyte regex
+		     against multibyte, we must set a leading code of
+		     the corresponding multibyte character.  */
+		  int c = RE_CHAR_TO_MULTIBYTE (p[1]);
+
+		  if (! CHAR_BYTE8_P (c))
+		    fastmap[CHAR_LEADING_CODE (c)] = 1;
+		}
 	    }
 	  break;
 
@@ -3935,18 +4097,14 @@ analyse_first (p, pend, fastmap, multibyte)
 
 
 	case charset_not:
-	  /* Chars beyond end of bitmap are possible matches.
-	     All the single-byte codes can occur in multibyte buffers.
-	     So any that are not listed in the charset
-	     are possible matches, even in multibyte buffers.  */
 	  if (!fastmap) break;
-	  /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
-	     because it will automatically be set when needed by virtue of
-	     being larger than the highest char of its charset (0xbf) but
-	     smaller than (1<<BYTEWIDTH).  */
-	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
-	       j < (1 << BYTEWIDTH); j++)
-	    fastmap[j] = 1;
+	  {
+	    /* Chars beyond end of bitmap are possible matches.  */
+	    for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
+		 j < (1 << BYTEWIDTH); j++)
+	      fastmap[j] = 1;
+	  }
+
 	  /* Fallthrough */
 	case charset:
 	  if (!fastmap) break;
@@ -3954,28 +4112,24 @@ analyse_first (p, pend, fastmap, multibyte)
 	  for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
 	       j >= 0; j--)
 	    if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
-	      {
-		fastmap[j] = 1;
-#ifdef emacs
-		if (j >= 0x80 && j < 0xa0)
-		  fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
-#endif
-	      }
+	      fastmap[j] = 1;
 
-	  if ((not && multibyte)
-	      /* Any character set can possibly contain a character
+#ifdef emacs
+	  if (/* Any leading code can possibly start a character
 		 which doesn't match the specified set of characters.  */
-	      || (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
-		  && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
-	    /* If we can match a character class, we can match
-	       any character set.  */
+	      not
+	      || 
+	      /* If we can match a character class, we can match any
+		 multibyte characters.  */
+	      (CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
+	       && CHARSET_RANGE_TABLE_BITS (&p[-2]) != 0))
+
 	    {
-	    set_fastmap_for_multibyte_characters:
 	      if (match_any_multibyte_characters == false)
 		{
-		  for (j = 0x80; j < 0xA0; j++)	/* XXX */
-		    if (BASE_LEADING_CODE_P (j))
-		      fastmap[j] = 1;
+		  for (j = MIN_MULTIBYTE_LEADING_CODE;
+		       j <= MAX_MULTIBYTE_LEADING_CODE; j++)
+		    fastmap[j] = 1;
 		  match_any_multibyte_characters = true;
 		}
 	    }
@@ -3983,9 +4137,10 @@ analyse_first (p, pend, fastmap, multibyte)
 	  else if (!not && CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
 		   && match_any_multibyte_characters == false)
 	    {
-	      /* Set fastmap[I] 1 where I is a base leading code of each
-		 multibyte character in the range table. */
+	      /* Set fastmap[I] to 1 where I is a leading code of each
+		 multibyte characer in the range table. */
 	      int c, count;
+	      unsigned char lc1, lc2;
 
 	      /* Make P points the range table.  `+ 2' is to skip flag
 		 bits for a character class.  */
@@ -3993,14 +4148,19 @@ analyse_first (p, pend, fastmap, multibyte)
 
 	      /* Extract the number of ranges in range table into COUNT.  */
 	      EXTRACT_NUMBER_AND_INCR (count, p);
-	      for (; count > 0; count--, p += 2 * 3) /* XXX */
+	      for (; count > 0; count--, p += 3)
 		{
-		  /* Extract the start of each range.  */
+		  /* Extract the start and end of each range.  */
+		  EXTRACT_CHARACTER (c, p);
+		  lc1 = CHAR_LEADING_CODE (c);
+		  p += 3;
 		  EXTRACT_CHARACTER (c, p);
-		  j = CHAR_CHARSET (c);
-		  fastmap[CHARSET_LEADING_CODE_BASE (j)] = 1;
+		  lc2 = CHAR_LEADING_CODE (c);
+		  for (j = lc1; j <= lc2; j++)
+		    fastmap[j] = 1;
 		}
 	    }
+#endif
 	  break;
 
 	case syntaxspec:
@@ -4023,14 +4183,19 @@ analyse_first (p, pend, fastmap, multibyte)
 	  if (!fastmap) break;
 	  not = (re_opcode_t)p[-1] == notcategoryspec;
 	  k = *p++;
-	  for (j = 0; j < (1 << BYTEWIDTH); j++)
+	  for (j = (1 << BYTEWIDTH); j >= 0; j--)
 	    if ((CHAR_HAS_CATEGORY (j, k)) ^ not)
 	      fastmap[j] = 1;
 
-	  if (multibyte)
-	    /* Any character set can possibly contain a character
-	       whose category is K (or not).  */
-	    goto set_fastmap_for_multibyte_characters;
+	  /* Any leading code can possibly start a character which
+	     has or doesn't has the specified category.  */
+	  if (match_any_multibyte_characters == false)
+	    {
+	      for (j = MIN_MULTIBYTE_LEADING_CODE;
+		   j <= MAX_MULTIBYTE_LEADING_CODE; j++)
+		fastmap[j] = 1;
+	      match_any_multibyte_characters = true;
+	    }
 	  break;
 
       /* All cases after this match the empty string.  These end with
@@ -4280,9 +4445,8 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
   int total_size = size1 + size2;
   int endpos = startpos + range;
   boolean anchored_start;
-
-  /* Nonzero if we have to concern multibyte character.  */
-  const boolean multibyte = RE_MULTIBYTE_P (bufp);
+  /* Nonzero if we are searching multibyte string.  */
+  const boolean multibyte = RE_TARGET_MULTIBYTE_P (bufp);
 
   /* Check for out-of-range STARTPOS.  */
   if (startpos < 0 || startpos > total_size)
@@ -4378,59 +4542,51 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 
 			buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
 							 buf_charlen);
-
 			buf_ch = RE_TRANSLATE (translate, buf_ch);
-			if (buf_ch >= 0400
-			    || fastmap[buf_ch])
+			if (fastmap[CHAR_LEADING_CODE (buf_ch)])
 			  break;
 
 			range -= buf_charlen;
 			d += buf_charlen;
 		      }
 		  else
-		    {
-		      /* Convert *d to integer to shut up GCC's
-			 whining about comparison that is always
-			 true.  */
-		      int di = *d;
-
-		      while (range > lim
-			     && !fastmap[RE_TRANSLATE (translate, di)])
-			{
-			  di = *(++d);
-			  range--;
-			}
-		    }
-		}
-	      else
-		do
-		  {
-		    re_char *d_start = d;
-		    while (range > lim && !fastmap[*d])
+		    while (range > lim)
 		      {
+			register re_wchar_t ch, translated;
+
+			buf_ch = *d;
+			ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
+			translated = RE_TRANSLATE (translate, ch);
+			if (translated != ch
+			    && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
+			  buf_ch = ch;
+			if (fastmap[buf_ch])
+			  break;
 			d++;
 			range--;
 		      }
-#ifdef emacs
-		    if (multibyte && range > lim)
+		}
+	      else
+		{
+		  if (multibyte)
+		    while (range > lim)
 		      {
-			/* Check that we are at the beginning of a char.  */
-			int at_boundary;
-			AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
-			if (at_boundary)
+			int buf_charlen;
+
+			buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
+							 buf_charlen);
+			if (fastmap[CHAR_LEADING_CODE (buf_ch)])
 			  break;
-			else
-			  { /* We have matched an internal byte of a char
-			       rather than the leading byte, so it's a false
-			       positive: we should keep scanning.  */
-			    d++; range--;
-			  }
+			range -= buf_charlen;
+			d += buf_charlen;
 		      }
-		    else
-#endif
-		      break;
-		  } while (1);
-
+		  else
+		    while (range > lim && !fastmap[*d])
+		      {
+			d++;
+			range--;
+		      }
+		}
 	      startpos += irange - range;
 	    }
 	  else				/* Searching backwards.  */
@@ -4438,12 +4594,26 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 	      int room = (startpos >= size1
 			  ? size2 + size1 - startpos
 			  : size1 - startpos);
-	      buf_ch = RE_STRING_CHAR (d, room);
-	      buf_ch = TRANSLATE (buf_ch);
-
-	      if (! (buf_ch >= 0400
-		     || fastmap[buf_ch]))
-		goto advance;
+	      if (multibyte)
+		{
+		  buf_ch = STRING_CHAR (d, room);
+		  buf_ch = TRANSLATE (buf_ch);
+		  if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
+		    goto advance;
+		}
+	      else
+		{
+		  register re_wchar_t ch, translated;
+
+		  buf_ch = *d;
+		  ch = RE_CHAR_TO_MULTIBYTE (buf_ch);
+		  translated = TRANSLATE (ch);
+		  if (translated != ch
+		      && (ch = RE_CHAR_TO_UNIBYTE (translated)) >= 0)
+		    buf_ch = ch;
+		  if (! fastmap[TRANSLATE (buf_ch)])
+		    goto advance;
+		}
 	    }
 	}
 
@@ -4454,11 +4624,6 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
 
       val = re_match_2_internal (bufp, string1, size1, string2, size2,
 				 startpos, regs, stop);
-#ifndef REGEX_MALLOC
-# ifdef C_ALLOCA
-      alloca (0);
-# endif
-#endif
 
       if (val >= 0)
 	return startpos;
@@ -4722,11 +4887,11 @@ mutually_exclusive_p (bufp, p1, p2)
       {
 	register re_wchar_t c
 	  = (re_opcode_t) *p2 == endline ? '\n'
-	  : RE_STRING_CHAR (p2 + 2, pend - p2 - 2);
+	  : RE_STRING_CHAR (p2 + 2, pend - p2 - 2, multibyte);
 
 	if ((re_opcode_t) *p1 == exactn)
 	  {
-	    if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2))
+	    if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2, multibyte))
 	      {
 		DEBUG_PRINT3 ("  '%c' != '%c' => fast loop.\n", c, p1[2]);
 		return 1;
@@ -4740,7 +4905,7 @@ mutually_exclusive_p (bufp, p1, p2)
 
 	    /* Test if C is listed in charset (or charset_not)
 	       at `p1'.  */
-	    if (SINGLE_BYTE_CHAR_P (c))
+	    if (! multibyte || IS_REAL_ASCII (c))
 	      {
 		if (c < CHARSET_BITMAP_SIZE (p1) * BYTEWIDTH
 		    && p1[2 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
@@ -4783,9 +4948,10 @@ mutually_exclusive_p (bufp, p1, p2)
 	     size of bitmap table of P1 is extracted by
 	     using macro `CHARSET_BITMAP_SIZE'.
 
-	     Since we know that all the character listed in
-	     P2 is ASCII, it is enough to test only bitmap
-	     table of P1.  */
+	     In a multibyte case, we know that all the character
+	     listed in P2 is ASCII.  In a unibyte case, P1 has only a
+	     bitmap table.  So, in both cases, it is enough to test
+	     only the bitmap table of P1.  */
 
 	  if ((re_opcode_t) *p1 == charset)
 	    {
@@ -4894,9 +5060,6 @@ re_match (bufp, string, size, pos, regs)
 {
   int result = re_match_2_internal (bufp, NULL, 0, (re_char*) string, size,
 				    pos, regs, size);
-# if defined C_ALLOCA && !defined REGEX_MALLOC
-  alloca (0);
-# endif
   return result;
 }
 WEAK_ALIAS (__re_match, re_match)
@@ -4942,13 +5105,11 @@ re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
   result = re_match_2_internal (bufp, (re_char*) string1, size1,
 				(re_char*) string2, size2,
 				pos, regs, stop);
-#if defined C_ALLOCA && !defined REGEX_MALLOC
-  alloca (0);
-#endif
   return result;
 }
 WEAK_ALIAS (__re_match_2, re_match_2)
 
+
 /* This is a separate function so that we can force an alloca cleanup
    afterwards.  */
 static int
@@ -4985,12 +5146,15 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
   re_char *p = bufp->buffer;
   re_char *pend = p + bufp->used;
 
-  /* We use this to map every character in the string.  */
+  /* We use this to map every character in the string.	*/
   RE_TRANSLATE_TYPE translate = bufp->translate;
 
-  /* Nonzero if we have to concern multibyte character.  */
+  /* Nonzero if BUFP is setup from a multibyte regex.  */
   const boolean multibyte = RE_MULTIBYTE_P (bufp);
 
+  /* Nonzero if STRING1/STRING2 are multibyte.  */
+  const boolean target_multibyte = RE_TARGET_MULTIBYTE_P (bufp);
+
   /* Failure point stack.  Each place that can handle a failure further
      down the line pushes a failure point on this stack.  It consists of
      regstart, and regend for all registers corresponding to
@@ -5342,63 +5506,98 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	  /* Remember the start point to rollback upon failure.  */
 	  dfail = d;
 
+#ifndef emacs
 	  /* This is written out as an if-else so we don't waste time
 	     testing `translate' inside the loop.  */
 	  if (RE_TRANSLATE_P (translate))
-	    {
-	      if (multibyte)
-		do
+	    do
+	      {
+		PREFETCH ();
+		if (RE_TRANSLATE (translate, *d) != *p++)
 		  {
-		    int pat_charlen, buf_charlen;
-		    unsigned int pat_ch, buf_ch;
+		    d = dfail;
+		    goto fail;
+		  }
+		d++;
+	      }
+	    while (--mcnt);
+	  else
+	    do
+	      {
+		PREFETCH ();
+		if (*d++ != *p++)
+		  {
+		    d = dfail;
+		    goto fail;
+		  }
+	      }
+	    while (--mcnt);
+#else  /* emacs */
+	  /* The cost of testing `translate' is comparatively small.  */
+	  if (target_multibyte)
+	    do
+	      {
+		int pat_charlen, buf_charlen;
+		int pat_ch, buf_ch;
 
-		    PREFETCH ();
-		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
-		    buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+		PREFETCH ();
+		if (multibyte)
+		  pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+		else
+		  {
+		    pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
+		    pat_charlen = 1;
+		  }
+		buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
 
-		    if (RE_TRANSLATE (translate, buf_ch)
-			!= pat_ch)
-		      {
-			d = dfail;
-			goto fail;
-		      }
+		if (TRANSLATE (buf_ch) != pat_ch)
+		  {
+		    d = dfail;
+		    goto fail;
+		  }
+
+		p += pat_charlen;
+		d += buf_charlen;
+		mcnt -= pat_charlen;
+	      }
+	    while (mcnt > 0);
+	  else
+	    do
+	      {
+		int pat_charlen, buf_charlen;
+		int pat_ch, buf_ch;
 
-		    p += pat_charlen;
-		    d += buf_charlen;
-		    mcnt -= pat_charlen;
+		PREFETCH ();
+		if (multibyte)
+		  {
+		    pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+		    pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
 		  }
-		while (mcnt > 0);
-	      else
-		do
+		else
 		  {
-		    /* Avoid compiler whining about comparison being
-		       always true.  */
-		    int di;
-
-		    PREFETCH ();
-		    di = *d;
-		    if (RE_TRANSLATE (translate, di) != *p++)
-		      {
-			d = dfail;
-			goto fail;
-		      }
-		    d++;
+		    pat_ch = *p;
+		    pat_charlen = 1;
 		  }
-		while (--mcnt);
-	    }
-	  else
-	    {
-	      do
-		{
-		  PREFETCH ();
-		  if (*d++ != *p++)
-		    {
-		      d = dfail;
-		      goto fail;
-		    }
-		}
-	      while (--mcnt);
-	    }
+		buf_ch = RE_CHAR_TO_MULTIBYTE (*d);
+		if (! CHAR_BYTE8_P (buf_ch))
+		  {
+		    buf_ch = TRANSLATE (buf_ch);
+		    buf_ch = RE_CHAR_TO_UNIBYTE (buf_ch);
+		    if (buf_ch < 0)
+		      buf_ch = *d;
+		  }
+		else
+		  buf_ch = *d;
+		if (buf_ch != pat_ch)
+		  {
+		    d = dfail;
+		    goto fail;
+		  }
+		p += pat_charlen;
+		d++;
+	      }
+	    while (--mcnt);
+#endif
 	  break;
 
 
@@ -5411,7 +5610,8 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    DEBUG_PRINT1 ("EXECUTING anychar.\n");
 
 	    PREFETCH ();
-	    buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+	    buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen,
+						target_multibyte);
 	    buf_ch = TRANSLATE (buf_ch);
 
 	    if ((!(bufp->syntax & RE_DOT_NEWLINE)
@@ -5444,6 +5644,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	       in the initial byte-length of the command.  */
 	    int count = 0;
 
+	    /* Whether matching against a unibyte character.  */
+	    boolean unibyte_char = false;
+
 	    DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
 
 	    range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
@@ -5455,10 +5658,38 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      }
 
 	    PREFETCH ();
-	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-	    c = TRANSLATE (c); /* The character to match.  */
+	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len, target_multibyte);
+	    if (target_multibyte)
+	      {
+		int c1;
+
+		c = TRANSLATE (c);
+		c1 = RE_CHAR_TO_UNIBYTE (c);
+		if (c1 >= 0)
+		  {
+		    unibyte_char = true;
+		    c = c1;
+		  }
+	      }
+	    else
+	      {
+		int c1 = RE_CHAR_TO_MULTIBYTE (c);
 
-	    if (SINGLE_BYTE_CHAR_P (c))
+		if (! CHAR_BYTE8_P (c1))
+		  {
+		    c1 = TRANSLATE (c1);
+		    c1 = RE_CHAR_TO_UNIBYTE (c1);
+		    if (c1 >= 0)
+		      {
+			unibyte_char = true;
+			c = c1;
+		      }
+		  }
+		else
+		  unibyte_char = true;
+	      }
+
+	    if (unibyte_char && c < (1 << BYTEWIDTH))
 	      {			/* Lookup bitmap.  */
 		/* Cast to `unsigned' instead of `unsigned char' in
 		   case the bit list is a full 32 bytes long.  */
@@ -5598,7 +5829,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		/* Compare that many; failure if mismatch, else move
 		   past them.  */
 		if (RE_TRANSLATE_P (translate)
-		    ? bcmp_translate (d, d2, mcnt, translate, multibyte)
+		    ? bcmp_translate (d, d2, mcnt, translate, target_multibyte)
 		    : memcmp (d, d2, mcnt))
 		  {
 		    d = dfail;
@@ -5621,7 +5852,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    }
 	  else
 	    {
-	      unsigned char c;
+	      unsigned c;
 	      GET_CHAR_BEFORE_2 (c, d, string1, end1, string2, end2);
 	      if (c == '\n')
 		break;
@@ -5888,6 +6119,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      re_wchar_t c1, c2;
 	      int s1, s2;
+	      int dummy;
 #ifdef emacs
 	      int offset = PTR_TO_OFFSET (d - 1);
 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5899,7 +6131,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
 	      PREFETCH_NOLIMIT ();
-	      c2 = RE_STRING_CHAR (d, dend - d);
+	      GET_CHAR_AFTER (c2, d, dummy);
 	      s2 = SYNTAX (c2);
 
 	      if (/* Case 2: Only one of S1 and S2 is Sword.  */
@@ -5928,13 +6160,14 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      re_wchar_t c1, c2;
 	      int s1, s2;
+	      int dummy;
 #ifdef emacs
 	      int offset = PTR_TO_OFFSET (d);
 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
 	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      PREFETCH ();
-	      c2 = RE_STRING_CHAR (d, dend - d);
+	      GET_CHAR_AFTER (c2, d, dummy);
 	      s2 = SYNTAX (c2);
 
 	      /* Case 2: S2 is not Sword. */
@@ -5972,6 +6205,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 		 is the character at D, and S2 is the syntax of C2.  */
 	      re_wchar_t c1, c2;
 	      int s1, s2;
+	      int dummy;
 #ifdef emacs
 	      int offset = PTR_TO_OFFSET (d) - 1;
 	      int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
@@ -5988,9 +6222,9 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      if (!AT_STRINGS_END (d))
 		{
 		  PREFETCH_NOLIMIT ();
-		  c2 = RE_STRING_CHAR (d, dend - d);
+		  GET_CHAR_AFTER (c2, d, dummy);
 #ifdef emacs
-		  UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+		  UPDATE_SYNTAX_TABLE_FORWARD (charpos);
 #endif
 		  s2 = SYNTAX (c2);
 
@@ -6022,7 +6256,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      UPDATE_SYNTAX_TABLE (charpos);
 #endif
 	      PREFETCH ();
-	      c2 = RE_STRING_CHAR (d, dend - d);
+	      c2 = RE_STRING_CHAR (d, dend - d, target_multibyte);
 	      s2 = SYNTAX (c2);
 
 	      /* Case 2: S2 is neither Sword nor Ssymbol. */
@@ -6075,7 +6309,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	      if (!AT_STRINGS_END (d))
 		{
 		  PREFETCH_NOLIMIT ();
-		  c2 = RE_STRING_CHAR (d, dend - d);
+		  c2 = RE_STRING_CHAR (d, dend - d, target_multibyte);
 #ifdef emacs
 		  UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
@@ -6105,8 +6339,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    int len;
 	    re_wchar_t c;
 
-	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-
+	    GET_CHAR_AFTER (c, d, len);
 	    if ((SYNTAX (c) != (enum syntaxcode) mcnt) ^ not)
 	      goto fail;
 	    d += len;
@@ -6142,8 +6375,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
 	    int len;
 	    re_wchar_t c;
 
-	    c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len);
-
+	    GET_CHAR_AFTER (c, d, len);
 	    if ((!CHAR_HAS_CATEGORY (c, mcnt)) ^ not)
 	      goto fail;
 	    d += len;
@@ -6218,11 +6450,11 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
    bytes; nonzero otherwise.  */
 
 static int
-bcmp_translate (s1, s2, len, translate, multibyte)
+bcmp_translate (s1, s2, len, translate, target_multibyte)
      re_char *s1, *s2;
      register int len;
      RE_TRANSLATE_TYPE translate;
-     const int multibyte;
+     const int target_multibyte;
 {
   register re_char *p1 = s1, *p2 = s2;
   re_char *p1_end = s1 + len;
@@ -6235,8 +6467,8 @@ bcmp_translate (s1, s2, len, translate, multibyte)
       int p1_charlen, p2_charlen;
       re_wchar_t p1_ch, p2_ch;
 
-      p1_ch = RE_STRING_CHAR_AND_LENGTH (p1, p1_end - p1, p1_charlen);
-      p2_ch = RE_STRING_CHAR_AND_LENGTH (p2, p2_end - p2, p2_charlen);
+      GET_CHAR_AFTER (p1_ch, p1, p1_charlen);
+      GET_CHAR_AFTER (p2_ch, p2, p2_charlen);
 
       if (RE_TRANSLATE (translate, p1_ch)
 	  != RE_TRANSLATE (translate, p2_ch))
@@ -6603,20 +6835,17 @@ void
 regfree (preg)
     regex_t *preg;
 {
-  if (preg->buffer != NULL)
-    free (preg->buffer);
+  free (preg->buffer);
   preg->buffer = NULL;
 
   preg->allocated = 0;
   preg->used = 0;
 
-  if (preg->fastmap != NULL)
-    free (preg->fastmap);
+  free (preg->fastmap);
   preg->fastmap = NULL;
   preg->fastmap_accurate = 0;
 
-  if (preg->translate != NULL)
-    free (preg->translate);
+  free (preg->translate);
   preg->translate = NULL;
 }
 WEAK_ALIAS (__regfree, regfree)