internationalization features.)
Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
- 2002, 2003, 2004, 2005, 2006, 2007
+ 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
that make sense only in Emacs. */
#ifdef emacs
+# include <setjmp.h>
# include "lisp.h"
# include "buffer.h"
# define RE_MULTIBYTE_P(bufp) ((bufp)->multibyte)
# define RE_TARGET_MULTIBYTE_P(bufp) ((bufp)->target_multibyte)
-# define RE_STRING_CHAR(p, s, multibyte) \
- (multibyte ? (STRING_CHAR (p, s)) : (*(p)))
-# define RE_STRING_CHAR_AND_LENGTH(p, s, len, multibyte) \
- (multibyte ? (STRING_CHAR_AND_LENGTH (p, s, len)) : ((len) = 1, *(p)))
+# define RE_STRING_CHAR(p, multibyte) \
+ (multibyte ? (STRING_CHAR (p)) : (*(p)))
+# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) \
+ (multibyte ? (STRING_CHAR_AND_LENGTH (p, len)) : ((len) = 1, *(p)))
-# define RE_CHAR_TO_MULTIBYTE(c) unibyte_to_multibyte_table[(c)]
+# define RE_CHAR_TO_MULTIBYTE(c) UNIBYTE_TO_CHAR (c)
-# define RE_CHAR_TO_UNIBYTE(c) \
- (ASCII_CHAR_P (c) ? (c) \
- : CHAR_BYTE8_P (c) ? CHAR_TO_BYTE8 (c) \
- : multibyte_char_to_unibyte_safe (c))
+# define RE_CHAR_TO_UNIBYTE(c) CHAR_TO_BYTE_SAFE (c)
/* Set C a (possibly converted to multibyte) character before P. P
points into a string which is the virtual concatenation of STR1
re_char *dtemp = (p) == (str2) ? (end1) : (p); \
re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \
- c = STRING_CHAR (dtemp, (p) - dtemp); \
+ c = STRING_CHAR (dtemp); \
} \
else \
{ \
# define GET_CHAR_AFTER(c, p, len) \
do { \
if (target_multibyte) \
- (c) = STRING_CHAR_AND_LENGTH (p, 0, len); \
+ (c) = STRING_CHAR_AND_LENGTH (p, len); \
else \
{ \
(c) = *p; \
/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
-# ifdef SWITCH_ENUM_BUG
-# define SWITCH_ENUM_CAST(x) ((int)(x))
-# else
# define SWITCH_ENUM_CAST(x) (x)
-# endif
/* Dummy macros for non-Emacs environments. */
# define BASE_LEADING_CODE_P(c) (0)
# define SAME_CHARSET_P(c1, c2) (1)
# define MULTIBYTE_FORM_LENGTH(p, s) (1)
# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
-# define STRING_CHAR(p, s) (*(p))
-# define RE_STRING_CHAR(p, s, multibyte) STRING_CHAR ((p), (s))
+# define STRING_CHAR(p) (*(p))
+# define RE_STRING_CHAR(p, multibyte) STRING_CHAR (p)
# define CHAR_STRING(c, s) (*(s) = (c), 1)
-# define STRING_CHAR_AND_LENGTH(p, s, actual_len) ((actual_len) = 1, *(p))
-# define RE_STRING_CHAR_AND_LENGTH(p, s, len, multibyte) STRING_CHAR_AND_LENGTH ((p), (s), (len))
+# define STRING_CHAR_AND_LENGTH(p, actual_len) ((actual_len) = 1, *(p))
+# define RE_STRING_CHAR_AND_LENGTH(p, len, multibyte) STRING_CHAR_AND_LENGTH (p, len)
# define RE_CHAR_TO_MULTIBYTE(c) (c)
# define RE_CHAR_TO_UNIBYTE(c) (c)
# define GET_CHAR_BEFORE_2(c, p, str1, end1, str2, end2) \
do { \
int len; \
if (p == pend) return REG_EEND; \
- c = RE_STRING_CHAR_AND_LENGTH (p, pend - p, len, multibyte); \
+ c = RE_STRING_CHAR_AND_LENGTH (p, len, multibyte); \
p += len; \
} while (0)
being larger than MAX_BUF_SIZE, then flag memory exhausted. */
#if __BOUNDED_POINTERS__
# define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
-# define MOVE_BUFFER_POINTER(P) \
- (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
+# define MOVE_BUFFER_POINTER(P) \
+ (__ptrlow (P) = new_buffer + (__ptrlow (P) - old_buffer), \
+ SET_HIGH_BOUND (P), \
+ __ptrvalue (P) = new_buffer + (__ptrvalue (P) - old_buffer))
# define ELSE_EXTEND_BUFFER_HIGH_BOUND \
else \
{ \
SET_HIGH_BOUND (pending_exact); \
}
#else
-# define MOVE_BUFFER_POINTER(P) (P) += incr
+# define MOVE_BUFFER_POINTER(P) ((P) = new_buffer + ((P) - old_buffer))
# define ELSE_EXTEND_BUFFER_HIGH_BOUND
#endif
#define EXTEND_BUFFER() \
do { \
- re_char *old_buffer = bufp->buffer; \
+ unsigned char *old_buffer = bufp->buffer; \
if (bufp->allocated == MAX_BUF_SIZE) \
return REG_ESIZE; \
bufp->allocated <<= 1; \
/* If the buffer moved, move all the pointers into it. */ \
if (old_buffer != bufp->buffer) \
{ \
- int incr = bufp->buffer - old_buffer; \
+ unsigned char *new_buffer = bufp->buffer; \
MOVE_BUFFER_POINTER (b); \
MOVE_BUFFER_POINTER (begalt); \
if (fixup_alt_jump) \
} while (0)
-/* Both FROM and TO are mulitbyte characters. */
+/* Both FROM and TO are multibyte characters. */
#define SETUP_MULTIBYTE_RANGE(work_area, FROM, TO) \
do { \
#endif /* emacs */
-/* Record the the image of the range start..end when passed through
+/* Record the image of the range start..end when passed through
TRANSLATE. This is not necessarily TRANSLATE(start)..TRANSLATE(end)
and is not even necessarily contiguous.
Normally we approximate it with the smallest contiguous range that contains
don't need to handle them for multibyte.
They are distinguished by a negative wctype. */
+ /* Set up the gl_state object to its buffer-defined
+ value. This hardcodes the buffer-global
+ syntax-table for ASCII chars, while the other chars
+ will obey syntax-table properties. It's not ideal,
+ but it's the way it's been done until now. */
+ SETUP_SYNTAX_TABLE (BEGV, 0);
+
for (ch = 0; ch < 256; ++ch)
{
c = RE_CHAR_TO_MULTIBYTE (ch);
if (c1 != c2 && (c1 = RE_CHAR_TO_UNIBYTE (c2)) >= 0)
c = c1;
- }
+ }
*b++ = c;
len = 1;
}
the corresponding multibyte character. */
int c = RE_CHAR_TO_MULTIBYTE (p[1]);
- if (! CHAR_BYTE8_P (c))
- fastmap[CHAR_LEADING_CODE (c)] = 1;
+ fastmap[CHAR_LEADING_CODE (c)] = 1;
}
}
break;
if (/* Any leading code can possibly start a character
which doesn't match the specified set of characters. */
not
- ||
+ ||
/* If we can match a character class, we can match any
multibyte characters. */
(CHARSET_RANGE_TABLE_EXISTS_P (&p[-2])
&& match_any_multibyte_characters == false)
{
/* Set fastmap[I] to 1 where I is a leading code of each
- multibyte characer in the range table. */
+ multibyte character in the range table. */
int c, count;
unsigned char lc1, lc2;
{
int buf_charlen;
- buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
- buf_charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
buf_ch = RE_TRANSLATE (translate, buf_ch);
if (fastmap[CHAR_LEADING_CODE (buf_ch)])
break;
{
int buf_charlen;
- buf_ch = STRING_CHAR_AND_LENGTH (d, range - lim,
- buf_charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
if (fastmap[CHAR_LEADING_CODE (buf_ch)])
break;
range -= buf_charlen;
}
else /* Searching backwards. */
{
- int room = (startpos >= size1
- ? size2 + size1 - startpos
- : size1 - startpos);
if (multibyte)
{
- buf_ch = STRING_CHAR (d, room);
+ buf_ch = STRING_CHAR (d);
buf_ch = TRANSLATE (buf_ch);
if (! fastmap[CHAR_LEADING_CODE (buf_ch)])
goto advance;
{
register re_wchar_t c
= (re_opcode_t) *p2 == endline ? '\n'
- : RE_STRING_CHAR (p2 + 2, pend - p2 - 2, multibyte);
+ : RE_STRING_CHAR (p2 + 2, multibyte);
if ((re_opcode_t) *p1 == exactn)
{
- if (c != RE_STRING_CHAR (p1 + 2, pend - p1 - 2, multibyte))
+ if (c != RE_STRING_CHAR (p1 + 2, multibyte))
{
DEBUG_PRINT3 (" '%c' != '%c' => fast loop.\n", c, p1[2]);
return 1;
PREFETCH ();
if (multibyte)
- pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
+ pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
else
{
pat_ch = RE_CHAR_TO_MULTIBYTE (*p);
pat_charlen = 1;
}
- buf_ch = STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen);
+ buf_ch = STRING_CHAR_AND_LENGTH (d, buf_charlen);
if (TRANSLATE (buf_ch) != pat_ch)
{
PREFETCH ();
if (multibyte)
{
- pat_ch = STRING_CHAR_AND_LENGTH (p, pend - p, pat_charlen);
- if (CHAR_BYTE8_P (pat_ch))
- pat_ch = CHAR_TO_BYTE8 (pat_ch);
- else
- pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
+ pat_ch = STRING_CHAR_AND_LENGTH (p, pat_charlen);
+ pat_ch = RE_CHAR_TO_UNIBYTE (pat_ch);
}
else
{
if (buf_ch < 0)
buf_ch = *d;
}
+ else
+ buf_ch = *d;
if (buf_ch != pat_ch)
{
d = dfail;
DEBUG_PRINT1 ("EXECUTING anychar.\n");
PREFETCH ();
- buf_ch = RE_STRING_CHAR_AND_LENGTH (d, dend - d, buf_charlen,
+ buf_ch = RE_STRING_CHAR_AND_LENGTH (d, buf_charlen,
target_multibyte);
buf_ch = TRANSLATE (buf_ch);
in the initial byte-length of the command. */
int count = 0;
+ /* Whether matching against a unibyte character. */
+ boolean unibyte_char = false;
+
DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
range_table_exists = CHARSET_RANGE_TABLE_EXISTS_P (&p[-1]);
}
PREFETCH ();
- c = RE_STRING_CHAR_AND_LENGTH (d, dend - d, len, target_multibyte);
+ c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
if (target_multibyte)
{
int c1;
c = TRANSLATE (c);
c1 = RE_CHAR_TO_UNIBYTE (c);
if (c1 >= 0)
- c = c1;
+ {
+ unibyte_char = true;
+ c = c1;
+ }
}
else
{
c1 = TRANSLATE (c1);
c1 = RE_CHAR_TO_UNIBYTE (c1);
if (c1 >= 0)
- c = c1;
+ {
+ unibyte_char = true;
+ c = c1;
+ }
}
+ else
+ unibyte_char = true;
}
- if (c < (1 << BYTEWIDTH))
+ if (unibyte_char && c < (1 << BYTEWIDTH))
{ /* Lookup bitmap. */
/* Cast to `unsigned' instead of `unsigned char' in
case the bit list is a full 32 bytes long. */
UPDATE_SYNTAX_TABLE (charpos);
#endif
PREFETCH ();
- c2 = RE_STRING_CHAR (d, dend - d, target_multibyte);
+ c2 = RE_STRING_CHAR (d, target_multibyte);
s2 = SYNTAX (c2);
/* Case 2: S2 is neither Sword nor Ssymbol. */
if (!AT_STRINGS_END (d))
{
PREFETCH_NOLIMIT ();
- c2 = RE_STRING_CHAR (d, dend - d, target_multibyte);
+ c2 = RE_STRING_CHAR (d, target_multibyte);
#ifdef emacs
UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
regfree (preg)
regex_t *preg;
{
- if (preg->buffer != NULL)
- free (preg->buffer);
+ free (preg->buffer);
preg->buffer = NULL;
preg->allocated = 0;
preg->used = 0;
- if (preg->fastmap != NULL)
- free (preg->fastmap);
+ free (preg->fastmap);
preg->fastmap = NULL;
preg->fastmap_accurate = 0;
- if (preg->translate != NULL)
- free (preg->translate);
+ free (preg->translate);
preg->translate = NULL;
}
WEAK_ALIAS (__regfree, regfree)