0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc.
+ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005, 2006 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA. */
/* TODO:
char *realloc ();
# endif
+/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */
+
+/* Allocate SIZE bytes with malloc and return the result.  If malloc
+   yields a null pointer for a nonzero SIZE, write a message to file
+   descriptor 2 and exit with status 1.  */
+void *
+xmalloc (size)
+     size_t size;
+{
+  void *mem = (void *) malloc (size);
+
+  /* A null result for a zero-byte request is not treated as failure.  */
+  if (mem || !size)
+    return mem;
+
+  write (2, "virtual memory exhausted\n", 25);
+  exit (1);
+  return mem;
+}
+
+/* Resize BLOCK to SIZE bytes and return the resulting pointer.  If
+   the allocator yields a null pointer for a nonzero SIZE, write a
+   message to file descriptor 2 and exit with status 1.  */
+void *
+xrealloc (block, size)
+     void *block;
+     size_t size;
+{
+  /* Some reallocs do not accept a null BLOCK, so go through malloc
+     explicitly in that case.  */
+  void *mem = (block
+               ? (void *) realloc (block, size)
+               : (void *) malloc (size));
+
+  /* A null result for a zero-byte request is not treated as failure.  */
+  if (mem || !size)
+    return mem;
+
+  write (2, "virtual memory exhausted\n", 25);
+  exit (1);
+  return mem;
+}
+
+/* Make every later use of the names malloc and realloc expand to the
+   checking wrappers xmalloc and xrealloc defined above, first dropping
+   any macro definitions of those names that are already in effect.  */
+# ifdef malloc
+# undef malloc
+# endif
+# define malloc xmalloc
+# ifdef realloc
+# undef realloc
+# endif
+# define realloc xrealloc
+
/* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
If nothing else has been done, use the method below. */
# ifdef INHIBIT_STRING_HEADER
/* Define the syntax stuff for \<, \>, etc. */
/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
-enum syntaxcode { Swhitespace = 0, Sword = 1 };
+enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
# ifdef SWITCH_ENUM_BUG
# define SWITCH_ENUM_CAST(x) ((int)(x))
if (ISALNUM (c))
re_syntax_table[c] = Sword;
- re_syntax_table['_'] = Sword;
+ re_syntax_table['_'] = Ssymbol;
done = 1;
}
wordbound, /* Succeeds if at a word boundary. */
notwordbound, /* Succeeds if not at a word boundary. */
+ symbeg, /* Succeeds if at symbol beginning. */
+ symend, /* Succeeds if at symbol end. */
+
/* Matches any character whose syntax is specified. Followed by
a byte which contains a syntax code, e.g., Sword. */
syntaxspec,
case wordend:
fprintf (stderr, "/wordend");
+ break;
+
+ case symbeg:
+ fprintf (stderr, "/symbeg");
+ break;
+
+ case symend:
+ fprintf (stderr, "/symend");
+ break;
case syntaxspec:
fprintf (stderr, "/syntaxspec");
reg_syntax_t
re_set_syntax (syntax)
- reg_syntax_t syntax;
+ reg_syntax_t syntax;
{
reg_syntax_t ret = re_syntax_options;
return ret;
}
WEAK_ALIAS (__re_set_syntax, re_set_syntax)
+
+/* Regexp to use to replace spaces, or NULL meaning don't. */
+static re_char *whitespace_regexp;
+
+/* Set the regexp that the pattern compiler substitutes for a run of
+   literal spaces.  REGEXP must be NUL-terminated, because its length
+   is taken with strlen when it is spliced into a pattern.  Pass NULL
+   to have spaces treated as ordinary characters.  Only the pointer is
+   stored, not a copy of the string, so the caller must keep REGEXP
+   valid for as long as patterns may be compiled with it.  */
+void
+re_set_whitespace_regexp (regexp)
+     const char *regexp;
+{
+  whitespace_regexp = (re_char *) regexp;
+}
\f
/* This table gives an error message for each of the error codes listed
in regex.h. Obviously the order here has to be same as there.
gettext_noop ("Premature end of regular expression"), /* REG_EEND */
gettext_noop ("Regular expression too big"), /* REG_ESIZE */
gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
+ gettext_noop ("Range striding over charsets") /* REG_ERANGEX */
};
\f
/* Avoiding alloca during matching, to placate r_alloc. */
/* The next available element. */
#define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
-/* Explicit quit checking is only used on NTemacs. */
-#if defined WINDOWSNT && defined emacs && defined QUIT
+/* Explicit quit checking is only used on NTemacs and whenever we
+ use polling to process input events. */
+#if defined emacs && (defined WINDOWSNT || defined SYNC_INPUT) && defined QUIT
extern int immediate_quit;
# define IMMEDIATE_QUIT_CHECK \
do { \
/* Get the next unsigned number in the uncompiled pattern. */
#define GET_UNSIGNED_NUMBER(num) \
- do { if (p != pend) \
- { \
- PATFETCH (c); \
- if (c == ' ') \
- FREE_STACK_RETURN (REG_BADBR); \
- while ('0' <= c && c <= '9') \
- { \
- int prev; \
- if (num < 0) \
- num = 0; \
- prev = num; \
- num = num * 10 + c - '0'; \
- if (num / 10 != prev) \
- FREE_STACK_RETURN (REG_BADBR); \
- if (p == pend) \
- break; \
- PATFETCH (c); \
- } \
- if (c == ' ') \
- FREE_STACK_RETURN (REG_BADBR); \
- } \
- } while (0)
+ do { \
+ if (p == pend) \
+ FREE_STACK_RETURN (REG_EBRACE); \
+ else \
+ { \
+ PATFETCH (c); \
+ while ('0' <= c && c <= '9') \
+ { \
+ int prev; \
+ if (num < 0) \
+ num = 0; \
+ prev = num; \
+ num = num * 10 + c - '0'; \
+ if (num / 10 != prev) \
+ FREE_STACK_RETURN (REG_BADBR); \
+ if (p == pend) \
+ FREE_STACK_RETURN (REG_EBRACE); \
+ PATFETCH (c); \
+ } \
+ } \
+ } while (0)
\f
-#if WIDE_CHAR_SUPPORT
-/* The GNU C library provides support for user-defined character classes
- and the functions from ISO C amendement 1. */
-# ifdef CHARCLASS_NAME_MAX
-# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
-# else
-/* This shouldn't happen but some implementation might still have this
- problem. Use a reasonable default value. */
-# define CHAR_CLASS_MAX_LENGTH 256
-# endif
-typedef wctype_t re_wctype_t;
-typedef wchar_t re_wchar_t;
-# define re_wctype wctype
-# define re_iswctype iswctype
-# define re_wctype_to_bit(cc) 0
-#else
-# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
-# define btowc(c) c
-
-/* Character classes. */
-typedef enum { RECC_ERROR = 0,
- RECC_ALNUM, RECC_ALPHA, RECC_WORD,
- RECC_GRAPH, RECC_PRINT,
- RECC_LOWER, RECC_UPPER,
- RECC_PUNCT, RECC_CNTRL,
- RECC_DIGIT, RECC_XDIGIT,
- RECC_BLANK, RECC_SPACE,
- RECC_MULTIBYTE, RECC_NONASCII,
- RECC_ASCII, RECC_UNIBYTE
-} re_wctype_t;
-
-typedef int re_wchar_t;
+#if ! WIDE_CHAR_SUPPORT
/* Map a string to the char class it names (if any). */
-static re_wctype_t
+re_wctype_t
re_wctype (str)
re_char *str;
{
}
/* True iff CH is in the char class CC. */
-static boolean
+boolean
re_iswctype (ch, cc)
int ch;
re_wctype_t cc;
/* If the object matched can contain multibyte characters. */
const boolean multibyte = RE_MULTIBYTE_P (bufp);
+ /* Nonzero if we have pushed down into a subpattern. */
+ int in_subpattern = 0;
+
+ /* These hold the values of p, pattern, and pend from the main
+ pattern when we have pushed into a subpattern. */
+ re_char *main_p;
+ re_char *main_pattern;
+ re_char *main_pend;
+
#ifdef DEBUG
debug++;
DEBUG_PRINT1 ("\nCompiling pattern: ");
begalt = b = bufp->buffer;
/* Loop through the uncompiled pattern until we're at the end. */
- while (p != pend)
+ while (1)
{
+ if (p == pend)
+ {
+ /* If this is the end of an included regexp,
+ pop back to the main regexp and try again. */
+ if (in_subpattern)
+ {
+ in_subpattern = 0;
+ pattern = main_pattern;
+ p = main_p;
+ pend = main_pend;
+ continue;
+ }
+ /* If this is the end of the main regexp, we are done. */
+ break;
+ }
+
PATFETCH (c);
switch (c)
{
+ case ' ':
+ {
+ re_char *p1 = p;
+
+ /* If there's no special whitespace regexp, treat
+ spaces normally. And don't try to do this recursively. */
+ if (!whitespace_regexp || in_subpattern)
+ goto normal_char;
+
+ /* Peek past following spaces. */
+ while (p1 != pend)
+ {
+ if (*p1 != ' ')
+ break;
+ p1++;
+ }
+ /* If the spaces are followed by a repetition op,
+ treat them normally. */
+ if (p1 != pend
+ && (*p1 == '*' || *p1 == '+' || *p1 == '?'
+ || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
+ goto normal_char;
+
+ /* Replace the spaces with the whitespace regexp. */
+ in_subpattern = 1;
+ main_p = p1;
+ main_pend = pend;
+ main_pattern = pattern;
+ p = pattern = whitespace_regexp;
+ pend = p + strlen (p);
+ break;
+ }
+
case '^':
{
if ( /* If at start of pattern, it's an operator. */
}
}
else if (!SAME_CHARSET_P (c, c1))
- FREE_STACK_RETURN (REG_ERANGE);
+ FREE_STACK_RETURN (REG_ERANGEX);
}
else
/* Range from C to C. */
beg_interval = p;
- if (p == pend)
- FREE_STACK_RETURN (REG_EBRACE);
-
GET_UNSIGNED_NUMBER (lower_bound);
if (c == ',')
{
if (c != '\\')
FREE_STACK_RETURN (REG_BADBR);
-
+ if (p == pend)
+ FREE_STACK_RETURN (REG_EESCAPE);
PATFETCH (c);
}
BUF_PUSH (wordend);
break;
+ case '_':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ PATFETCH (c);
+ if (c == '<')
+ BUF_PUSH (symbeg);
+ else if (c == '>')
+ BUF_PUSH (symend);
+ else
+ FREE_STACK_RETURN (REG_BADPAT);
+ break;
+
case 'b':
if (syntax & RE_NO_GNU_OPS)
goto normal_char;
case notwordbound:
case wordbeg:
case wordend:
+ case symbeg:
+ case symend:
continue;
d += buf_charlen;
}
else
- while (range > lim
- && !fastmap[RE_TRANSLATE (translate, *d)])
- {
- d++;
- range--;
- }
+ {
+ /* Convert *d to integer to shut up GCC's
+ whining about comparison that is always
+ true. */
+ int di = *d;
+
+ while (range > lim
+ && !fastmap[RE_TRANSLATE (translate, di)])
+ {
+ di = *(++d);
+ range--;
+ }
+ }
}
else
while (range > lim && !fastmap[*d])
break;
case wordend:
- case notsyntaxspec:
+ return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
+ case symend:
return ((re_opcode_t) *p1 == syntaxspec
- && p1[1] == (op2 == wordend ? Sword : p2[1]));
+ && (p1[1] == Ssymbol || p1[1] == Sword));
+ case notsyntaxspec:
+ return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
case wordbeg:
- case syntaxspec:
+ return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
+ case symbeg:
return ((re_opcode_t) *p1 == notsyntaxspec
- && p1[1] == (op2 == wordend ? Sword : p2[1]));
+ && (p1[1] == Ssymbol || p1[1] == Sword));
+ case syntaxspec:
+ return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
case wordbound:
return (((re_opcode_t) *p1 == notsyntaxspec
else
do
{
+ /* Avoid compiler whining about comparison being
+ always true. */
+ int di;
+
PREFETCH ();
- if (RE_TRANSLATE (translate, *d) != *p++)
+ di = *d;
+ if (RE_TRANSLATE (translate, di) != *p++)
{
d = dfail;
goto fail;
the repetition text and either the following jump or
pop_failure_jump back to this on_failure_jump. */
case on_failure_jump:
- IMMEDIATE_QUIT_CHECK;
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n",
mcnt, p + mcnt);
then we can use a non-backtracking loop based on
on_failure_keep_string_jump instead of on_failure_jump. */
case on_failure_jump_smart:
- IMMEDIATE_QUIT_CHECK;
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
mcnt, p + mcnt);
PREFETCH_NOLIMIT ();
c2 = RE_STRING_CHAR (d, dend - d);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
s2 = SYNTAX (c2);
}
break;
+ case symbeg:
+ DEBUG_PRINT1 ("EXECUTING symbeg.\n");
+
+ /* We FAIL in one of the following cases: */
+
+ /* Case 1: D is at the end of string. */
+ if (AT_STRINGS_END (d))
+ goto fail;
+ else
+ {
+ /* C1 is the character before D, S1 is the syntax of C1, C2
+ is the character at D, and S2 is the syntax of C2. */
+ re_wchar_t c1, c2;
+ int s1, s2;
+#ifdef emacs
+ int offset = PTR_TO_OFFSET (d);
+ int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ UPDATE_SYNTAX_TABLE (charpos);
+#endif
+ PREFETCH ();
+ c2 = RE_STRING_CHAR (d, dend - d);
+ s2 = SYNTAX (c2);
+
+ /* Case 2: S2 is neither Sword nor Ssymbol. */
+ if (s2 != Sword && s2 != Ssymbol)
+ goto fail;
+
+ /* Case 3: D is not at the beginning of string ... */
+ if (!AT_STRINGS_BEG (d))
+ {
+ GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+ UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
+#endif
+ s1 = SYNTAX (c1);
+
+ /* ... and S1 is Sword or Ssymbol. */
+ if (s1 == Sword || s1 == Ssymbol)
+ goto fail;
+ }
+ }
+ break;
+
+ case symend:
+ DEBUG_PRINT1 ("EXECUTING symend.\n");
+
+ /* We FAIL in one of the following cases: */
+
+ /* Case 1: D is at the beginning of string. */
+ if (AT_STRINGS_BEG (d))
+ goto fail;
+ else
+ {
+ /* C1 is the character before D, S1 is the syntax of C1, C2
+ is the character at D, and S2 is the syntax of C2. */
+ re_wchar_t c1, c2;
+ int s1, s2;
+#ifdef emacs
+ int offset = PTR_TO_OFFSET (d) - 1;
+ int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ UPDATE_SYNTAX_TABLE (charpos);
+#endif
+ GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+ s1 = SYNTAX (c1);
+
+ /* Case 2: S1 is neither Ssymbol nor Sword. */
+ if (s1 != Sword && s1 != Ssymbol)
+ goto fail;
+
+ /* Case 3: D is not at the end of string ... */
+ if (!AT_STRINGS_END (d))
+ {
+ PREFETCH_NOLIMIT ();
+ c2 = RE_STRING_CHAR (d, dend - d);
+#ifdef emacs
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+#endif
+ s2 = SYNTAX (c2);
+
+ /* ... and S2 is Sword or Ssymbol. */
+ if (s2 == Sword || s2 == Ssymbol)
+ goto fail;
+ }
+ }
+ break;
+
case syntaxspec:
case notsyntaxspec:
not = (re_opcode_t) *(p - 1) == notsyntaxspec;