0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993,94,95,96,97,98,99,2000 Free Software Foundation, Inc.
+ Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+ 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+ Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
USA. */
-/* BUGS:
- - (x?)*y\1z should match both xxxxyxz and xxxyz.
- TODO:
+/* TODO:
- structure the opcode space into opcode+flag.
- merge with glibc's regex.[ch].
- replace (succeed_n + jump_n + set_number_at) with something that doesn't
#pragma alloca
#endif
-#undef _GNU_SOURCE
-#define _GNU_SOURCE
-
#ifdef HAVE_CONFIG_H
# include <config.h>
#endif
{ \
re_char *dtemp = (p) == (str2) ? (end1) : (p); \
re_char *dlimit = ((p) > (str2) && (p) <= (end2)) ? (str2) : (str1); \
- while (dtemp-- > dlimit && !CHAR_HEAD_P (*dtemp)); \
- c = STRING_CHAR (dtemp, (p) - dtemp); \
+ re_char *d0 = dtemp; \
+ PREV_CHAR_BOUNDARY (d0, dlimit); \
+ c = STRING_CHAR (d0, dtemp - d0); \
} \
else \
(c = ((p) == (str2) ? (end1) : (p))[-1]); \
/* Define the syntax stuff for \<, \>, etc. */
/* Sword must be nonzero for the wordchar pattern commands in re_match_2. */
-enum syntaxcode { Swhitespace = 0, Sword = 1 };
+enum syntaxcode { Swhitespace = 0, Sword = 1, Ssymbol = 2 };
# ifdef SWITCH_ENUM_BUG
# define SWITCH_ENUM_CAST(x) ((int)(x))
# define SINGLE_BYTE_CHAR_P(c) (1)
# define SAME_CHARSET_P(c1, c2) (1)
# define MULTIBYTE_FORM_LENGTH(p, s) (1)
+# define PREV_CHAR_BOUNDARY(p, limit) ((p)--)
# define STRING_CHAR(p, s) (*(p))
# define RE_STRING_CHAR STRING_CHAR
# define CHAR_STRING(c, s) (*(s) = (c), 1)
if (ISALNUM (c))
re_syntax_table[c] = Sword;
- re_syntax_table['_'] = Sword;
+ re_syntax_table['_'] = Ssymbol;
done = 1;
}
wordbound, /* Succeeds if at a word boundary. */
notwordbound, /* Succeeds if not at a word boundary. */
+ symbeg, /* Succeeds if at symbol beginning. */
+ symend, /* Succeeds if at symbol end. */
+
/* Matches any character whose syntax is specified. Followed by
a byte which contains a syntax code, e.g., Sword. */
syntaxspec,
if (start == NULL)
{
- printf ("(null)\n");
+ fprintf (stderr, "(null)\n");
return;
}
/* Loop over pattern commands. */
while (p < pend)
{
- printf ("%d:\t", p - start);
+ fprintf (stderr, "%d:\t", p - start);
switch ((re_opcode_t) *p++)
{
case no_op:
- printf ("/no_op");
+ fprintf (stderr, "/no_op");
break;
case succeed:
- printf ("/succeed");
+ fprintf (stderr, "/succeed");
break;
case exactn:
mcnt = *p++;
- printf ("/exactn/%d", mcnt);
+ fprintf (stderr, "/exactn/%d", mcnt);
do
{
- putchar ('/');
- putchar (*p++);
+ fprintf (stderr, "/%c", *p++);
}
while (--mcnt);
break;
case start_memory:
- printf ("/start_memory/%d", *p++);
+ fprintf (stderr, "/start_memory/%d", *p++);
break;
case stop_memory:
- printf ("/stop_memory/%d", *p++);
+ fprintf (stderr, "/stop_memory/%d", *p++);
break;
case duplicate:
- printf ("/duplicate/%d", *p++);
+ fprintf (stderr, "/duplicate/%d", *p++);
break;
case anychar:
- printf ("/anychar");
+ fprintf (stderr, "/anychar");
break;
case charset:
int length = CHARSET_BITMAP_SIZE (p - 1);
int has_range_table = CHARSET_RANGE_TABLE_EXISTS_P (p - 1);
- printf ("/charset [%s",
- (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
+ fprintf (stderr, "/charset [%s",
+ (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
- assert (p + *p < pend);
+ if (p + *p >= pend)
+ fprintf (stderr, " !extends past end of pattern! ");
for (c = 0; c < 256; c++)
if (c / 8 < length
/* Are we starting a range? */
if (last + 1 == c && ! in_range)
{
- putchar ('-');
+ fprintf (stderr, "-");
in_range = 1;
}
/* Have we broken a range? */
else if (last + 1 != c && in_range)
{
- putchar (last);
+ fprintf (stderr, "%c", last);
in_range = 0;
}
if (! in_range)
- putchar (c);
+ fprintf (stderr, "%c", c);
last = c;
}
if (in_range)
- putchar (last);
+ fprintf (stderr, "%c", last);
- putchar (']');
+ fprintf (stderr, "]");
p += 1 + length;
if (has_range_table)
{
int count;
- printf ("has-range-table");
+ fprintf (stderr, "has-range-table");
/* ??? Should print the range table; for now, just skip it. */
p += 2; /* skip range table bits */
break;
case begline:
- printf ("/begline");
+ fprintf (stderr, "/begline");
break;
case endline:
- printf ("/endline");
+ fprintf (stderr, "/endline");
break;
case on_failure_jump:
extract_number_and_incr (&mcnt, &p);
- printf ("/on_failure_jump to %d", p + mcnt - start);
+ fprintf (stderr, "/on_failure_jump to %d", p + mcnt - start);
break;
case on_failure_keep_string_jump:
extract_number_and_incr (&mcnt, &p);
- printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
+ fprintf (stderr, "/on_failure_keep_string_jump to %d", p + mcnt - start);
break;
case on_failure_jump_nastyloop:
extract_number_and_incr (&mcnt, &p);
- printf ("/on_failure_jump_nastyloop to %d", p + mcnt - start);
+ fprintf (stderr, "/on_failure_jump_nastyloop to %d", p + mcnt - start);
break;
case on_failure_jump_loop:
extract_number_and_incr (&mcnt, &p);
- printf ("/on_failure_jump_loop to %d", p + mcnt - start);
+ fprintf (stderr, "/on_failure_jump_loop to %d", p + mcnt - start);
break;
case on_failure_jump_smart:
extract_number_and_incr (&mcnt, &p);
- printf ("/on_failure_jump_smart to %d", p + mcnt - start);
+ fprintf (stderr, "/on_failure_jump_smart to %d", p + mcnt - start);
break;
case jump:
extract_number_and_incr (&mcnt, &p);
- printf ("/jump to %d", p + mcnt - start);
+ fprintf (stderr, "/jump to %d", p + mcnt - start);
break;
case succeed_n:
extract_number_and_incr (&mcnt, &p);
extract_number_and_incr (&mcnt2, &p);
- printf ("/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
+ fprintf (stderr, "/succeed_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
break;
case jump_n:
extract_number_and_incr (&mcnt, &p);
extract_number_and_incr (&mcnt2, &p);
- printf ("/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
+ fprintf (stderr, "/jump_n to %d, %d times", p - 2 + mcnt - start, mcnt2);
break;
case set_number_at:
extract_number_and_incr (&mcnt, &p);
extract_number_and_incr (&mcnt2, &p);
- printf ("/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
+ fprintf (stderr, "/set_number_at location %d to %d", p - 2 + mcnt - start, mcnt2);
break;
case wordbound:
- printf ("/wordbound");
+ fprintf (stderr, "/wordbound");
break;
case notwordbound:
- printf ("/notwordbound");
+ fprintf (stderr, "/notwordbound");
break;
case wordbeg:
- printf ("/wordbeg");
+ fprintf (stderr, "/wordbeg");
break;
case wordend:
- printf ("/wordend");
+ fprintf (stderr, "/wordend");
+ break;
+
+ case symbeg:
+ fprintf (stderr, "/symbeg");
+ break;
+
+ case symend:
+ fprintf (stderr, "/symend");
+ break;
case syntaxspec:
- printf ("/syntaxspec");
+ fprintf (stderr, "/syntaxspec");
mcnt = *p++;
- printf ("/%d", mcnt);
+ fprintf (stderr, "/%d", mcnt);
break;
case notsyntaxspec:
- printf ("/notsyntaxspec");
+ fprintf (stderr, "/notsyntaxspec");
mcnt = *p++;
- printf ("/%d", mcnt);
+ fprintf (stderr, "/%d", mcnt);
break;
# ifdef emacs
case before_dot:
- printf ("/before_dot");
+ fprintf (stderr, "/before_dot");
break;
case at_dot:
- printf ("/at_dot");
+ fprintf (stderr, "/at_dot");
break;
case after_dot:
- printf ("/after_dot");
+ fprintf (stderr, "/after_dot");
break;
case categoryspec:
- printf ("/categoryspec");
+ fprintf (stderr, "/categoryspec");
mcnt = *p++;
- printf ("/%d", mcnt);
+ fprintf (stderr, "/%d", mcnt);
break;
case notcategoryspec:
- printf ("/notcategoryspec");
+ fprintf (stderr, "/notcategoryspec");
mcnt = *p++;
- printf ("/%d", mcnt);
+ fprintf (stderr, "/%d", mcnt);
break;
# endif /* emacs */
case begbuf:
- printf ("/begbuf");
+ fprintf (stderr, "/begbuf");
break;
case endbuf:
- printf ("/endbuf");
+ fprintf (stderr, "/endbuf");
break;
default:
- printf ("?%d", *(p-1));
+ fprintf (stderr, "?%d", *(p-1));
}
- putchar ('\n');
+ fprintf (stderr, "\n");
}
- printf ("%d:\tend of pattern.\n", p - start);
+ fprintf (stderr, "%d:\tend of pattern.\n", p - start);
}
reg_syntax_t
re_set_syntax (syntax)
- reg_syntax_t syntax;
+ reg_syntax_t syntax;
{
reg_syntax_t ret = re_syntax_options;
return ret;
}
WEAK_ALIAS (__re_set_syntax, re_set_syntax)
+
+/* Regexp to use to replace spaces, or NULL meaning don't. */
+static re_char *whitespace_regexp;
+
+/* Set the regexp that the pattern compiler substitutes for runs of
+ spaces in a pattern. REGEXP is stored by reference, not copied;
+ pass NULL to have spaces treated as ordinary characters. */
+void
+re_set_whitespace_regexp (regexp)
+ const char *regexp;
+{
+ whitespace_regexp = (re_char *) regexp;
+}
+WEAK_ALIAS (__re_set_whitespace_regexp, re_set_whitespace_regexp)
\f
/* This table gives an error message for each of the error codes listed
in regex.h. Obviously the order here has to be same as there.
gettext_noop ("Premature end of regular expression"), /* REG_EEND */
gettext_noop ("Regular expression too big"), /* REG_ESIZE */
gettext_noop ("Unmatched ) or \\)"), /* REG_ERPAREN */
+ gettext_noop ("Range striding over charsets") /* REG_ERANGEX */
};
\f
/* Avoiding alloca during matching, to placate r_alloc. */
} \
} while (0)
-/* Discard a saved register off the stack. */
-#define DISCARD_FAILURE_REG_OR_COUNT() \
-do { \
- int reg = POP_FAILURE_INT (); \
- if (reg == -1) \
- { \
- /* It's a counter. */ \
- POP_FAILURE_POINTER (); \
- reg = POP_FAILURE_INT (); \
- DEBUG_PRINT3 (" Discard counter %p = %d\n", ptr, reg); \
- } \
- else \
- { \
- POP_FAILURE_POINTER (); \
- POP_FAILURE_POINTER (); \
- DEBUG_PRINT4 (" Discard reg %d (spanning %p -> %p)\n", \
- reg, regstart[reg], regend[reg]); \
- } \
-} while (0)
-
/* Check that we are not stuck in an infinite loop. */
#define CHECK_INFINITE_LOOP(pat_cur, string_place) \
do { \
&& FAILURE_PAT (failure) <= bufp->buffer + bufp->used); \
if (FAILURE_PAT (failure) == pat_cur) \
{ \
- while (fail_stack.frame < fail_stack.avail) \
- DISCARD_FAILURE_REG_OR_COUNT (); \
- goto fail; \
+ cycle = 1; \
+ break; \
} \
DEBUG_PRINT2 (" Other pattern: %p\n", FAILURE_PAT (failure)); \
failure = NEXT_FAILURE_HANDLE(failure); \
} \
DEBUG_PRINT2 (" Other string: %p\n", FAILURE_STR (failure)); \
} while (0)
-
+
/* Push the information about the state we will need
if we ever fail back to it.
/* This is not an arbitrary limit: the arguments which represent offsets
- into the pattern are two bytes long. So if 2^16 bytes turns out to
+ into the pattern are two bytes long. So if 2^15 bytes turns out to
be too small, many things would have to change. */
+# define MAX_BUF_SIZE (1L << 15)
+
+#if 0 /* This is when we thought it could be 2^16 bytes. */
/* Any other compiler which, like MSC, has allocation limit below 2^16
bytes will have to use approach similar to what was done below for
MSC and drop MAX_BUF_SIZE a bit. Otherwise you may end up
#else
# define MAX_BUF_SIZE (1L << 16)
#endif
+#endif /* 0 */
/* Extend the buffer by twice its current size via realloc and
reset the pointers that pointed into the old block to point to the
/* Get the next unsigned number in the uncompiled pattern. */
#define GET_UNSIGNED_NUMBER(num) \
- do { if (p != pend) \
- { \
- PATFETCH (c); \
- if (c == ' ') \
- FREE_STACK_RETURN (REG_BADBR); \
- while ('0' <= c && c <= '9') \
- { \
- int prev; \
- if (num < 0) \
- num = 0; \
- prev = num; \
- num = num * 10 + c - '0'; \
- if (num / 10 != prev) \
- FREE_STACK_RETURN (REG_BADBR); \
- if (p == pend) \
- break; \
- PATFETCH (c); \
- } \
- if (c == ' ') \
- FREE_STACK_RETURN (REG_BADBR); \
- } \
- } while (0)
+ do { \
+ if (p == pend) /* Pattern ends before any number. */ \
+ FREE_STACK_RETURN (REG_EBRACE); \
+ else \
+ { \
+ PATFETCH (c); \
+ while ('0' <= c && c <= '9') \
+ { \
+ int prev; \
+ if (num < 0) \
+ num = 0; /* Negative NUM is reset before the first digit. */ \
+ prev = num; \
+ num = num * 10 + c - '0'; \
+ if (num / 10 != prev) /* Did not round-trip: overflow. */ \
+ FREE_STACK_RETURN (REG_BADBR); \
+ if (p == pend) /* Pattern ends right after a digit. */ \
+ FREE_STACK_RETURN (REG_EBRACE); \
+ PATFETCH (c); \
+ } \
+ } \
+ } while (0)
\f
-#if WIDE_CHAR_SUPPORT
-/* The GNU C library provides support for user-defined character classes
- and the functions from ISO C amendement 1. */
-# ifdef CHARCLASS_NAME_MAX
-# define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
-# else
-/* This shouldn't happen but some implementation might still have this
- problem. Use a reasonable default value. */
-# define CHAR_CLASS_MAX_LENGTH 256
-# endif
-typedef wctype_t re_wctype_t;
-typedef wchar_t re_wchar_t;
-# define re_wctype wctype
-# define re_iswctype iswctype
-# define re_wctype_to_bit(cc) 0
-#else
-# define CHAR_CLASS_MAX_LENGTH 9 /* Namely, `multibyte'. */
-# define btowc(c) c
-
-/* Character classes. */
-typedef enum { RECC_ERROR = 0,
- RECC_ALNUM, RECC_ALPHA, RECC_WORD,
- RECC_GRAPH, RECC_PRINT,
- RECC_LOWER, RECC_UPPER,
- RECC_PUNCT, RECC_CNTRL,
- RECC_DIGIT, RECC_XDIGIT,
- RECC_BLANK, RECC_SPACE,
- RECC_MULTIBYTE, RECC_NONASCII,
- RECC_ASCII, RECC_UNIBYTE
-} re_wctype_t;
-
-typedef int re_wchar_t;
+#if ! WIDE_CHAR_SUPPORT
/* Map a string to the char class it names (if any). */
-static re_wctype_t
+re_wctype_t
re_wctype (str)
re_char *str;
{
}
/* True iff CH is in the char class CC. */
-static boolean
+boolean
re_iswctype (ch, cc)
int ch;
re_wctype_t cc;
static void
extend_range_table_work_area (work_area)
struct range_table_work_area *work_area;
-{
+{
work_area->allocated += 16 * sizeof (int);
if (work_area->table)
work_area->table
`strange' indicates a character that has more than one
case-equivalent. */
-
+
enum case_type {one_case, two_case, strange};
/* Describe the run that is in progress,
}
run_type = strange;
}
-
+
if (this_type == strange)
{
/* For a strange character, add each of its equivalents, one
/* If the object matched can contain multibyte characters. */
const boolean multibyte = RE_MULTIBYTE_P (bufp);
+ /* Nonzero if we have pushed down into a subpattern. */
+ int in_subpattern = 0;
+
+ /* These hold the values of p, pattern, and pend from the main
+ pattern when we have pushed into a subpattern. */
+ re_char *main_p;
+ re_char *main_pattern;
+ re_char *main_pend;
+
#ifdef DEBUG
debug++;
DEBUG_PRINT1 ("\nCompiling pattern: ");
begalt = b = bufp->buffer;
/* Loop through the uncompiled pattern until we're at the end. */
- while (p != pend)
+ while (1)
{
+ if (p == pend)
+ {
+ /* If this is the end of an included regexp,
+ pop back to the main regexp and try again. */
+ if (in_subpattern)
+ {
+ in_subpattern = 0;
+ pattern = main_pattern;
+ p = main_p;
+ pend = main_pend;
+ continue;
+ }
+ /* If this is the end of the main regexp, we are done. */
+ break;
+ }
+
PATFETCH (c);
switch (c)
{
+ case ' ':
+ {
+ re_char *p1 = p;
+
+ /* If there's no special whitespace regexp, treat
+ spaces normally. And don't try to do this recursively. */
+ if (!whitespace_regexp || in_subpattern)
+ goto normal_char;
+
+ /* Peek past following spaces. */
+ while (p1 != pend)
+ {
+ if (*p1 != ' ')
+ break;
+ p1++;
+ }
+ /* If the spaces are followed by a repetition op,
+ treat them normally. */
+ if (p1 != pend
+ && (*p1 == '*' || *p1 == '+' || *p1 == '?'
+ || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
+ goto normal_char;
+
+ /* Replace the spaces with the whitespace regexp. */
+ in_subpattern = 1;
+ main_p = p1;
+ main_pend = pend;
+ main_pattern = pattern;
+ p = pattern = whitespace_regexp;
+ pend = p + strlen (p);
+ break;
+ }
+
case '^':
{
if ( /* If at start of pattern, it's an operator. */
unsigned int startoffset = 0;
re_opcode_t ofj =
/* Check if the loop can match the empty string. */
- (simple || !analyse_first (laststart, b, NULL, 0)) ?
- on_failure_jump : on_failure_jump_loop;
+ (simple || !analyse_first (laststart, b, NULL, 0))
+ ? on_failure_jump : on_failure_jump_loop;
assert (skip_one_char (laststart) <= b);
-
+
if (!zero_times_ok && simple)
{ /* Since simple * loops can be made faster by using
on_failure_keep_string_jump, we turn simple P+
{
boolean emptyp = analyse_first (laststart, b, NULL, 0);
- /* The non-greedy multiple match looks like a repeat..until:
- we only need a conditional jump at the end of the loop */
+ /* The non-greedy multiple match looks like
+ a repeat..until: we only need a conditional jump
+ at the end of the loop. */
if (emptyp) BUF_PUSH (no_op);
STORE_JUMP (emptyp ? on_failure_jump_nastyloop
: on_failure_jump, b, laststart);
{
/* The repeat...until naturally matches one or more.
To also match zero times, we need to first jump to
- the end of the loop (its conditional jump). */
+ the end of the loop (its conditional jump). */
INSERT_JUMP (jump, laststart, b);
b += 3;
}
}
}
else if (!SAME_CHARSET_P (c, c1))
- FREE_STACK_RETURN (REG_ERANGE);
+ FREE_STACK_RETURN (REG_ERANGEX);
}
else
/* Range from C to C. */
beg_interval = p;
- if (p == pend)
- FREE_STACK_RETURN (REG_EBRACE);
-
GET_UNSIGNED_NUMBER (lower_bound);
if (c == ',')
{
if (c != '\\')
FREE_STACK_RETURN (REG_BADBR);
-
+ if (p == pend)
+ FREE_STACK_RETURN (REG_EESCAPE);
PATFETCH (c);
}
goto unfetch_interval;
}
- if (upper_bound == 0)
- /* If the upper bound is zero, just drop the sub pattern
- altogether. */
- b = laststart;
- else if (lower_bound == 1 && upper_bound == 1)
- /* Just match it once: nothing to do here. */
- ;
-
- /* Otherwise, we have a nontrivial interval. When
- we're all done, the pattern will look like:
- set_number_at <jump count> <upper bound>
- set_number_at <succeed_n count> <lower bound>
- succeed_n <after jump addr> <succeed_n count>
- <body of loop>
- jump_n <succeed_n addr> <jump count>
- (The upper bound and `jump_n' are omitted if
- `upper_bound' is 1, though.) */
- else
- { /* If the upper bound is > 1, we need to insert
- more at the end of the loop. */
- unsigned int nbytes = (upper_bound < 0 ? 3
- : upper_bound > 1 ? 5 : 0);
- unsigned int startoffset = 0;
-
- GET_BUFFER_SPACE (20); /* We might use less. */
-
- if (lower_bound == 0)
- {
- /* A succeed_n that starts with 0 is really a
- a simple on_failure_jump_loop. */
- INSERT_JUMP (on_failure_jump_loop, laststart,
- b + 3 + nbytes);
- b += 3;
- }
- else
- {
- /* Initialize lower bound of the `succeed_n', even
- though it will be set during matching by its
- attendant `set_number_at' (inserted next),
- because `re_compile_fastmap' needs to know.
- Jump to the `jump_n' we might insert below. */
- INSERT_JUMP2 (succeed_n, laststart,
- b + 5 + nbytes,
- lower_bound);
- b += 5;
-
- /* Code to initialize the lower bound. Insert
- before the `succeed_n'. The `5' is the last two
- bytes of this `set_number_at', plus 3 bytes of
- the following `succeed_n'. */
- insert_op2 (set_number_at, laststart, 5, lower_bound, b);
- b += 5;
- startoffset += 5;
- }
-
- if (upper_bound < 0)
- {
- /* A negative upper bound stands for infinity,
- in which case it degenerates to a plain jump. */
- STORE_JUMP (jump, b, laststart + startoffset);
- b += 3;
- }
- else if (upper_bound > 1)
- { /* More than one repetition is allowed, so
- append a backward jump to the `succeed_n'
- that starts this interval.
-
- When we've reached this during matching,
- we'll have matched the interval once, so
- jump back only `upper_bound - 1' times. */
- STORE_JUMP2 (jump_n, b, laststart + startoffset,
- upper_bound - 1);
- b += 5;
-
- /* The location we want to set is the second
- parameter of the `jump_n'; that is `b-2' as
- an absolute address. `laststart' will be
- the `set_number_at' we're about to insert;
- `laststart+3' the number to set, the source
- for the relative address. But we are
- inserting into the middle of the pattern --
- so everything is getting moved up by 5.
- Conclusion: (b - 2) - (laststart + 3) + 5,
- i.e., b - laststart.
-
- We insert this at the beginning of the loop
- so that if we fail during matching, we'll
- reinitialize the bounds. */
- insert_op2 (set_number_at, laststart, b - laststart,
- upper_bound - 1, b);
- b += 5;
- }
- }
+ if (upper_bound == 0)
+ /* If the upper bound is zero, just drop the sub pattern
+ altogether. */
+ b = laststart;
+ else if (lower_bound == 1 && upper_bound == 1)
+ /* Just match it once: nothing to do here. */
+ ;
+
+ /* Otherwise, we have a nontrivial interval. When
+ we're all done, the pattern will look like:
+ set_number_at <jump count> <upper bound>
+ set_number_at <succeed_n count> <lower bound>
+ succeed_n <after jump addr> <succeed_n count>
+ <body of loop>
+ jump_n <succeed_n addr> <jump count>
+ (The upper bound and `jump_n' are omitted if
+ `upper_bound' is 1, though.) */
+ else
+ { /* If the upper bound is > 1, we need to insert
+ more at the end of the loop. */
+ unsigned int nbytes = (upper_bound < 0 ? 3
+ : upper_bound > 1 ? 5 : 0);
+ unsigned int startoffset = 0;
+
+ GET_BUFFER_SPACE (20); /* We might use less. */
+
+ if (lower_bound == 0)
+ {
+ /* A succeed_n that starts with 0 is really
+ a simple on_failure_jump_loop. */
+ INSERT_JUMP (on_failure_jump_loop, laststart,
+ b + 3 + nbytes);
+ b += 3;
+ }
+ else
+ {
+ /* Initialize lower bound of the `succeed_n', even
+ though it will be set during matching by its
+ attendant `set_number_at' (inserted next),
+ because `re_compile_fastmap' needs to know.
+ Jump to the `jump_n' we might insert below. */
+ INSERT_JUMP2 (succeed_n, laststart,
+ b + 5 + nbytes,
+ lower_bound);
+ b += 5;
+
+ /* Code to initialize the lower bound. Insert
+ before the `succeed_n'. The `5' is the last two
+ bytes of this `set_number_at', plus 3 bytes of
+ the following `succeed_n'. */
+ insert_op2 (set_number_at, laststart, 5, lower_bound, b);
+ b += 5;
+ startoffset += 5;
+ }
+
+ if (upper_bound < 0)
+ {
+ /* A negative upper bound stands for infinity,
+ in which case it degenerates to a plain jump. */
+ STORE_JUMP (jump, b, laststart + startoffset);
+ b += 3;
+ }
+ else if (upper_bound > 1)
+ { /* More than one repetition is allowed, so
+ append a backward jump to the `succeed_n'
+ that starts this interval.
+
+ When we've reached this during matching,
+ we'll have matched the interval once, so
+ jump back only `upper_bound - 1' times. */
+ STORE_JUMP2 (jump_n, b, laststart + startoffset,
+ upper_bound - 1);
+ b += 5;
+
+ /* The location we want to set is the second
+ parameter of the `jump_n'; that is `b-2' as
+ an absolute address. `laststart' will be
+ the `set_number_at' we're about to insert;
+ `laststart+3' the number to set, the source
+ for the relative address. But we are
+ inserting into the middle of the pattern --
+ so everything is getting moved up by 5.
+ Conclusion: (b - 2) - (laststart + 3) + 5,
+ i.e., b - laststart.
+
+ We insert this at the beginning of the loop
+ so that if we fail during matching, we'll
+ reinitialize the bounds. */
+ insert_op2 (set_number_at, laststart, b - laststart,
+ upper_bound - 1, b);
+ b += 5;
+ }
+ }
pending_exact = 0;
beg_interval = NULL;
}
BUF_PUSH (wordend);
break;
+ case '_':
+ if (syntax & RE_NO_GNU_OPS)
+ goto normal_char;
+ laststart = b;
+ PATFETCH (c);
+ if (c == '<')
+ BUF_PUSH (symbeg);
+ else if (c == '>')
+ BUF_PUSH (symend);
+ else
+ FREE_STACK_RETURN (REG_BADPAT);
+ break;
+
case 'b':
if (syntax & RE_NO_GNU_OPS)
goto normal_char;
if (syntax & RE_NO_POSIX_BACKTRACKING)
BUF_PUSH (succeed);
- free (compile_stack.stack);
-
/* We have succeeded; set the length of the buffer. */
bufp->used = b - bufp->buffer;
}
#endif /* not MATCH_MAY_ALLOCATE */
- return REG_NOERROR;
+ FREE_STACK_RETURN (REG_NOERROR);
} /* regex_compile */
\f
/* Subroutines for `regex_compile'. */
case notwordbound:
case wordbeg:
case wordend:
+ case symbeg:
+ case symend:
continue;
case has already been handled, so we only need to look at the
fallthrough case. */
continue;
-
+
case succeed_n:
/* If N == 0, it should be an on_failure_jump_loop instead. */
DEBUG_STATEMENT (EXTRACT_NUMBER (j, p + 2); assert (j > 0));
}
WEAK_ALIAS (__re_search, re_search)
+/* Head address of the virtual concatenation's string for position P: string2 when P >= size1, else string1. */
+#define HEAD_ADDR_VSTRING(P) \
+ (((P) >= size1 ? string2 : string1))
+
/* End address of virtual concatenation of string. */
#define STOP_ADDR_VSTRING(P) \
(((P) >= size1 ? string2 + size2 : string1 + size1))
d += buf_charlen;
}
else
- while (range > lim
- && !fastmap[RE_TRANSLATE (translate, *d)])
- {
- d++;
- range--;
- }
+ {
+ /* Convert *d to integer to shut up GCC's
+ whining about comparison that is always
+ true. */
+ int di = *d;
+
+ while (range > lim
+ && !fastmap[RE_TRANSLATE (translate, di)])
+ {
+ di = *(++d);
+ range--;
+ }
+ }
}
else
while (range > lim && !fastmap[*d])
/* Update STARTPOS to the previous character boundary. */
if (multibyte)
{
- re_char *p = POS_ADDR_VSTRING (startpos);
- int len = 0;
+ re_char *p = POS_ADDR_VSTRING (startpos) + 1;
+ re_char *p0 = p;
+ re_char *phead = HEAD_ADDR_VSTRING (startpos);
/* Find the head of multibyte form. */
- while (!CHAR_HEAD_P (*p))
- p--, len++;
-
- /* Adjust it. */
-#if 0 /* XXX */
- if (MULTIBYTE_FORM_LENGTH (p, len + 1) != (len + 1))
- ;
- else
-#endif
- {
- range += len;
- if (range > 0)
- break;
+ PREV_CHAR_BOUNDARY (p, phead);
+ range += p0 - 1 - p;
+ if (range > 0)
+ break;
- startpos -= len;
- }
+ startpos -= p0 - 1 - p;
}
}
}
{
case anychar:
break;
-
+
case exactn:
p += *p + 1;
break;
else
p += 1 + CHARSET_BITMAP_SIZE (p - 1);
break;
-
+
case syntaxspec:
case notsyntaxspec:
#ifdef emacs
/* Jump over non-matching operations. */
-static unsigned char *
+static re_char *
skip_noops (p, pend)
- unsigned char *p, *pend;
+ re_char *p, *pend;
{
int mcnt;
while (p < pend)
static int
mutually_exclusive_p (bufp, p1, p2)
struct re_pattern_buffer *bufp;
- unsigned char *p1, *p2;
+ re_char *p1, *p2;
{
re_opcode_t op2;
const boolean multibyte = RE_MULTIBYTE_P (bufp);
return 1;
}
break;
-
+
case endline:
case exactn:
{
}
}
break;
-
+
case charset_not:
switch (SWITCH_ENUM_CAST (*p1))
{
break;
case wordend:
- case notsyntaxspec:
+ return ((re_opcode_t) *p1 == syntaxspec && p1[1] == Sword);
+ case symend:
return ((re_opcode_t) *p1 == syntaxspec
- && p1[1] == (op2 == wordend ? Sword : p2[1]));
+ && (p1[1] == Ssymbol || p1[1] == Sword));
+ case notsyntaxspec:
+ return ((re_opcode_t) *p1 == syntaxspec && p1[1] == p2[1]);
case wordbeg:
- case syntaxspec:
+ return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == Sword);
+ case symbeg:
return ((re_opcode_t) *p1 == notsyntaxspec
- && p1[1] == (op2 == wordend ? Sword : p2[1]));
+ && (p1[1] == Ssymbol || p1[1] == Sword));
+ case syntaxspec:
+ return ((re_opcode_t) *p1 == notsyntaxspec && p1[1] == p2[1]);
case wordbound:
return (((re_opcode_t) *p1 == notsyntaxspec
else
do
{
+ /* Avoid compiler whining about comparison being
+ always true. */
+ int di;
+
PREFETCH ();
- if (RE_TRANSLATE (translate, *d) != *p++)
+ di = *d;
+ if (RE_TRANSLATE (translate, di) != *p++)
{
d = dfail;
goto fail;
assert (!REG_UNSET (regstart[*p]));
/* Strictly speaking, there should be code such as:
-
+
assert (REG_UNSET (regend[*p]));
PUSH_FAILURE_REGSTOP ((unsigned int)*p);
cycle detection cannot work. Worse yet, such a detection
can not only fail to detect a cycle, but it can also wrongly
detect a cycle (between different instantiations of the same
- loop.
+ loop).
So the method used for those nasty loops is a little different:
We use a special cycle-detection-stack-frame which is pushed
when the on_failure_jump_nastyloop failure-point is *popped*.
mcnt, p + mcnt);
assert ((re_opcode_t)p[-4] == no_op);
- CHECK_INFINITE_LOOP (p - 4, d);
- PUSH_FAILURE_POINT (p - 3, d);
+ {
+ int cycle = 0;
+ CHECK_INFINITE_LOOP (p - 4, d);
+ if (!cycle)
+ /* If there's a cycle, just continue without pushing
+ this failure point. The failure point is the "try again"
+ option, which shouldn't be tried.
+ We want (x?)*?y\1z to match both xxyz and xxyxz. */
+ PUSH_FAILURE_POINT (p - 3, d);
+ }
break;
-
/* Simple loop detecting on_failure_jump: just check on the
failure stack if the same spot was already hit earlier. */
case on_failure_jump_loop:
EXTRACT_NUMBER_AND_INCR (mcnt, p);
DEBUG_PRINT3 ("EXECUTING on_failure_jump_loop %d (to %p):\n",
mcnt, p + mcnt);
-
- CHECK_INFINITE_LOOP (p - 3, d);
- PUSH_FAILURE_POINT (p - 3, d);
+ {
+ int cycle = 0;
+ CHECK_INFINITE_LOOP (p - 3, d);
+ if (cycle)
+ /* If there's a cycle, get out of the loop, as if the matching
+ had failed. We used to just `goto fail' here, but that was
+ aborting the search a bit too early: we want to keep the
+ empty-loop-match and keep matching after the loop.
+ We want (x?)*y\1z to match both xxyz and xxyxz. */
+ p += mcnt;
+ else
+ PUSH_FAILURE_POINT (p - 3, d);
+ }
break;
PREFETCH ();
c2 = RE_STRING_CHAR (d, dend - d);
s2 = SYNTAX (c2);
-
+
/* Case 2: S2 is not Sword. */
if (s2 != Sword)
goto fail;
PREFETCH_NOLIMIT ();
c2 = RE_STRING_CHAR (d, dend - d);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
#endif
s2 = SYNTAX (c2);
}
break;
+ case symbeg:
+ DEBUG_PRINT1 ("EXECUTING symbeg.\n");
+
+ /* We FAIL in one of the following cases: */
+
+ /* Case 1: D is at the end of string. */
+ if (AT_STRINGS_END (d))
+ goto fail;
+ else
+ {
+ /* C1 is the character before D, S1 is the syntax of C1, C2
+ is the character at D, and S2 is the syntax of C2. */
+ re_wchar_t c1, c2;
+ int s1, s2;
+#ifdef emacs
+ int offset = PTR_TO_OFFSET (d);
+ int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ UPDATE_SYNTAX_TABLE (charpos);
+#endif
+ PREFETCH ();
+ c2 = RE_STRING_CHAR (d, dend - d);
+ s2 = SYNTAX (c2);
+
+ /* Case 2: S2 is neither Sword nor Ssymbol. */
+ if (s2 != Sword && s2 != Ssymbol)
+ goto fail;
+
+ /* Case 3: D is not at the beginning of string ... */
+ if (!AT_STRINGS_BEG (d))
+ {
+ GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+#ifdef emacs
+ UPDATE_SYNTAX_TABLE_BACKWARD (charpos - 1);
+#endif
+ s1 = SYNTAX (c1);
+
+ /* ... and S1 is Sword or Ssymbol. */
+ if (s1 == Sword || s1 == Ssymbol)
+ goto fail;
+ }
+ }
+ break;
+
+ case symend:
+ DEBUG_PRINT1 ("EXECUTING symend.\n");
+
+ /* We FAIL in one of the following cases: */
+
+ /* Case 1: D is at the beginning of string. */
+ if (AT_STRINGS_BEG (d))
+ goto fail;
+ else
+ {
+ /* C1 is the character before D, S1 is the syntax of C1, C2
+ is the character at D, and S2 is the syntax of C2. */
+ re_wchar_t c1, c2;
+ int s1, s2;
+#ifdef emacs
+ int offset = PTR_TO_OFFSET (d) - 1;
+ int charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
+ UPDATE_SYNTAX_TABLE (charpos);
+#endif
+ GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
+ s1 = SYNTAX (c1);
+
+ /* Case 2: S1 is neither Ssymbol nor Sword. */
+ if (s1 != Sword && s1 != Ssymbol)
+ goto fail;
+
+ /* Case 3: D is not at the end of string ... */
+ if (!AT_STRINGS_END (d))
+ {
+ PREFETCH_NOLIMIT ();
+ c2 = RE_STRING_CHAR (d, dend - d);
+#ifdef emacs
+ UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+#endif
+ s2 = SYNTAX (c2);
+
+ /* ... and S2 is Sword or Ssymbol. */
+ if (s2 == Sword || s2 == Ssymbol)
+ goto fail;
+ }
+ }
+ break;
+
case syntaxspec:
case notsyntaxspec:
not = (re_opcode_t) *(p - 1) == notsyntaxspec;
const regex_t *__restrict preg;
const char *__restrict string;
size_t nmatch;
- regmatch_t pmatch[];
+ regmatch_t pmatch[__restrict_arr];
int eflags;
{
int ret;
WEAK_ALIAS (__regfree, regfree)
#endif /* not emacs */
+
+/* arch-tag: 4ffd68ba-2a9e-435b-a21a-018990f9eeb2
+ (do not change this comment) */