0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993-2014 Free Software Foundation, Inc.
+ Copyright (C) 1993-2016 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/* The rest must handle multibyte characters. */
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
- ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
- : 1)
+ ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
+ : graphicp (c))
# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
- : 1)
+ : printablep (c))
# define ISALNUM(c) (IS_REAL_ASCII (c) \
? (((c) >= 'a' && (c) <= 'z') \
|| ((c) >= 'A' && (c) <= 'Z') \
|| ((c) >= '0' && (c) <= '9')) \
- : SYNTAX (c) == Sword)
+ : (alphabeticp (c) || decimalnump (c)))
# define ISALPHA(c) (IS_REAL_ASCII (c) \
? (((c) >= 'a' && (c) <= 'z') \
|| ((c) >= 'A' && (c) <= 'Z')) \
- : SYNTAX (c) == Sword)
+ : alphabeticp (c))
# define ISLOWER(c) lowercasep (c)
#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
-#ifndef emacs
+#ifndef emacs
# undef max
# undef min
# define max(a, b) ((a) > (b) ? (a) : (b))
DEBUG_PRINT (" Push frame index: %zd\n", fail_stack.frame); \
PUSH_FAILURE_INT (fail_stack.frame); \
\
- DEBUG_PRINT (" Push string %p: `", string_place); \
+ DEBUG_PRINT (" Push string %p: \"", string_place); \
DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
- DEBUG_PRINT ("'\n"); \
+ DEBUG_PRINT ("\"\n"); \
PUSH_FAILURE_POINTER (string_place); \
\
DEBUG_PRINT (" Push pattern %p: ", pattern); \
on_failure_keep_string_jump opcode, and we want to throw away the \
saved NULL, thus retaining our current position in the string. */ \
str = POP_FAILURE_POINTER (); \
- DEBUG_PRINT (" Popping string %p: `", str); \
+ DEBUG_PRINT (" Popping string %p: \"", str); \
DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
- DEBUG_PRINT ("'\n"); \
+ DEBUG_PRINT ("\"\n"); \
\
fail_stack.frame = POP_FAILURE_INT (); \
DEBUG_PRINT (" Popping frame index: %zd\n", fail_stack.frame); \
static boolean at_endline_loc_p (re_char *p, re_char *pend,
reg_syntax_t syntax);
static re_char *skip_one_char (re_char *p);
-static int analyse_first (re_char *p, re_char *pend,
+static int analyze_first (re_char *p, re_char *pend,
char *fastmap, const int multibyte);
/* Fetch the next character in the uncompiled pattern, with no
#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
/* Bits used to implement the multibyte-part of the various character classes
- such as [:alnum:] in a charset's range table. */
+ such as [:alnum:] in a charset's range table. The code currently assumes
+ that only the low 16 bits are used. */
#define BIT_WORD 0x1
#define BIT_LOWER 0x2
#define BIT_PUNCT 0x4
#define BIT_SPACE 0x8
#define BIT_UPPER 0x10
#define BIT_MULTIBYTE 0x20
+#define BIT_ALPHA 0x40
+#define BIT_ALNUM 0x80
+#define BIT_GRAPH 0x100
+#define BIT_PRINT 0x200
\f
/* Set the bit for character C in a list. */
{
switch (cc)
{
- case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
+ case RECC_NONASCII:
case RECC_MULTIBYTE: return BIT_MULTIBYTE;
- case RECC_ALPHA: case RECC_ALNUM: case RECC_WORD: return BIT_WORD;
+ case RECC_ALPHA: return BIT_ALPHA;
+ case RECC_ALNUM: return BIT_ALNUM;
+ case RECC_WORD: return BIT_WORD;
case RECC_LOWER: return BIT_LOWER;
case RECC_UPPER: return BIT_UPPER;
case RECC_PUNCT: return BIT_PUNCT;
case RECC_SPACE: return BIT_SPACE;
+ case RECC_GRAPH: return BIT_GRAPH;
+ case RECC_PRINT: return BIT_PRINT;
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
default:
size_t startoffset = 0;
re_opcode_t ofj =
/* Check if the loop can match the empty string. */
- (simple || !analyse_first (laststart, b, NULL, 0))
+ (simple || !analyze_first (laststart, b, NULL, 0))
? on_failure_jump : on_failure_jump_loop;
assert (skip_one_char (laststart) <= b);
GET_BUFFER_SPACE (7); /* We might use less. */
if (many_times_ok)
{
- boolean emptyp = analyse_first (laststart, b, NULL, 0);
+ boolean emptyp = analyze_first (laststart, b, NULL, 0);
/* The non-greedy multiple match looks like
a repeat..until: we only need a conditional jump
#endif /* emacs */
/* In most cases the matching rule for char classes
only uses the syntax table for multibyte chars,
- so that the content of the syntax-table it is not
+ so that the content of the syntax-table is not
hardcoded in the range_table. SPACE and WORD are
the two exceptions. */
if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
p = class_beg;
SET_LIST_BIT ('[');
- /* Because the `:' may starts the range, we
+ /* Because the `:' may start the range, we
can't simply set bit and repeat the loop.
Instead, just set it to C and handle below. */
c = ':';
return false;
}
\f
-/* analyse_first.
+/* analyze_first.
If fastmap is non-NULL, go through the pattern and fill fastmap
with all the possible leading chars. If fastmap is NULL, don't
bother filling it up (obviously) and only return whether the
Return -1 if fastmap was not updated accurately. */
static int
-analyse_first (const_re_char *p, const_re_char *pend, char *fastmap,
+analyze_first (const_re_char *p, const_re_char *pend, char *fastmap,
const int multibyte)
{
int j, k;
{ /* We have to look down both arms.
We first go down the "straight" path so as to minimize
stack usage when going through alternatives. */
- int r = analyse_first (p, pend, fastmap, multibyte);
+ int r = analyze_first (p, pend, fastmap, multibyte);
if (r) return r;
p += j;
}
/* We reached the end without matching anything. */
return 1;
-} /* analyse_first */
+} /* analyze_first */
\f
/* re_compile_fastmap computes a ``fastmap'' for the compiled pattern in
BUFP. A fastmap records which of the (1 << BYTEWIDTH) possible
memset (fastmap, 0, 1 << BYTEWIDTH); /* Assume nothing's valid. */
bufp->fastmap_accurate = 1; /* It will be when we're done. */
- analysis = analyse_first (bufp->buffer, bufp->buffer + bufp->used,
+ analysis = analyze_first (bufp->buffer, bufp->buffer + bufp->used,
fastmap, RE_MULTIBYTE_P (bufp));
bufp->can_be_null = (analysis != 0);
return 0;
DEBUG_PRINT ("The compiled pattern is: ");
DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
- DEBUG_PRINT ("The string to match is: `");
+ DEBUG_PRINT ("The string to match is: \"");
DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
- DEBUG_PRINT ("'\n");
+ DEBUG_PRINT ("\"\n");
/* This loops over pattern commands. It exits by returning from the
function if the match is complete, or it drops through if the match
&& buf_ch == '\000'))
goto fail;
- DEBUG_PRINT (" Matched `%d'.\n", *d);
+ DEBUG_PRINT (" Matched \"%d\".\n", *d);
d += buf_charlen;
}
break;
case charset:
case charset_not:
{
- register unsigned int c;
+ register unsigned int c, corig;
boolean not = (re_opcode_t) *(p - 1) == charset_not;
int len;
}
PREFETCH ();
- c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
+ corig = c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
if (target_multibyte)
{
int c1;
{
int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
- if ( (class_bits & BIT_LOWER && ISLOWER (c))
+ if ( (class_bits & BIT_LOWER
+ && (ISLOWER (c)
+ || (corig != c
+ && c == upcase (corig) && ISUPPER(c))))
| (class_bits & BIT_MULTIBYTE)
| (class_bits & BIT_PUNCT && ISPUNCT (c))
| (class_bits & BIT_SPACE && ISSPACE (c))
- | (class_bits & BIT_UPPER && ISUPPER (c))
- | (class_bits & BIT_WORD && ISWORD (c)))
+ | (class_bits & BIT_UPPER
+ && (ISUPPER (c)
+ || (corig != c
+ && c == downcase (corig) && ISLOWER (c))))
+ | (class_bits & BIT_WORD && ISWORD (c))
+ | (class_bits & BIT_ALPHA && ISALPHA (c))
+ | (class_bits & BIT_ALNUM && ISALNUM (c))
+ | (class_bits & BIT_GRAPH && ISGRAPH (c))
+ | (class_bits & BIT_PRINT && ISPRINT (c)))
not = !not;
else
CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d - 1);
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+ UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
#endif
PREFETCH_NOLIMIT ();
GET_CHAR_AFTER (c2, d, dummy);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d);
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
PREFETCH ();
GET_CHAR_AFTER (c2, d, dummy);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d) - 1;
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
PREFETCH_NOLIMIT ();
GET_CHAR_AFTER (c2, d, dummy);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+ UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
#endif
s2 = SYNTAX (c2);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d);
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
PREFETCH ();
c2 = RE_STRING_CHAR (d, target_multibyte);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d) - 1;
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
PREFETCH_NOLIMIT ();
c2 = RE_STRING_CHAR (d, target_multibyte);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+ UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
#endif
s2 = SYNTAX (c2);
{
ssize_t offset = PTR_TO_OFFSET (d);
ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (pos1);
+ UPDATE_SYNTAX_TABLE_FAST (pos1);
}
#endif
{