0.12. (Implements POSIX draft P1003.2/D11.2, except for some of the
internationalization features.)
- Copyright (C) 1993-2015 Free Software Foundation, Inc.
+ Copyright (C) 1993-2016 Free Software Foundation, Inc.
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
/* The rest must handle multibyte characters. */
# define ISGRAPH(c) (SINGLE_BYTE_CHAR_P (c) \
- ? (c) > ' ' && !((c) >= 0177 && (c) <= 0237) \
- : 1)
+ ? (c) > ' ' && !((c) >= 0177 && (c) <= 0240) \
+ : graphicp (c))
# define ISPRINT(c) (SINGLE_BYTE_CHAR_P (c) \
? (c) >= ' ' && !((c) >= 0177 && (c) <= 0237) \
- : 1)
+ : printablep (c))
# define ISALNUM(c) (IS_REAL_ASCII (c) \
? (((c) >= 'a' && (c) <= 'z') \
DEBUG_PRINT (" Push frame index: %zd\n", fail_stack.frame); \
PUSH_FAILURE_INT (fail_stack.frame); \
\
- DEBUG_PRINT (" Push string %p: `", string_place); \
+ DEBUG_PRINT (" Push string %p: \"", string_place); \
DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, size2);\
- DEBUG_PRINT ("'\n"); \
+ DEBUG_PRINT ("\"\n"); \
PUSH_FAILURE_POINTER (string_place); \
\
DEBUG_PRINT (" Push pattern %p: ", pattern); \
on_failure_keep_string_jump opcode, and we want to throw away the \
saved NULL, thus retaining our current position in the string. */ \
str = POP_FAILURE_POINTER (); \
- DEBUG_PRINT (" Popping string %p: `", str); \
+ DEBUG_PRINT (" Popping string %p: \"", str); \
DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
- DEBUG_PRINT ("'\n"); \
+ DEBUG_PRINT ("\"\n"); \
\
fail_stack.frame = POP_FAILURE_INT (); \
DEBUG_PRINT (" Popping frame index: %zd\n", fail_stack.frame); \
#define RANGE_TABLE_WORK_ELT(work_area, i) ((work_area).table[i])
/* Bits used to implement the multibyte-part of the various character classes
- such as [:alnum:] in a charset's range table. */
+ such as [:alnum:] in a charset's range table. The code currently assumes
+ that only the low 16 bits are used. */
#define BIT_WORD 0x1
#define BIT_LOWER 0x2
#define BIT_PUNCT 0x4
#define BIT_MULTIBYTE 0x20
#define BIT_ALPHA 0x40
#define BIT_ALNUM 0x80
+#define BIT_GRAPH 0x100
+#define BIT_PRINT 0x200
\f
/* Set the bit for character C in a list. */
{
switch (cc)
{
- case RECC_NONASCII: case RECC_PRINT: case RECC_GRAPH:
+ case RECC_NONASCII:
case RECC_MULTIBYTE: return BIT_MULTIBYTE;
case RECC_ALPHA: return BIT_ALPHA;
case RECC_ALNUM: return BIT_ALNUM;
case RECC_UPPER: return BIT_UPPER;
case RECC_PUNCT: return BIT_PUNCT;
case RECC_SPACE: return BIT_SPACE;
+ case RECC_GRAPH: return BIT_GRAPH;
+ case RECC_PRINT: return BIT_PRINT;
case RECC_ASCII: case RECC_DIGIT: case RECC_XDIGIT: case RECC_CNTRL:
case RECC_BLANK: case RECC_UNIBYTE: case RECC_ERROR: return 0;
default:
DEBUG_PRINT ("The compiled pattern is: ");
DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
- DEBUG_PRINT ("The string to match is: `");
+ DEBUG_PRINT ("The string to match is: \"");
DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
- DEBUG_PRINT ("'\n");
+ DEBUG_PRINT ("\"\n");
/* This loops over pattern commands. It exits by returning from the
function if the match is complete, or it drops through if the match
if (p == pend)
{
- ptrdiff_t dcnt;
-
/* End of pattern means we might have succeeded. */
DEBUG_PRINT ("end of pattern ... ");
longest match, try backtracking. */
if (d != end_match_2)
{
- /* 1 if this match ends in the same string (string1 or string2)
- as the best previous match. */
- boolean same_str_p = (FIRST_STRING_P (match_end)
- == FIRST_STRING_P (d));
- /* 1 if this match is the best seen so far. */
- boolean best_match_p;
-
- /* AIX compiler got confused when this was combined
- with the previous declaration. */
- if (same_str_p)
- best_match_p = d > match_end;
- else
- best_match_p = !FIRST_STRING_P (d);
+ /* True if this match is the best seen so far. */
+ bool best_match_p;
+
+ {
+ /* True if this match ends in the same string (string1
+ or string2) as the best previous match. */
+ bool same_str_p = (FIRST_STRING_P (match_end)
+ == FIRST_STRING_P (d));
+
+ /* AIX compiler got confused when this was combined
+ with the previous declaration. */
+ if (same_str_p)
+ best_match_p = d > match_end;
+ else
+ best_match_p = !FIRST_STRING_P (d);
+ }
DEBUG_PRINT ("backtracking.\n");
nfailure_points_pushed - nfailure_points_popped);
DEBUG_PRINT ("%u registers pushed.\n", num_regs_pushed);
- dcnt = POINTER_TO_OFFSET (d) - pos;
+ ptrdiff_t dcnt = POINTER_TO_OFFSET (d) - pos;
DEBUG_PRINT ("Returning %td from re_match_2.\n", dcnt);
&& buf_ch == '\000'))
goto fail;
- DEBUG_PRINT (" Matched `%d'.\n", *d);
+ DEBUG_PRINT (" Matched \"%d\".\n", *d);
d += buf_charlen;
}
break;
case charset:
case charset_not:
{
- register unsigned int c;
+ register unsigned int c, corig;
boolean not = (re_opcode_t) *(p - 1) == charset_not;
int len;
}
PREFETCH ();
- c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
+ corig = c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
if (target_multibyte)
{
int c1;
{
int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
- if ( (class_bits & BIT_LOWER && ISLOWER (c))
+ if ( (class_bits & BIT_LOWER
+ && (ISLOWER (c)
+ || (corig != c
+ && c == upcase (corig) && ISUPPER(c))))
| (class_bits & BIT_MULTIBYTE)
| (class_bits & BIT_PUNCT && ISPUNCT (c))
| (class_bits & BIT_SPACE && ISSPACE (c))
- | (class_bits & BIT_UPPER && ISUPPER (c))
+ | (class_bits & BIT_UPPER
+ && (ISUPPER (c)
+ || (corig != c
+ && c == downcase (corig) && ISLOWER (c))))
| (class_bits & BIT_WORD && ISWORD (c))
| (class_bits & BIT_ALPHA && ISALPHA (c))
- | (class_bits & BIT_ALNUM && ISALNUM (c)))
+ | (class_bits & BIT_ALNUM && ISALNUM (c))
+ | (class_bits & BIT_GRAPH && ISGRAPH (c))
+ | (class_bits & BIT_PRINT && ISPRINT (c)))
not = !not;
else
CHARSET_LOOKUP_RANGE_TABLE_RAW (not, c, range_table, count);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d - 1);
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+ UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
#endif
PREFETCH_NOLIMIT ();
GET_CHAR_AFTER (c2, d, dummy);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d);
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
PREFETCH ();
GET_CHAR_AFTER (c2, d, dummy);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d) - 1;
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
PREFETCH_NOLIMIT ();
GET_CHAR_AFTER (c2, d, dummy);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+ UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos);
#endif
s2 = SYNTAX (c2);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d);
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
PREFETCH ();
c2 = RE_STRING_CHAR (d, target_multibyte);
#ifdef emacs
ssize_t offset = PTR_TO_OFFSET (d) - 1;
ssize_t charpos = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (charpos);
+ UPDATE_SYNTAX_TABLE_FAST (charpos);
#endif
GET_CHAR_BEFORE_2 (c1, d, string1, end1, string2, end2);
s1 = SYNTAX (c1);
PREFETCH_NOLIMIT ();
c2 = RE_STRING_CHAR (d, target_multibyte);
#ifdef emacs
- UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
+ UPDATE_SYNTAX_TABLE_FORWARD_FAST (charpos + 1);
#endif
s2 = SYNTAX (c2);
{
ssize_t offset = PTR_TO_OFFSET (d);
ssize_t pos1 = SYNTAX_TABLE_BYTE_TO_CHAR (offset);
- UPDATE_SYNTAX_TABLE (pos1);
+ UPDATE_SYNTAX_TABLE_FAST (pos1);
}
#endif
{