code.delx.au - gnu-emacs/commitdiff
Fix "[:upper:]" for non-ASCII characters
author Eli Zaretskii <eliz@gnu.org>
Sat, 20 Feb 2016 11:03:20 +0000 (13:03 +0200)
committer Eli Zaretskii <eliz@gnu.org>
Sat, 20 Feb 2016 11:03:20 +0000 (13:03 +0200)
* src/regex.c (re_match_2_internal): Support [:upper:] and
[:lower:] for non-ASCII characters.  (Bug#18150)

src/regex.c

index dd3f2b3cd67d3cf203f79cc7888cacd4e956cef5..164eb4612ae01cb901caa6664b39b35a4fcaea4d 100644 (file)
@@ -5444,7 +5444,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const_re_char *string1,
        case charset:
        case charset_not:
          {
-           register unsigned int c;
+           register unsigned int c, corig;
            boolean not = (re_opcode_t) *(p - 1) == charset_not;
            int len;
 
@@ -5473,7 +5473,7 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const_re_char *string1,
              }
 
            PREFETCH ();
-           c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
+           corig = c = RE_STRING_CHAR_AND_LENGTH (d, len, target_multibyte);
            if (target_multibyte)
              {
                int c1;
@@ -5517,11 +5517,17 @@ re_match_2_internal (struct re_pattern_buffer *bufp, const_re_char *string1,
              {
                int class_bits = CHARSET_RANGE_TABLE_BITS (&p[-1]);
 
-               if (  (class_bits & BIT_LOWER && ISLOWER (c))
+               if (  (class_bits & BIT_LOWER
+                      && (ISLOWER (c)
+                          || (corig != c
+                              && c == upcase (corig) && ISUPPER(c))))
                    | (class_bits & BIT_MULTIBYTE)
                    | (class_bits & BIT_PUNCT && ISPUNCT (c))
                    | (class_bits & BIT_SPACE && ISSPACE (c))
-                   | (class_bits & BIT_UPPER && ISUPPER (c))
+                   | (class_bits & BIT_UPPER
+                      && (ISUPPER (c)
+                          || (corig != c
+                              && c == downcase (corig) && ISLOWER (c))))
                    | (class_bits & BIT_WORD  && ISWORD  (c))
                    | (class_bits & BIT_ALPHA && ISALPHA (c))
                    | (class_bits & BIT_ALNUM && ISALNUM (c))