code.delx.au - gnu-emacs/blobdiff - src/regex.c
Add fullscreen_hook.
[gnu-emacs] / src / regex.c
index 459c0ab26b1ccf54e1f577efe678e385b5e0f6b3..ae80ad0cee8b7e56be1624732ac8932f45a9e789 100644 (file)
@@ -2,7 +2,8 @@
    0.12.  (Implements POSIX draft P1003.2/D11.2, except for some of the
    internationalization features.)
 
-   Copyright (C) 1993,94,95,96,97,98,99,2000,04  Free Software Foundation, Inc.
+   Copyright (C) 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
+                 2002, 2003, 2004, 2005, 2006  Free Software Foundation, Inc.
 
    This program is free software; you can redistribute it and/or modify
    it under the terms of the GNU General Public License as published by
@@ -16,7 +17,7 @@
 
    You should have received a copy of the GNU General Public License
    along with this program; if not, write to the Free Software
-   Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307,
+   Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301,
    USA.         */
 
 /* TODO:
@@ -180,6 +181,51 @@ char *malloc ();
 char *realloc ();
 # endif
 
+/* When used in Emacs's lib-src, we need xmalloc and xrealloc; both exit if allocation fails. */
+
+void *
+xmalloc (size)
+     size_t size;
+{
+  register void *val;
+  val = (void *) malloc (size);
+  if (!val && size)  /* A null result for SIZE 0 is not an error.  */
+    {
+      write (2, "virtual memory exhausted\n", 25);  /* 25 bytes to fd 2.  */
+      exit (1);
+    }
+  return val;
+}
+
+void *
+xrealloc (block, size)
+     void *block;
+     size_t size;
+{
+  register void *val;
+  /* Resize BLOCK to SIZE bytes.  We must call malloc explicitly when
+     BLOCK is 0, since some reallocs don't do this.  */
+  if (! block)
+    val = (void *) malloc (size);
+  else
+    val = (void *) realloc (block, size);
+  if (!val && size)  /* A null result for SIZE 0 is not an error.  */
+    {
+      write (2, "virtual memory exhausted\n", 25);  /* 25 bytes to fd 2.  */
+      exit (1);
+    }
+  return val;
+}
+
+# ifdef malloc
+#  undef malloc
+# endif
+# define malloc xmalloc
+# ifdef realloc
+#  undef realloc
+# endif
+# define realloc xrealloc
+
 /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow.
    If nothing else has been done, use the method below.  */
 # ifdef INHIBIT_STRING_HEADER
@@ -1264,9 +1310,9 @@ static re_char *whitespace_regexp;
 
 void
 re_set_whitespace_regexp (regexp)
-     re_char *regexp;
+     const char *regexp;
 {
-  whitespace_regexp = regexp;
+  whitespace_regexp = (re_char *) regexp;
 }
 WEAK_ALIAS (__re_set_syntax, re_set_syntax)
 \f
@@ -1873,8 +1919,9 @@ typedef struct
 /* The next available element.  */
 #define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
 
-/* Explicit quit checking is only used on NTemacs.  */
-#if defined WINDOWSNT && defined emacs && defined QUIT
+/* Explicit quit checking is only used on NTemacs and whenever we
+   use polling to process input events.  */
+#if defined emacs && (defined WINDOWSNT || defined SYNC_INPUT) && defined QUIT
 extern int immediate_quit;
 # define IMMEDIATE_QUIT_CHECK                  \
     do {                                       \
@@ -1950,28 +1997,27 @@ struct range_table_work_area
 
 /* Get the next unsigned number in the uncompiled pattern.  */
 #define GET_UNSIGNED_NUMBER(num)                                       \
- do { if (p != pend)                                                   \
-     {                                                                 \
-       PATFETCH (c);                                                   \
-       if (c == ' ')                                                   \
-        FREE_STACK_RETURN (REG_BADBR);                                 \
-       while ('0' <= c && c <= '9')                                    \
-        {                                                              \
-           int prev;                                                   \
-          if (num < 0)                                                 \
-            num = 0;                                                   \
-          prev = num;                                                  \
-          num = num * 10 + c - '0';                                    \
-          if (num / 10 != prev)                                        \
-            FREE_STACK_RETURN (REG_BADBR);                             \
-          if (p == pend)                                               \
-            break;                                                     \
-          PATFETCH (c);                                                \
-        }                                                              \
-       if (c == ' ')                                                   \
-        FREE_STACK_RETURN (REG_BADBR);                                 \
-       }                                                               \
-    } while (0)
+  do {                                                                 \
+    if (p == pend)                                                     \
+      FREE_STACK_RETURN (REG_EBRACE);                                  \
+    else                                                               \
+      {                                                                        \
+       PATFETCH (c);                                                   \
+       while ('0' <= c && c <= '9')                                    \
+         {                                                             \
+           int prev;                                                   \
+           if (num < 0)                                                \
+             num = 0;                                                  \
+           prev = num;                                                 \
+           num = num * 10 + c - '0';                                   \
+           if (num / 10 != prev)                                       \
+             FREE_STACK_RETURN (REG_BADBR);                            \
+           if (p == pend)                                              \
+             FREE_STACK_RETURN (REG_EBRACE);                           \
+           PATFETCH (c);                                               \
+         }                                                             \
+      }                                                                        \
+  } while (0)
 \f
 #if ! WIDE_CHAR_SUPPORT
 
@@ -2484,6 +2530,7 @@ regex_compile (pattern, size, syntax, bufp)
   bufp->syntax = syntax;
   bufp->fastmap_accurate = 0;
   bufp->not_bol = bufp->not_eol = 0;
+  bufp->used_syntax = 0;
 
   /* Set `used' to zero, so that if we return an error, the pattern
      printer (for debugging) will think there's no pattern.  We reset it
@@ -2545,8 +2592,8 @@ regex_compile (pattern, size, syntax, bufp)
            re_char *p1 = p;
 
            /* If there's no special whitespace regexp, treat
-              spaces normally.  */
-           if (!whitespace_regexp)
+              spaces normally.  And don't try to do this recursively.  */
+           if (!whitespace_regexp || in_subpattern)
              goto normal_char;
 
            /* Peek past following spaces.  */
@@ -2558,8 +2605,8 @@ regex_compile (pattern, size, syntax, bufp)
              }
            /* If the spaces are followed by a repetition op,
               treat them normally.  */
-           if (p1 == pend
-               || (*p1 == '*' || *p1 == '+' || *p1 == '?'
+           if (p1 != pend
+               && (*p1 == '*' || *p1 == '+' || *p1 == '?'
                    || (*p1 == '\\' && p1 + 1 != pend && p1[1] == '{')))
              goto normal_char;
 
@@ -2896,6 +2943,14 @@ regex_compile (pattern, size, syntax, bufp)
                              SET_LIST_BIT (translated);
                          }
 
+                       /* In most cases the matching rule for char classes
+                          only uses the syntax table for multibyte chars,
+                          so that the content of the syntax-table is not
+                          hardcoded in the range_table.  SPACE and WORD are
+                          the two exceptions.  */
+                       if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD)))
+                         bufp->used_syntax = 1;
+
                        /* Repeat the loop. */
                        continue;
                      }
@@ -3234,9 +3289,6 @@ regex_compile (pattern, size, syntax, bufp)
 
                beg_interval = p;
 
-               if (p == pend)
-                 FREE_STACK_RETURN (REG_EBRACE);
-
                GET_UNSIGNED_NUMBER (lower_bound);
 
                if (c == ',')
@@ -3253,7 +3305,8 @@ regex_compile (pattern, size, syntax, bufp)
                  {
                    if (c != '\\')
                      FREE_STACK_RETURN (REG_BADBR);
-
+                   if (p == pend)
+                     FREE_STACK_RETURN (REG_EESCAPE);
                    PATFETCH (c);
                  }
 
@@ -3833,11 +3886,13 @@ analyse_first (p, pend, fastmap, multibyte)
          if (fastmap)
            {
              int c = RE_STRING_CHAR (p + 1, pend - p);
-
+             /* When fast-scanning, the fastmap can be indexed either with
+                a char (smaller than 256) or with the first byte of
+                a char's byte sequence.  So we have to conservatively add
+                both to the table.  */
              if (SINGLE_BYTE_CHAR_P (c))
                fastmap[c] = 1;
-             else
-               fastmap[p[1]] = 1;
+             fastmap[p[1]] = 1;
            }
          break;
 
@@ -3855,6 +3910,10 @@ analyse_first (p, pend, fastmap, multibyte)
             So any that are not listed in the charset
             are possible matches, even in multibyte buffers.  */
          if (!fastmap) break;
+         /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially
+            because it will automatically be set when needed by virtue of
+            being larger than the highest char of its charset (0xbf) but
+            smaller than (1<<BYTEWIDTH).  */
          for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH;
               j < (1 << BYTEWIDTH); j++)
            fastmap[j] = 1;
@@ -3865,7 +3924,13 @@ analyse_first (p, pend, fastmap, multibyte)
          for (j = CHARSET_BITMAP_SIZE (&p[-1]) * BYTEWIDTH - 1, p++;
               j >= 0; j--)
            if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not)
-             fastmap[j] = 1;
+             {
+               fastmap[j] = 1;
+#ifdef emacs
+               if (j >= 0x80 && j < 0xa0)
+                 fastmap[LEADING_CODE_8_BIT_CONTROL] = 1;
+#endif
+             }
 
          if ((not && multibyte)
              /* Any character set can possibly contain a character
@@ -4293,19 +4358,48 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop)
                        d += buf_charlen;
                      }
                  else
-                   while (range > lim
-                          && !fastmap[RE_TRANSLATE (translate, *d)])
+                   {
+                     /* Convert *d to integer to shut up GCC's
+                        whining about comparison that is always
+                        true.  */
+                     int di = *d;
+
+                     while (range > lim
+                            && !fastmap[RE_TRANSLATE (translate, di)])
+                       {
+                         di = *(++d);
+                         range--;
+                       }
+                   }
+               }
+             else
+               do
+                 {
+                   re_char *d_start = d;
+                   while (range > lim && !fastmap[*d])
                      {
                        d++;
                        range--;
                      }
-               }
-             else
-               while (range > lim && !fastmap[*d])
-                 {
-                   d++;
-                   range--;
-                 }
+#ifdef emacs
+                   if (multibyte && range > lim)
+                     {
+                       /* Check that we are at the beginning of a char.  */
+                       int at_boundary;
+                       AT_CHAR_BOUNDARY_P (at_boundary, d, d_start);
+                       if (at_boundary)
+                         break;
+                       else
+                         { /* We have matched an internal byte of a char
+                              rather than the leading byte, so it's a false
+                              positive: we should keep scanning.  */
+                           d++; range--;
+                         }
+                     }
+                   else
+#endif
+                     break;
+                 } while (1);
 
              startpos += irange - range;
            }
@@ -5247,8 +5341,13 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
              else
                do
                  {
+                   /* Avoid compiler whining about comparison being
+                      always true.  */
+                   int di;
+
                    PREFETCH ();
-                   if (RE_TRANSLATE (translate, *d) != *p++)
+                   di = *d;
+                   if (RE_TRANSLATE (translate, di) != *p++)
                      {
                        d = dfail;
                        goto fail;
@@ -5626,7 +5725,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           the repetition text and either the following jump or
           pop_failure_jump back to this on_failure_jump.  */
        case on_failure_jump:
-         IMMEDIATE_QUIT_CHECK;
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
          DEBUG_PRINT3 ("EXECUTING on_failure_jump %d (to %p):\n",
                        mcnt, p + mcnt);
@@ -5642,7 +5740,6 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
           then we can use a non-backtracking loop based on
           on_failure_keep_string_jump instead of on_failure_jump.  */
        case on_failure_jump_smart:
-         IMMEDIATE_QUIT_CHECK;
          EXTRACT_NUMBER_AND_INCR (mcnt, p);
          DEBUG_PRINT3 ("EXECUTING on_failure_jump_smart %d (to %p).\n",
                        mcnt, p + mcnt);
@@ -5863,7 +5960,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                  PREFETCH_NOLIMIT ();
                  c2 = RE_STRING_CHAR (d, dend - d);
 #ifdef emacs
-                 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+                 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
                  s2 = SYNTAX (c2);
 
@@ -5950,7 +6047,7 @@ re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
                  PREFETCH_NOLIMIT ();
                  c2 = RE_STRING_CHAR (d, dend - d);
 #ifdef emacs
-                 UPDATE_SYNTAX_TABLE_FORWARD (charpos);
+                 UPDATE_SYNTAX_TABLE_FORWARD (charpos + 1);
 #endif
                  s2 = SYNTAX (c2);
 
@@ -6143,6 +6240,10 @@ re_compile_pattern (pattern, length, bufp)
 {
   reg_errcode_t ret;
 
+#ifdef emacs
+  gl_state.current_syntax_table = current_buffer->syntax_table;
+#endif
+
   /* GNU code is written to assume at least RE_NREGS registers will be set
      (and at least one extra will be -1).  */
   bufp->regs_allocated = REGS_UNALLOCATED;