X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/7b1d7d664bf0342b32fb1175d418e5bef2311310..9416ae448e61ef1478a7e7e07bdfa25273095811:/src/regex.c diff --git a/src/regex.c b/src/regex.c index d6b61a1c18..ae80ad0cee 100644 --- a/src/regex.c +++ b/src/regex.c @@ -181,6 +181,51 @@ char *malloc (); char *realloc (); # endif +/* When used in Emacs's lib-src, we need xmalloc and xrealloc. */ + +void * +xmalloc (size) + size_t size; +{ + register void *val; + val = (void *) malloc (size); + if (!val && size) + { + write (2, "virtual memory exhausted\n", 25); + exit (1); + } + return val; +} + +void * +xrealloc (block, size) + void *block; + size_t size; +{ + register void *val; + /* We must call malloc explicitly when BLOCK is 0, since some + reallocs don't do this. */ + if (! block) + val = (void *) malloc (size); + else + val = (void *) realloc (block, size); + if (!val && size) + { + write (2, "virtual memory exhausted\n", 25); + exit (1); + } + return val; +} + +# ifdef malloc +# undef malloc +# endif +# define malloc xmalloc +# ifdef realloc +# undef realloc +# endif +# define realloc xrealloc + /* When used in Emacs's lib-src, we need to get bzero and bcopy somehow. If nothing else has been done, use the method below. */ # ifdef INHIBIT_STRING_HEADER @@ -2067,10 +2112,10 @@ extend_range_table_work_area (work_area) work_area->allocated += 16 * sizeof (int); if (work_area->table) work_area->table - = (int *) xrealloc (work_area->table, work_area->allocated); + = (int *) realloc (work_area->table, work_area->allocated); else work_area->table - = (int *) xalloc (work_area->allocated); + = (int *) malloc (work_area->allocated); } #ifdef emacs @@ -2485,6 +2530,7 @@ regex_compile (pattern, size, syntax, bufp) bufp->syntax = syntax; bufp->fastmap_accurate = 0; bufp->not_bol = bufp->not_eol = 0; + bufp->used_syntax = 0; /* Set `used' to zero, so that if we return an error, the pattern printer (for debugging) will think there's no pattern. We reset it @@ -2897,6 +2943,14 @@ regex_compile (pattern, size, syntax, bufp) SET_LIST_BIT (translated); } + /* In most cases the matching rule for char classes + only uses the syntax table for multibyte chars, + so that the content of the syntax-table it is not + hardcoded in the range_table. SPACE and WORD are + the two exceptions. */ + if ((1 << cc) & ((1 << RECC_SPACE) | (1 << RECC_WORD))) + bufp->used_syntax = 1; + /* Repeat the loop. */ continue; } @@ -3608,13 +3662,13 @@ regex_compile (pattern, size, syntax, bufp) if (! fail_stack.stack) fail_stack.stack - = (fail_stack_elt_t *) xmalloc (fail_stack.size - * sizeof (fail_stack_elt_t)); + = (fail_stack_elt_t *) malloc (fail_stack.size + * sizeof (fail_stack_elt_t)); else fail_stack.stack - = (fail_stack_elt_t *) xrealloc (fail_stack.stack, - (fail_stack.size - * sizeof (fail_stack_elt_t))); + = (fail_stack_elt_t *) realloc (fail_stack.stack, + (fail_stack.size + * sizeof (fail_stack_elt_t))); } regex_grow_registers (num_regs); @@ -3832,11 +3886,13 @@ analyse_first (p, pend, fastmap, multibyte) if (fastmap) { int c = RE_STRING_CHAR (p + 1, pend - p); - + /* When fast-scanning, the fastmap can be indexed either with + a char (smaller than 256) or with the first byte of + a char's byte sequence. So we have to conservatively add + both to the table. */ if (SINGLE_BYTE_CHAR_P (c)) fastmap[c] = 1; - else - fastmap[p[1]] = 1; + fastmap[p[1]] = 1; } break; @@ -3854,6 +3910,10 @@ analyse_first (p, pend, fastmap, multibyte) So any that are not listed in the charset are possible matches, even in multibyte buffers. */ if (!fastmap) break; + /* We don't need to mark LEADING_CODE_8_BIT_CONTROL specially + because it will automatically be set when needed by virtue of + being larger than the highest char of its charset (0xbf) but + smaller than (1<= 0; j--) if (!!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) ^ not) - fastmap[j] = 1; + { + fastmap[j] = 1; +#ifdef emacs + if (j >= 0x80 && j < 0xa0) + fastmap[LEADING_CODE_8_BIT_CONTROL] = 1; +#endif + } if ((not && multibyte) /* Any character set can possibly contain a character @@ -4307,11 +4373,33 @@ re_search_2 (bufp, str1, size1, str2, size2, startpos, range, regs, stop) } } else - while (range > lim && !fastmap[*d]) + do { - d++; - range--; - } + re_char *d_start = d; + while (range > lim && !fastmap[*d]) + { + d++; + range--; + } +#ifdef emacs + if (multibyte && range > lim) + { + /* Check that we are at the beginning of a char. */ + int at_boundary; + AT_CHAR_BOUNDARY_P (at_boundary, d, d_start); + if (at_boundary) + break; + else + { /* We have matched an internal byte of a char + rather than the leading byte, so it's a false + positive: we should keep scanning. */ + d++; range--; + } + } + else +#endif + break; + } while (1); startpos += irange - range; } @@ -6152,6 +6240,10 @@ re_compile_pattern (pattern, length, bufp) { reg_errcode_t ret; +#ifdef emacs + gl_state.current_syntax_table = current_buffer->syntax_table; +#endif + /* GNU code is written to assume at least RE_NREGS registers will be set (and at least one extra will be -1). */ bufp->regs_allocated = REGS_UNALLOCATED; @@ -6292,15 +6384,15 @@ regcomp (preg, pattern, cflags) preg->used = 0; /* Try to allocate space for the fastmap. */ - preg->fastmap = (char *) xmalloc (1 << BYTEWIDTH); + preg->fastmap = (char *) malloc (1 << BYTEWIDTH); if (cflags & REG_ICASE) { unsigned i; preg->translate - = (RE_TRANSLATE_TYPE) xmalloc (CHAR_SET_SIZE - * sizeof (*(RE_TRANSLATE_TYPE)0)); + = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE + * sizeof (*(RE_TRANSLATE_TYPE)0)); if (preg->translate == NULL) return (int) REG_ESPACE;