X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/e0af409500d5f44b34a6e8d971f0c7abe1d747fe..d3155315c85212f224fc5df0239182dafdfd6284:/src/search.c diff --git a/src/search.c b/src/search.c index 9bec825abc..5da99c408a 100644 --- a/src/search.c +++ b/src/search.c @@ -1,6 +1,6 @@ /* String search routines for GNU Emacs. -Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2014 Free Software +Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2015 Free Software Foundation, Inc. This file is part of GNU Emacs. @@ -84,12 +84,6 @@ static struct re_registers search_regs; Qnil if no searching has been done yet. */ static Lisp_Object last_thing_searched; -/* Error condition signaled when regexp compile_pattern fails. */ -static Lisp_Object Qinvalid_regexp; - -/* Error condition used for failing searches. */ -static Lisp_Object Qsearch_failed; - static void set_search_regs (ptrdiff_t, ptrdiff_t); static void save_search_regs (void); static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t, @@ -465,17 +459,18 @@ matched by parenthesis constructs in the pattern. */) return string_match_1 (regexp, string, start, 1); } -/* Match REGEXP against STRING, searching all of STRING, - and return the index of the match, or negative on failure. - This does not clobber the match data. */ +/* Match REGEXP against STRING using translation table TABLE, + searching all of STRING, and return the index of the match, + or negative on failure. This does not clobber the match data. */ ptrdiff_t -fast_string_match (Lisp_Object regexp, Lisp_Object string) +fast_string_match_internal (Lisp_Object regexp, Lisp_Object string, + Lisp_Object table) { ptrdiff_t val; struct re_pattern_buffer *bufp; - bufp = compile_pattern (regexp, 0, Qnil, + bufp = compile_pattern (regexp, 0, table, 0, STRING_MULTIBYTE (string)); immediate_quit = 1; re_match_object = string; @@ -510,26 +505,6 @@ fast_c_string_match_ignore_case (Lisp_Object regexp, return val; } -/* Like fast_string_match but ignore case. */ - -ptrdiff_t -fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string) -{ - ptrdiff_t val; - struct re_pattern_buffer *bufp; - - bufp = compile_pattern (regexp, 0, Vascii_canon_table, - 0, STRING_MULTIBYTE (string)); - immediate_quit = 1; - re_match_object = string; - - val = re_search (bufp, SSDATA (string), - SBYTES (string), 0, - SBYTES (string), 0); - immediate_quit = 0; - return val; -} - /* Match REGEXP against the characters after POS to LIMIT, and return the number of matched characters. If STRING is non-nil, match against the characters in it. In that case, POS and LIMIT are @@ -731,6 +706,12 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, start, &next_change); if (result) { + /* When the cache revalidation is deferred, + next-change might point beyond ZV, which will + cause assertion violation in CHAR_TO_BYTE below. + Limit next_change to ZV to avoid that. */ + if (next_change > ZV) + next_change = ZV; start = next_change; lim1 = next_change = end; } @@ -985,6 +966,24 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte, return shortage; } +/* Like above, but always scan from point and report the + resulting position in *CHARPOS and *BYTEPOS. */ + +ptrdiff_t +scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos, + ptrdiff_t *bytepos) +{ + ptrdiff_t shortage; + + if (count <= 0) + *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1, + &shortage, bytepos, 1); + else + *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count, + &shortage, bytepos, 1); + return shortage; +} + /* Like find_newline, but doesn't allow QUITting and doesn't return SHORTAGE. */ ptrdiff_t @@ -1318,6 +1317,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, translation. Otherwise set to zero later. */ int char_base = -1; bool boyer_moore_ok = 1; + USE_SAFE_ALLOCA; /* MULTIBYTE says whether the text to be searched is multibyte. We must convert PATTERN to match that, or we will not really @@ -1335,7 +1335,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, raw_pattern_size_byte = count_size_as_multibyte (SDATA (string), raw_pattern_size); - raw_pattern = alloca (raw_pattern_size_byte + 1); + raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1); copy_text (SDATA (string), raw_pattern, SCHARS (string), 0, 1); } @@ -1349,7 +1349,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, the chosen single-byte character set can possibly match. */ raw_pattern_size = SCHARS (string); raw_pattern_size_byte = SCHARS (string); - raw_pattern = alloca (raw_pattern_size + 1); + raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1); copy_text (SDATA (string), raw_pattern, SBYTES (string), 1, 0); } @@ -1357,7 +1357,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, /* Copy and optionally translate the pattern. */ len = raw_pattern_size; len_byte = raw_pattern_size_byte; - patbuf = alloca (len * MAX_MULTIBYTE_LENGTH); + SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len); pat = patbuf; base_pat = raw_pattern; if (multibyte) @@ -1416,7 +1416,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, while (boyer_moore_ok) { - if (ASCII_BYTE_P (inverse)) + if (ASCII_CHAR_P (inverse)) { if (this_char_base > 0) boyer_moore_ok = 0; @@ -1497,13 +1497,15 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte, len_byte = pat - patbuf; pat = base_pat = patbuf; - if (boyer_moore_ok) - return boyer_moore (n, pat, len_byte, trt, inverse_trt, - pos_byte, lim_byte, - char_base); - else - return simple_search (n, pat, raw_pattern_size, len_byte, trt, - pos, pos_byte, lim, lim_byte); + EMACS_INT result + = (boyer_moore_ok + ? boyer_moore (n, pat, len_byte, trt, inverse_trt, + pos_byte, lim_byte, + char_base) + : simple_search (n, pat, raw_pattern_size, len_byte, trt, + pos, pos_byte, lim, lim_byte)); + SAFE_FREE (); + return result; } } @@ -1827,7 +1829,7 @@ boyer_moore (EMACS_INT n, unsigned char *base_pat, matching with CHAR_BASE are to be checked. */ int ch = -1; - if (ASCII_BYTE_P (*ptr) || ! multibyte) + if (ASCII_CHAR_P (*ptr) || ! multibyte) ch = *ptr; else if (char_base && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1]))) @@ -2596,7 +2598,7 @@ since only regular expressions have distinguished subexpressions. */) { FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte); if (!buf_multibyte) - c = multibyte_char_to_unibyte (c); + c = CHAR_TO_BYTE8 (c); } else { @@ -2619,7 +2621,7 @@ since only regular expressions have distinguished subexpressions. */) FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte); if (!buf_multibyte && !ASCII_CHAR_P (c)) - c = multibyte_char_to_unibyte (c); + c = CHAR_TO_BYTE8 (c); } else { @@ -2809,7 +2811,8 @@ Return value is undefined if the last search failed. */) prev = Qnil; - data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data); + USE_SAFE_ALLOCA; + SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1); len = 0; for (i = 0; i < search_regs.num_regs; i++) @@ -2852,25 +2855,28 @@ Return value is undefined if the last search failed. */) /* If REUSE is not usable, cons up the values and return them. */ if (! CONSP (reuse)) - return Flist (len, data); - - /* If REUSE is a list, store as many value elements as will fit - into the elements of REUSE. */ - for (i = 0, tail = reuse; CONSP (tail); - i++, tail = XCDR (tail)) + reuse = Flist (len, data); + else { + /* If REUSE is a list, store as many value elements as will fit + into the elements of REUSE. */ + for (i = 0, tail = reuse; CONSP (tail); + i++, tail = XCDR (tail)) + { + if (i < len) + XSETCAR (tail, data[i]); + else + XSETCAR (tail, Qnil); + prev = tail; + } + + /* If we couldn't fit all value elements into REUSE, + cons up the rest of them and add them to the end of REUSE. */ if (i < len) - XSETCAR (tail, data[i]); - else - XSETCAR (tail, Qnil); - prev = tail; + XSETCDR (prev, Flist (len - i, data + i)); } - /* If we couldn't fit all value elements into REUSE, - cons up the rest of them and add them to the end of REUSE. */ - if (i < len) - XSETCDR (prev, Flist (len - i, data + i)); - + SAFE_FREE (); return reuse; } @@ -3075,7 +3081,8 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0, CHECK_STRING (string); - temp = alloca (SBYTES (string) * 2); + USE_SAFE_ALLOCA; + SAFE_NALLOCA (temp, 2, SBYTES (string)); /* Now copy the data into the new string, inserting escapes. */ @@ -3093,10 +3100,194 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0, *out++ = *in; } - return make_specified_string (temp, - SCHARS (string) + backslashes_added, - out - temp, - STRING_MULTIBYTE (string)); + Lisp_Object result + = make_specified_string (temp, + SCHARS (string) + backslashes_added, + out - temp, + STRING_MULTIBYTE (string)); + SAFE_FREE (); + return result; +} + +/* Like find_newline, but doesn't use the cache, and only searches forward. */ +static ptrdiff_t +find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end, + ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage, + ptrdiff_t *bytepos, bool allow_quit) +{ + if (count > 0) + { + if (!end) + end = ZV, end_byte = ZV_BYTE; + } + else + { + if (!end) + end = BEGV, end_byte = BEGV_BYTE; + } + if (end_byte == -1) + end_byte = CHAR_TO_BYTE (end); + + if (shortage != 0) + *shortage = 0; + + immediate_quit = allow_quit; + + if (count > 0) + while (start != end) + { + /* Our innermost scanning loop is very simple; it doesn't know + about gaps, buffer ends, or the newline cache. ceiling is + the position of the last character before the next such + obstacle --- the last character the dumb search loop should + examine. */ + ptrdiff_t tem, ceiling_byte = end_byte - 1; + + if (start_byte == -1) + start_byte = CHAR_TO_BYTE (start); + + /* The dumb loop can only scan text stored in contiguous + bytes. BUFFER_CEILING_OF returns the last character + position that is contiguous, so the ceiling is the + position after that. */ + tem = BUFFER_CEILING_OF (start_byte); + ceiling_byte = min (tem, ceiling_byte); + + { + /* The termination address of the dumb loop. */ + unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1; + ptrdiff_t lim_byte = ceiling_byte + 1; + + /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE) + of the base, the cursor, and the next line. */ + ptrdiff_t base = start_byte - lim_byte; + ptrdiff_t cursor, next; + + for (cursor = base; cursor < 0; cursor = next) + { + /* The dumb loop. */ + unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor); + next = nl ? nl - lim_addr : 0; + + if (! nl) + break; + next++; + + if (--count == 0) + { + immediate_quit = 0; + if (bytepos) + *bytepos = lim_byte + next; + return BYTE_TO_CHAR (lim_byte + next); + } + } + + start_byte = lim_byte; + start = BYTE_TO_CHAR (start_byte); + } + } + + immediate_quit = 0; + if (shortage) + *shortage = count; + if (bytepos) + { + *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte; + eassert (*bytepos == CHAR_TO_BYTE (start)); + } + return start; +} + +DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check, + 0, 1, 0, + doc: /* Check the newline cache of BUFFER against buffer contents. + +BUFFER defaults to the current buffer. + +Value is an array of 2 sub-arrays of buffer positions for newlines, +the first based on the cache, the second based on actually scanning +the buffer. If the buffer doesn't have a cache, the value is nil. */) + (Lisp_Object buffer) +{ + struct buffer *buf, *old = NULL; + ptrdiff_t shortage, nl_count_cache, nl_count_buf; + Lisp_Object cache_newlines, buf_newlines, val; + ptrdiff_t from, found, i; + + if (NILP (buffer)) + buf = current_buffer; + else + { + CHECK_BUFFER (buffer); + buf = XBUFFER (buffer); + old = current_buffer; + } + if (buf->base_buffer) + buf = buf->base_buffer; + + /* If the buffer doesn't have a newline cache, return nil. */ + if (NILP (BVAR (buf, cache_long_scans)) + || buf->newline_cache == NULL) + return Qnil; + + /* find_newline can only work on the current buffer. */ + if (old != NULL) + set_buffer_internal_1 (buf); + + /* How many newlines are there according to the cache? */ + find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE, + TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true); + nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage; + + /* Create vector and populate it. */ + cache_newlines = make_uninit_vector (nl_count_cache); + + if (nl_count_cache) + { + for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++) + { + ptrdiff_t from_byte = CHAR_TO_BYTE (from); + + found = find_newline (from, from_byte, 0, -1, 1, &shortage, + NULL, true); + if (shortage != 0 || i >= nl_count_cache) + break; + ASET (cache_newlines, i, make_number (found - 1)); + } + /* Fill the rest of slots with an invalid position. */ + for ( ; i < nl_count_cache; i++) + ASET (cache_newlines, i, make_number (-1)); + } + + /* Now do the same, but without using the cache. */ + find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE, + TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true); + nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage; + buf_newlines = make_uninit_vector (nl_count_buf); + if (nl_count_buf) + { + for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++) + { + ptrdiff_t from_byte = CHAR_TO_BYTE (from); + + found = find_newline1 (from, from_byte, 0, -1, 1, &shortage, + NULL, true); + if (shortage != 0 || i >= nl_count_buf) + break; + ASET (buf_newlines, i, make_number (found - 1)); + } + for ( ; i < nl_count_buf; i++) + ASET (buf_newlines, i, make_number (-1)); + } + + /* Construct the value and return it. */ + val = make_uninit_vector (2); + ASET (val, 0, cache_newlines); + ASET (val, 1, buf_newlines); + + if (old != NULL) + set_buffer_internal_1 (old); + return val; } void @@ -3119,7 +3310,10 @@ syms_of_search (void) } searchbuf_head = &searchbufs[0]; + /* Error condition used for failing searches. */ DEFSYM (Qsearch_failed, "search-failed"); + + /* Error condition signaled when regexp compile_pattern fails. */ DEFSYM (Qinvalid_regexp, "invalid-regexp"); Fput (Qsearch_failed, Qerror_conditions, @@ -3170,4 +3364,5 @@ is to bind it with `let' around a small expression. */); defsubr (&Smatch_data); defsubr (&Sset_match_data); defsubr (&Sregexp_quote); + defsubr (&Snewline_cache_check); }