/* String search routines for GNU Emacs.
-Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2014 Free Software
+Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2015 Free Software
Foundation, Inc.
This file is part of GNU Emacs.
return shortage;
}
+/* Like above, but always scan from point and report the
+ resulting position in *CHARPOS and *BYTEPOS.
+
+ COUNT selects which of two find_newline calls is made. When COUNT
+ is zero or negative, find_newline is called with PT/PT_BYTE as the
+ start, BEGV/BEGV_BYTE as the end, and COUNT - 1 as the count.
+ Otherwise it is called with PT/PT_BYTE as the start, ZV/ZV_BYTE as
+ the end, and COUNT unchanged. In both cases the value find_newline
+ returns is stored in *CHARPOS, BYTEPOS is handed through to it
+ as-is, and this function returns whatever find_newline stored in
+ its SHORTAGE argument. */
+
+ptrdiff_t
+scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
+ ptrdiff_t *bytepos)
+{
+ ptrdiff_t shortage;
+
+ if (count <= 0)
+ *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
+ &shortage, bytepos, 1);
+ else
+ *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
+ &shortage, bytepos, 1);
+ return shortage;
+}
+
/* Like find_newline, but doesn't allow QUITting and doesn't return
SHORTAGE. */
ptrdiff_t
translation. Otherwise set to zero later. */
int char_base = -1;
bool boyer_moore_ok = 1;
+ USE_SAFE_ALLOCA;
/* MULTIBYTE says whether the text to be searched is multibyte.
We must convert PATTERN to match that, or we will not really
raw_pattern_size_byte
= count_size_as_multibyte (SDATA (string),
raw_pattern_size);
- raw_pattern = alloca (raw_pattern_size_byte + 1);
+ raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
copy_text (SDATA (string), raw_pattern,
SCHARS (string), 0, 1);
}
the chosen single-byte character set can possibly match. */
raw_pattern_size = SCHARS (string);
raw_pattern_size_byte = SCHARS (string);
- raw_pattern = alloca (raw_pattern_size + 1);
+ raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
copy_text (SDATA (string), raw_pattern,
SBYTES (string), 1, 0);
}
/* Copy and optionally translate the pattern. */
len = raw_pattern_size;
len_byte = raw_pattern_size_byte;
- patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
+ SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
pat = patbuf;
base_pat = raw_pattern;
if (multibyte)
while (boyer_moore_ok)
{
- if (ASCII_BYTE_P (inverse))
+ if (ASCII_CHAR_P (inverse))
{
if (this_char_base > 0)
boyer_moore_ok = 0;
len_byte = pat - patbuf;
pat = base_pat = patbuf;
- if (boyer_moore_ok)
- return boyer_moore (n, pat, len_byte, trt, inverse_trt,
- pos_byte, lim_byte,
- char_base);
- else
- return simple_search (n, pat, raw_pattern_size, len_byte, trt,
- pos, pos_byte, lim, lim_byte);
+ EMACS_INT result
+ = (boyer_moore_ok
+ ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
+ pos_byte, lim_byte,
+ char_base)
+ : simple_search (n, pat, raw_pattern_size, len_byte, trt,
+ pos, pos_byte, lim, lim_byte));
+ SAFE_FREE ();
+ return result;
}
}
\f
matching with CHAR_BASE are to be checked. */
int ch = -1;
- if (ASCII_BYTE_P (*ptr) || ! multibyte)
+ if (ASCII_CHAR_P (*ptr) || ! multibyte)
ch = *ptr;
else if (char_base
&& ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
{
FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
if (!buf_multibyte)
- c = multibyte_char_to_unibyte (c);
+ c = CHAR_TO_BYTE8 (c);
}
else
{
FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
pos, pos_byte);
if (!buf_multibyte && !ASCII_CHAR_P (c))
- c = multibyte_char_to_unibyte (c);
+ c = CHAR_TO_BYTE8 (c);
}
else
{
prev = Qnil;
- data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
+ USE_SAFE_ALLOCA;
+ SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
len = 0;
for (i = 0; i < search_regs.num_regs; i++)
/* If REUSE is not usable, cons up the values and return them. */
if (! CONSP (reuse))
- return Flist (len, data);
-
- /* If REUSE is a list, store as many value elements as will fit
- into the elements of REUSE. */
- for (i = 0, tail = reuse; CONSP (tail);
- i++, tail = XCDR (tail))
+ reuse = Flist (len, data);
+ else
{
+ /* If REUSE is a list, store as many value elements as will fit
+ into the elements of REUSE. */
+ for (i = 0, tail = reuse; CONSP (tail);
+ i++, tail = XCDR (tail))
+ {
+ if (i < len)
+ XSETCAR (tail, data[i]);
+ else
+ XSETCAR (tail, Qnil);
+ prev = tail;
+ }
+
+ /* If we couldn't fit all value elements into REUSE,
+ cons up the rest of them and add them to the end of REUSE. */
if (i < len)
- XSETCAR (tail, data[i]);
- else
- XSETCAR (tail, Qnil);
- prev = tail;
+ XSETCDR (prev, Flist (len - i, data + i));
}
- /* If we couldn't fit all value elements into REUSE,
- cons up the rest of them and add them to the end of REUSE. */
- if (i < len)
- XSETCDR (prev, Flist (len - i, data + i));
-
+ SAFE_FREE ();
return reuse;
}
CHECK_STRING (string);
- temp = alloca (SBYTES (string) * 2);
+ USE_SAFE_ALLOCA;
+ SAFE_NALLOCA (temp, 2, SBYTES (string));
/* Now copy the data into the new string, inserting escapes. */
*out++ = *in;
}
- return make_specified_string (temp,
- SCHARS (string) + backslashes_added,
- out - temp,
- STRING_MULTIBYTE (string));
+ Lisp_Object result
+ = make_specified_string (temp,
+ SCHARS (string) + backslashes_added,
+ out - temp,
+ STRING_MULTIBYTE (string));
+ SAFE_FREE ();
+ return result;
+}
+
+/* Like find_newline, but doesn't use the cache, and only searches forward.
+
+ Scan from START (byte position START_BYTE) toward END (byte position
+ END_BYTE) looking for the COUNTth newline.
+
+ If END is zero it defaults to ZV when COUNT is positive and to BEGV
+ otherwise. START_BYTE and END_BYTE may each be -1, in which case
+ the byte position is computed from the character position with
+ CHAR_TO_BYTE.
+
+ If the COUNTth newline is found, return the character position just
+ after it and, when BYTEPOS is non-null, store the matching byte
+ position in *BYTEPOS; *SHORTAGE (when SHORTAGE is non-null) is left
+ at zero.
+
+ Otherwise return the position where scanning stopped, store the
+ number of newlines still missing in *SHORTAGE and the byte position
+ in *BYTEPOS (each only when the pointer is non-null). When COUNT
+ is not positive no scanning is done at all: START is returned and
+ *SHORTAGE receives COUNT.
+
+ immediate_quit is set to ALLOW_QUIT for the duration of the scan and
+ reset to zero on every return path. */
+static ptrdiff_t
+find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
+ ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
+ ptrdiff_t *bytepos, bool allow_quit)
+{
+ if (count > 0)
+ {
+ if (!end)
+ end = ZV, end_byte = ZV_BYTE;
+ }
+ else
+ {
+ if (!end)
+ end = BEGV, end_byte = BEGV_BYTE;
+ }
+ if (end_byte == -1)
+ end_byte = CHAR_TO_BYTE (end);
+
+ if (shortage != 0)
+ *shortage = 0;
+
+ immediate_quit = allow_quit;
+
+ if (count > 0)
+ while (start != end)
+ {
+ /* Our innermost scanning loop is very simple; it doesn't know
+ about gaps, buffer ends, or the newline cache. ceiling is
+ the position of the last character before the next such
+ obstacle --- the last character the dumb search loop should
+ examine. */
+ ptrdiff_t tem, ceiling_byte = end_byte - 1;
+
+ if (start_byte == -1)
+ start_byte = CHAR_TO_BYTE (start);
+
+ /* The dumb loop can only scan text stored in contiguous
+ bytes. BUFFER_CEILING_OF returns the last character
+ position that is contiguous, so the ceiling is the
+ position after that. */
+ tem = BUFFER_CEILING_OF (start_byte);
+ ceiling_byte = min (tem, ceiling_byte);
+
+ {
+ /* The termination address of the dumb loop. */
+ unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
+ ptrdiff_t lim_byte = ceiling_byte + 1;
+
+ /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
+ of the base, the cursor, and the next line. */
+ ptrdiff_t base = start_byte - lim_byte;
+ ptrdiff_t cursor, next;
+
+ for (cursor = base; cursor < 0; cursor = next)
+ {
+ /* The dumb loop. */
+ unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
+ next = nl ? nl - lim_addr : 0;
+
+ if (! nl)
+ break;
+ next++; /* Step past the newline that was just found. */
+
+ if (--count == 0)
+ {
+ immediate_quit = 0;
+ if (bytepos)
+ *bytepos = lim_byte + next;
+ return BYTE_TO_CHAR (lim_byte + next);
+ }
+ }
+
+ start_byte = lim_byte; /* This stretch is exhausted; resume right after it. */
+ start = BYTE_TO_CHAR (start_byte);
+ }
+ }
+
+ immediate_quit = 0;
+ if (shortage)
+ *shortage = count; /* Newlines that were asked for but not found. */
+ if (bytepos)
+ {
+ *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
+ eassert (*bytepos == CHAR_TO_BYTE (start));
+ }
+ return start;
+}
+
+DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
+ 0, 1, 0,
+ doc: /* Check the newline cache of BUFFER against buffer contents.
+
+BUFFER defaults to the current buffer.
+
+Value is an array of 2 sub-arrays of buffer positions for newlines,
+the first based on the cache, the second based on actually scanning
+the buffer. If the buffer doesn't have a cache, the value is nil. */)
+ (Lisp_Object buffer)
+{
+ struct buffer *buf, *old = NULL; /* OLD stays NULL unless BUFFER was given. */
+ ptrdiff_t shortage, nl_count_cache, nl_count_buf;
+ Lisp_Object cache_newlines, buf_newlines, val;
+ ptrdiff_t from, found, i;
+
+ if (NILP (buffer))
+ buf = current_buffer;
+ else
+ {
+ CHECK_BUFFER (buffer);
+ buf = XBUFFER (buffer);
+ old = current_buffer; /* Remembered so it can be restored before returning. */
+ }
+ if (buf->base_buffer)
+ buf = buf->base_buffer; /* Examine the base buffer when BUF has one.
+ NOTE(review): with BUFFER nil, OLD stays NULL, so no buffer switch
+ happens below even if BUF was just replaced by its base buffer --
+ confirm that this is intended. */
+
+ /* If the buffer doesn't have a newline cache, return nil. */
+ if (NILP (BVAR (buf, cache_long_scans))
+ || buf->newline_cache == NULL)
+ return Qnil;
+
+ /* find_newline can only work on the current buffer. So when BUFFER
+ was given, make BUF current here; OLD is made current again just
+ before the final return. */
+ if (old != NULL)
+ set_buffer_internal_1 (buf);
+
+ /* How many newlines are there according to the cache? Ask for
+ TYPE_MAXIMUM (ptrdiff_t) of them; the count actually present is
+ that maximum minus the shortage reported back. */
+ find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
+ TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
+ nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage;
+
+ /* Create vector and populate it. Each pass asks for one more
+ newline starting at FROM and records FOUND - 1 in slot I; the
+ loop stops at the first nonzero shortage or once the vector is
+ full. */
+ cache_newlines = make_uninit_vector (nl_count_cache);
+
+ if (nl_count_cache)
+ {
+ for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
+ {
+ ptrdiff_t from_byte = CHAR_TO_BYTE (from);
+
+ found = find_newline (from, from_byte, 0, -1, 1, &shortage,
+ NULL, true);
+ if (shortage != 0 || i >= nl_count_cache)
+ break;
+ ASET (cache_newlines, i, make_number (found - 1));
+ }
+ /* Fill the rest of slots with an invalid position. */
+ for ( ; i < nl_count_cache; i++)
+ ASET (cache_newlines, i, make_number (-1));
+ }
+
+ /* Now do the same, but without using the cache. find_newline1
+ takes the place of find_newline for both the count and the
+ per-newline positions. */
+ find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
+ TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
+ nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage;
+ buf_newlines = make_uninit_vector (nl_count_buf);
+ if (nl_count_buf)
+ {
+ for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
+ {
+ ptrdiff_t from_byte = CHAR_TO_BYTE (from);
+
+ found = find_newline1 (from, from_byte, 0, -1, 1, &shortage,
+ NULL, true);
+ if (shortage != 0 || i >= nl_count_buf)
+ break;
+ ASET (buf_newlines, i, make_number (found - 1)); /* FOUND is just past the newline. */
+ }
+ for ( ; i < nl_count_buf; i++) /* Unfilled slots get the invalid position -1. */
+ ASET (buf_newlines, i, make_number (-1));
+ }
+
+ /* Construct the value and return it. Slot 0 holds the cache-based
+ positions, slot 1 the positions found by scanning. */
+ val = make_uninit_vector (2);
+ ASET (val, 0, cache_newlines);
+ ASET (val, 1, buf_newlines);
+
+ if (old != NULL)
+ set_buffer_internal_1 (old);
+ return val;
}
\f
void
defsubr (&Smatch_data);
defsubr (&Sset_match_data);
defsubr (&Sregexp_quote);
+ defsubr (&Snewline_cache_check);
}