X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/e0af409500d5f44b34a6e8d971f0c7abe1d747fe..d3155315c85212f224fc5df0239182dafdfd6284:/src/search.c

diff --git a/src/search.c b/src/search.c
index 9bec825abc..5da99c408a 100644
--- a/src/search.c
+++ b/src/search.c
@@ -1,6 +1,6 @@
 /* String search routines for GNU Emacs.
 
-Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2014 Free Software
+Copyright (C) 1985-1987, 1993-1994, 1997-1999, 2001-2015 Free Software
 Foundation, Inc.
 
 This file is part of GNU Emacs.
@@ -84,12 +84,6 @@ static struct re_registers search_regs;
    Qnil if no searching has been done yet.  */
 static Lisp_Object last_thing_searched;
 
-/* Error condition signaled when regexp compile_pattern fails.  */
-static Lisp_Object Qinvalid_regexp;
-
-/* Error condition used for failing searches.  */
-static Lisp_Object Qsearch_failed;
-
 static void set_search_regs (ptrdiff_t, ptrdiff_t);
 static void save_search_regs (void);
 static EMACS_INT simple_search (EMACS_INT, unsigned char *, ptrdiff_t,
@@ -465,17 +459,18 @@ matched by parenthesis constructs in the pattern.  */)
   return string_match_1 (regexp, string, start, 1);
 }
 
-/* Match REGEXP against STRING, searching all of STRING,
-   and return the index of the match, or negative on failure.
-   This does not clobber the match data.  */
+/* Match REGEXP against STRING using translation table TABLE,
+   searching all of STRING, and return the index of the match,
+   or negative on failure.  This does not clobber the match data.  */
 
 ptrdiff_t
-fast_string_match (Lisp_Object regexp, Lisp_Object string)
+fast_string_match_internal (Lisp_Object regexp, Lisp_Object string,
+			    Lisp_Object table)
 {
   ptrdiff_t val;
   struct re_pattern_buffer *bufp;
 
-  bufp = compile_pattern (regexp, 0, Qnil,
+  bufp = compile_pattern (regexp, 0, table,
 			  0, STRING_MULTIBYTE (string));
   immediate_quit = 1;
   re_match_object = string;
@@ -510,26 +505,6 @@ fast_c_string_match_ignore_case (Lisp_Object regexp,
   return val;
 }
 
-/* Like fast_string_match but ignore case.  */
-
-ptrdiff_t
-fast_string_match_ignore_case (Lisp_Object regexp, Lisp_Object string)
-{
-  ptrdiff_t val;
-  struct re_pattern_buffer *bufp;
-
-  bufp = compile_pattern (regexp, 0, Vascii_canon_table,
-			  0, STRING_MULTIBYTE (string));
-  immediate_quit = 1;
-  re_match_object = string;
-
-  val = re_search (bufp, SSDATA (string),
-		   SBYTES (string), 0,
-		   SBYTES (string), 0);
-  immediate_quit = 0;
-  return val;
-}
-
 /* Match REGEXP against the characters after POS to LIMIT, and return
    the number of matched characters.  If STRING is non-nil, match
    against the characters in it.  In that case, POS and LIMIT are
@@ -731,6 +706,12 @@ find_newline (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
 					       start, &next_change);
 		if (result)
 		  {
+		    /* When the cache revalidation is deferred,
+		       next-change might point beyond ZV, which will
+		       cause assertion violation in CHAR_TO_BYTE below.
+		       Limit next_change to ZV to avoid that.  */
+		    if (next_change > ZV)
+		      next_change = ZV;
 		    start = next_change;
 		    lim1 = next_change = end;
 		  }
@@ -985,6 +966,24 @@ scan_newline (ptrdiff_t start, ptrdiff_t start_byte,
   return shortage;
 }
 
+/* Like above, but always scan from point and report the
+   resulting position in *CHARPOS and *BYTEPOS.  */
+
+ptrdiff_t
+scan_newline_from_point (ptrdiff_t count, ptrdiff_t *charpos,
+			 ptrdiff_t *bytepos)
+{
+  ptrdiff_t shortage;
+
+  if (count <= 0)
+    *charpos = find_newline (PT, PT_BYTE, BEGV, BEGV_BYTE, count - 1,
+			     &shortage, bytepos, 1);
+  else
+    *charpos = find_newline (PT, PT_BYTE, ZV, ZV_BYTE, count,
+			     &shortage, bytepos, 1);
+  return shortage;
+}
+
 /* Like find_newline, but doesn't allow QUITting and doesn't return
    SHORTAGE.  */
 ptrdiff_t
@@ -1318,6 +1317,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
 	 translation.  Otherwise set to zero later.  */
       int char_base = -1;
       bool boyer_moore_ok = 1;
+      USE_SAFE_ALLOCA;
 
       /* MULTIBYTE says whether the text to be searched is multibyte.
 	 We must convert PATTERN to match that, or we will not really
@@ -1335,7 +1335,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
 	  raw_pattern_size_byte
 	    = count_size_as_multibyte (SDATA (string),
 				       raw_pattern_size);
-	  raw_pattern = alloca (raw_pattern_size_byte + 1);
+	  raw_pattern = SAFE_ALLOCA (raw_pattern_size_byte + 1);
 	  copy_text (SDATA (string), raw_pattern,
 		     SCHARS (string), 0, 1);
 	}
@@ -1349,7 +1349,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
 	     the chosen single-byte character set can possibly match.  */
 	  raw_pattern_size = SCHARS (string);
 	  raw_pattern_size_byte = SCHARS (string);
-	  raw_pattern = alloca (raw_pattern_size + 1);
+	  raw_pattern = SAFE_ALLOCA (raw_pattern_size + 1);
 	  copy_text (SDATA (string), raw_pattern,
 		     SBYTES (string), 1, 0);
 	}
@@ -1357,7 +1357,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
       /* Copy and optionally translate the pattern.  */
       len = raw_pattern_size;
       len_byte = raw_pattern_size_byte;
-      patbuf = alloca (len * MAX_MULTIBYTE_LENGTH);
+      SAFE_NALLOCA (patbuf, MAX_MULTIBYTE_LENGTH, len);
       pat = patbuf;
       base_pat = raw_pattern;
       if (multibyte)
@@ -1416,7 +1416,7 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
 
 		      while (boyer_moore_ok)
 			{
-			  if (ASCII_BYTE_P (inverse))
+			  if (ASCII_CHAR_P (inverse))
 			    {
 			      if (this_char_base > 0)
 				boyer_moore_ok = 0;
@@ -1497,13 +1497,15 @@ search_buffer (Lisp_Object string, ptrdiff_t pos, ptrdiff_t pos_byte,
       len_byte = pat - patbuf;
       pat = base_pat = patbuf;
 
-      if (boyer_moore_ok)
-	return boyer_moore (n, pat, len_byte, trt, inverse_trt,
-			    pos_byte, lim_byte,
-			    char_base);
-      else
-	return simple_search (n, pat, raw_pattern_size, len_byte, trt,
-			      pos, pos_byte, lim, lim_byte);
+      EMACS_INT result
+	= (boyer_moore_ok
+	   ? boyer_moore (n, pat, len_byte, trt, inverse_trt,
+			  pos_byte, lim_byte,
+			  char_base)
+	   : simple_search (n, pat, raw_pattern_size, len_byte, trt,
+			    pos, pos_byte, lim, lim_byte));
+      SAFE_FREE ();
+      return result;
     }
 }
 
@@ -1827,7 +1829,7 @@ boyer_moore (EMACS_INT n, unsigned char *base_pat,
 	     matching with CHAR_BASE are to be checked.  */
 	  int ch = -1;
 
-	  if (ASCII_BYTE_P (*ptr) || ! multibyte)
+	  if (ASCII_CHAR_P (*ptr) || ! multibyte)
 	    ch = *ptr;
 	  else if (char_base
 		   && ((pat_end - ptr) == 1 || CHAR_HEAD_P (ptr[1])))
@@ -2596,7 +2598,7 @@ since only regular expressions have distinguished subexpressions.  */)
 	    {
 	      FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext, pos, pos_byte);
 	      if (!buf_multibyte)
-		c = multibyte_char_to_unibyte (c);
+		c = CHAR_TO_BYTE8 (c);
 	    }
 	  else
 	    {
@@ -2619,7 +2621,7 @@ since only regular expressions have distinguished subexpressions.  */)
 		  FETCH_STRING_CHAR_ADVANCE_NO_CHECK (c, newtext,
 						      pos, pos_byte);
 		  if (!buf_multibyte && !ASCII_CHAR_P (c))
-		    c = multibyte_char_to_unibyte (c);
+		    c = CHAR_TO_BYTE8 (c);
 		}
 	      else
 		{
@@ -2809,7 +2811,8 @@ Return value is undefined if the last search failed.  */)
 
   prev = Qnil;
 
-  data = alloca ((2 * search_regs.num_regs + 1) * sizeof *data);
+  USE_SAFE_ALLOCA;
+  SAFE_NALLOCA (data, 1, 2 * search_regs.num_regs + 1);
 
   len = 0;
   for (i = 0; i < search_regs.num_regs; i++)
@@ -2852,25 +2855,28 @@ Return value is undefined if the last search failed.  */)
 
   /* If REUSE is not usable, cons up the values and return them.  */
   if (! CONSP (reuse))
-    return Flist (len, data);
-
-  /* If REUSE is a list, store as many value elements as will fit
-     into the elements of REUSE.  */
-  for (i = 0, tail = reuse; CONSP (tail);
-       i++, tail = XCDR (tail))
+    reuse = Flist (len, data);
+  else
     {
+      /* If REUSE is a list, store as many value elements as will fit
+	 into the elements of REUSE.  */
+      for (i = 0, tail = reuse; CONSP (tail);
+	   i++, tail = XCDR (tail))
+	{
+	  if (i < len)
+	    XSETCAR (tail, data[i]);
+	  else
+	    XSETCAR (tail, Qnil);
+	  prev = tail;
+	}
+
+      /* If we couldn't fit all value elements into REUSE,
+	 cons up the rest of them and add them to the end of REUSE.  */
       if (i < len)
-	XSETCAR (tail, data[i]);
-      else
-	XSETCAR (tail, Qnil);
-      prev = tail;
+	XSETCDR (prev, Flist (len - i, data + i));
     }
 
-  /* If we couldn't fit all value elements into REUSE,
-     cons up the rest of them and add them to the end of REUSE.  */
-  if (i < len)
-    XSETCDR (prev, Flist (len - i, data + i));
-
+  SAFE_FREE ();
   return reuse;
 }
 
@@ -3075,7 +3081,8 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
 
   CHECK_STRING (string);
 
-  temp = alloca (SBYTES (string) * 2);
+  USE_SAFE_ALLOCA;
+  SAFE_NALLOCA (temp, 2, SBYTES (string));
 
   /* Now copy the data into the new string, inserting escapes. */
 
@@ -3093,10 +3100,194 @@ DEFUN ("regexp-quote", Fregexp_quote, Sregexp_quote, 1, 1, 0,
       *out++ = *in;
     }
 
-  return make_specified_string (temp,
-				SCHARS (string) + backslashes_added,
-				out - temp,
-				STRING_MULTIBYTE (string));
+  Lisp_Object result
+    = make_specified_string (temp,
+			     SCHARS (string) + backslashes_added,
+			     out - temp,
+			     STRING_MULTIBYTE (string));
+  SAFE_FREE ();
+  return result;
+}
+
+/* Like find_newline, but doesn't use the cache, and only searches forward.  */
+static ptrdiff_t
+find_newline1 (ptrdiff_t start, ptrdiff_t start_byte, ptrdiff_t end,
+	       ptrdiff_t end_byte, ptrdiff_t count, ptrdiff_t *shortage,
+	       ptrdiff_t *bytepos, bool allow_quit)
+{
+  if (count > 0)
+    {
+      if (!end)
+	end = ZV, end_byte = ZV_BYTE;
+    }
+  else
+    {
+      if (!end)
+	end = BEGV, end_byte = BEGV_BYTE;
+    }
+  if (end_byte == -1)
+    end_byte = CHAR_TO_BYTE (end);
+
+  if (shortage != 0)
+    *shortage = 0;
+
+  immediate_quit = allow_quit;
+
+  if (count > 0)
+    while (start != end)
+      {
+        /* Our innermost scanning loop is very simple; it doesn't know
+           about gaps, buffer ends, or the newline cache.  ceiling is
+           the position of the last character before the next such
+           obstacle --- the last character the dumb search loop should
+           examine.  */
+	ptrdiff_t tem, ceiling_byte = end_byte - 1;
+
+	if (start_byte == -1)
+	  start_byte = CHAR_TO_BYTE (start);
+
+        /* The dumb loop can only scan text stored in contiguous
+           bytes. BUFFER_CEILING_OF returns the last character
+           position that is contiguous, so the ceiling is the
+           position after that.  */
+	tem = BUFFER_CEILING_OF (start_byte);
+	ceiling_byte = min (tem, ceiling_byte);
+
+        {
+          /* The termination address of the dumb loop.  */
+	  unsigned char *lim_addr = BYTE_POS_ADDR (ceiling_byte) + 1;
+	  ptrdiff_t lim_byte = ceiling_byte + 1;
+
+	  /* Nonpositive offsets (relative to LIM_ADDR and LIM_BYTE)
+	     of the base, the cursor, and the next line.  */
+	  ptrdiff_t base = start_byte - lim_byte;
+	  ptrdiff_t cursor, next;
+
+	  for (cursor = base; cursor < 0; cursor = next)
+	    {
+              /* The dumb loop.  */
+	      unsigned char *nl = memchr (lim_addr + cursor, '\n', - cursor);
+	      next = nl ? nl - lim_addr : 0;
+
+              if (! nl)
+		break;
+	      next++;
+
+	      if (--count == 0)
+		{
+		  immediate_quit = 0;
+		  if (bytepos)
+		    *bytepos = lim_byte + next;
+		  return BYTE_TO_CHAR (lim_byte + next);
+		}
+            }
+
+	  start_byte = lim_byte;
+	  start = BYTE_TO_CHAR (start_byte);
+        }
+      }
+
+  immediate_quit = 0;
+  if (shortage)
+    *shortage = count;
+  if (bytepos)
+    {
+      *bytepos = start_byte == -1 ? CHAR_TO_BYTE (start) : start_byte;
+      eassert (*bytepos == CHAR_TO_BYTE (start));
+    }
+  return start;
+}
+
+DEFUN ("newline-cache-check", Fnewline_cache_check, Snewline_cache_check,
+       0, 1, 0,
+       doc: /* Check the newline cache of BUFFER against buffer contents.
+
+BUFFER defaults to the current buffer.
+
+Value is an array of 2 sub-arrays of buffer positions for newlines,
+the first based on the cache, the second based on actually scanning
+the buffer.  If the buffer doesn't have a cache, the value is nil.  */)
+  (Lisp_Object buffer)
+{
+  struct buffer *buf, *old = NULL;
+  ptrdiff_t shortage, nl_count_cache, nl_count_buf;
+  Lisp_Object cache_newlines, buf_newlines, val;
+  ptrdiff_t from, found, i;
+
+  if (NILP (buffer))
+    buf = current_buffer;
+  else
+    {
+      CHECK_BUFFER (buffer);
+      buf = XBUFFER (buffer);
+      old = current_buffer;
+    }
+  if (buf->base_buffer)
+    buf = buf->base_buffer;
+
+  /* If the buffer doesn't have a newline cache, return nil.  */
+  if (NILP (BVAR (buf, cache_long_scans))
+      || buf->newline_cache == NULL)
+    return Qnil;
+
+  /* find_newline can only work on the current buffer.  */
+  if (old != NULL)
+    set_buffer_internal_1 (buf);
+
+  /* How many newlines are there according to the cache?  */
+  find_newline (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
+		TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
+  nl_count_cache = TYPE_MAXIMUM (ptrdiff_t) - shortage;
+
+  /* Create vector and populate it.  */
+  cache_newlines = make_uninit_vector (nl_count_cache);
+
+  if (nl_count_cache)
+    {
+      for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
+	{
+	  ptrdiff_t from_byte = CHAR_TO_BYTE (from);
+
+	  found = find_newline (from, from_byte, 0, -1, 1, &shortage,
+				NULL, true);
+	  if (shortage != 0 || i >= nl_count_cache)
+	    break;
+	  ASET (cache_newlines, i, make_number (found - 1));
+	}
+      /* Fill the rest of slots with an invalid position.  */
+      for ( ; i < nl_count_cache; i++)
+	ASET (cache_newlines, i, make_number (-1));
+    }
+
+  /* Now do the same, but without using the cache.  */
+  find_newline1 (BEGV, BEGV_BYTE, ZV, ZV_BYTE,
+		 TYPE_MAXIMUM (ptrdiff_t), &shortage, NULL, true);
+  nl_count_buf = TYPE_MAXIMUM (ptrdiff_t) - shortage;
+  buf_newlines = make_uninit_vector (nl_count_buf);
+  if (nl_count_buf)
+    {
+      for (from = BEGV, found = from, i = 0; from < ZV; from = found, i++)
+	{
+	  ptrdiff_t from_byte = CHAR_TO_BYTE (from);
+
+	  found = find_newline1 (from, from_byte, 0, -1, 1, &shortage,
+				 NULL, true);
+	  if (shortage != 0 || i >= nl_count_buf)
+	    break;
+	  ASET (buf_newlines, i, make_number (found - 1));
+	}
+      for ( ; i < nl_count_buf; i++)
+	ASET (buf_newlines, i, make_number (-1));
+    }
+
+  /* Construct the value and return it.  */
+  val = make_uninit_vector (2);
+  ASET (val, 0, cache_newlines);
+  ASET (val, 1, buf_newlines);
+
+  if (old != NULL)
+    set_buffer_internal_1 (old);
+  return val;
 }
 
 void
@@ -3119,7 +3310,10 @@ syms_of_search (void)
     }
   searchbuf_head = &searchbufs[0];
 
+  /* Error condition used for failing searches.  */
   DEFSYM (Qsearch_failed, "search-failed");
+
+  /* Error condition signaled when regexp compile_pattern fails.  */
   DEFSYM (Qinvalid_regexp, "invalid-regexp");
 
   Fput (Qsearch_failed, Qerror_conditions,
@@ -3170,4 +3364,5 @@ is to bind it with `let' around a small expression.  */);
   defsubr (&Smatch_data);
   defsubr (&Sset_match_data);
   defsubr (&Sregexp_quote);
+  defsubr (&Snewline_cache_check);
 }