(x_set_line_spacing): New function.

[gnu-emacs] / src / search.c
diff --git a/src/search.c b/src/search.c

index 225155d73ac6ba35395595c6bc43937f292f0377..34dcc7e78a2de301941616044e11bafdaddb649a 100644 (file)
--- a/src/search.c
+++ b/src/search.c
@@ -1,5 +1,5 @@
  /* String search routines for GNU Emacs.
-   Copyright (C) 1985, 86, 87, 93, 94, 97, 1998 Free Software Foundation, Inc.
+   Copyright (C) 1985, 86,87,93,94,97,98, 1999 Free Software Foundation, Inc.
  
  This file is part of GNU Emacs.
  
@@ -20,9 +20,6 @@ Boston, MA 02111-1307, USA.  */
  
  
  #include <config.h>
-#ifdef STDC_HEADERS
-#include <stdlib.h>
-#endif
  #include "lisp.h"
  #include "syntax.h"
  #include "category.h"
@@ -100,12 +97,6 @@ matcher_overflow ()
    error ("Stack overflow in regexp matcher");
  }
  
-#ifdef __STDC__
-#define CONST const
-#else
-#define CONST
-#endif
-
  /* Compile a regexp and signal a Lisp error if anything goes wrong.
     PATTERN is the pattern to compile.
     CP is the place to put the result.
@@ -182,6 +173,23 @@ compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte)
    cp->regexp = Fcopy_sequence (pattern);
  }
  
+/* Shrink each compiled regexp buffer in the cache to the size in use now.
+   This is called from GC.  If realloc fails, the old buffer is kept.  */
+
+void
+shrink_regexp_cache ()
+{
+  struct regexp_cache *cp;
+
+  for (cp = searchbuf_head; cp != 0; cp = cp->next)
+    {
+      unsigned char *shrunk
+       = (unsigned char *) realloc (cp->buf.buffer, cp->buf.used);
+      if (shrunk != 0 || cp->buf.used == 0)
+       cp->buf.buffer = shrunk, cp->buf.allocated = cp->buf.used;
+    }
+}
+
  /* Compile a regexp if necessary, but first check to see if there's one in
     the cache.
     PATTERN is the pattern to compile.
@@ -205,6 +213,13 @@ compile_pattern (pattern, regp, translate, posix, multibyte)
    for (cpp = &searchbuf_head; ; cpp = &cp->next)
      {
        cp = *cpp;
+      /* Entries are initialized to nil, and may be set to nil by
+        compile_pattern_1 if the pattern isn't valid.  Don't apply
+        XSTRING in those cases.  However, compile_pattern_1 is only
+        applied to the cache entry we pick here to reuse.  So nil
+        should never appear before a non-nil entry.  */
+      if (NILP (cp->regexp))
+       goto compile_it;
        if (XSTRING (cp->regexp)->size == XSTRING (pattern)->size
           && !NILP (Fstring_equal (cp->regexp, pattern))
           && EQ (cp->buf.translate, (! NILP (translate) ? translate : make_number (0)))
@@ -212,9 +227,12 @@ compile_pattern (pattern, regp, translate, posix, multibyte)
           && cp->buf.multibyte == multibyte)
         break;
  
-      /* If we're at the end of the cache, compile into the last cell.  */
+      /* If we're at the end of the cache, compile into the nil cell
+        we found, or the last (least recently used) cell with a
+        string value.  */
        if (cp->next == 0)
         {
+       compile_it:
           compile_pattern_1 (cp, pattern, translate, regp, posix, multibyte);
           break;
         }
@@ -294,6 +312,8 @@ looking_at_1 (string, posix)
    i = re_match_2 (bufp, (char *) p1, s1, (char *) p2, s2,
                   PT_BYTE - BEGV_BYTE, &search_regs,
                   ZV_BYTE - BEGV_BYTE);
+  immediate_quit = 0;
+  
    if (i == -2)
      matcher_overflow ();
  
@@ -308,7 +328,6 @@ looking_at_1 (string, posix)
             = BYTE_TO_CHAR (search_regs.end[i] + BEGV_BYTE);
         }
    XSETBUFFER (last_thing_searched, current_buffer);
-  immediate_quit = 0;
    return val;
  }
  
@@ -398,6 +417,7 @@ string_match_1 (regexp, string, start, posix)
  
  DEFUN ("string-match", Fstring_match, Sstring_match, 2, 3, 0,
    "Return index of start of first match for REGEXP in STRING, or nil.\n\
+Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
  If third arg START is non-nil, start search at that index in STRING.\n\
  For index of first char beyond the match, do (match-end 0).\n\
  `match-end' and `match-beginning' also give indices of substrings\n\
@@ -411,6 +431,7 @@ matched by parenthesis constructs in the pattern.")
  DEFUN ("posix-string-match", Fposix_string_match, Sposix_string_match, 2, 3, 0,
    "Return index of start of first match for REGEXP in STRING, or nil.\n\
  Find the longest match, in accord with Posix regular expression rules.\n\
+Case is ignored if `case-fold-search' is non-nil in the current buffer.\n\
  If third arg START is non-nil, start search at that index in STRING.\n\
  For index of first char beyond the match, do (match-end 0).\n\
  `match-end' and `match-beginning' also give indices of substrings\n\
@@ -1005,17 +1026,14 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
    if (running_asynch_code)
      save_search_regs ();
  
+  /* Searching 0 times means don't move.  */
    /* Null string is found at starting position.  */
-  if (len == 0)
+  if (len == 0 || n == 0)
      {
        set_search_regs (pos, 0);
        return pos;
      }
  
-  /* Searching 0 times means don't move.  */
-  if (n == 0)
-    return pos;
-
    if (RE && !trivial_regexp_p (string))
      {
        unsigned char *p1, *p2;
@@ -1128,7 +1146,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
        int multibyte = !NILP (current_buffer->enable_multibyte_characters);
        unsigned char *base_pat = XSTRING (string)->data;
        int charset_base = -1;
-      int simple = 1;
+      int boyer_moore_ok = 1;
  
        /* MULTIBYTE says whether the text to be searched is multibyte.
          We must convert PATTERN to match that, or we will not really
@@ -1175,7 +1193,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
         {
           while (--len >= 0)
             {
-             unsigned char workbuf[4], *str;
+             unsigned char str[MAX_MULTIBYTE_LENGTH];
               int c, translated, inverse;
               int in_charlen, charlen;
  
@@ -1190,17 +1208,26 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
                 }
  
               c = STRING_CHAR_AND_LENGTH (base_pat, len_byte, in_charlen);
+
               /* Translate the character, if requested.  */
               TRANSLATE (translated, trt, c);
               /* If translation changed the byte-length, go back
                  to the original character.  */
-             charlen = CHAR_STRING (translated, workbuf, str);
+             charlen = CHAR_STRING (translated, str);
               if (in_charlen != charlen)
                 {
                   translated = c;
-                 charlen = CHAR_STRING (c, workbuf, str);
+                 charlen = CHAR_STRING (c, str);
                 }
  
+             /* If we are searching for something strange (an 8-bit code
+                or an invalid multibyte code), don't use boyer-moore.  */
+             if (! ASCII_BYTE_P (translated)
+                 && (charlen == 1 /* 8bit code */
+                     || charlen != in_charlen /* invalid multibyte code */
+                     ))
+               boyer_moore_ok = 0;
+
               TRANSLATE (inverse, inverse_trt, c);
  
               /* Did this char actually get translated?
@@ -1209,15 +1236,13 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
                 {
                   /* Keep track of which character set row
                      contains the characters that need translation.  */
-                 int charset_base_code = c & ~0xff;
+                 int charset_base_code = c & ~CHAR_FIELD3_MASK;
                   if (charset_base == -1)
                     charset_base = charset_base_code;
                   else if (charset_base != charset_base_code)
                     /* If two different rows appear, needing translation,
                        then we cannot use boyer_moore search.  */
-                   simple = 0;
-                   /* ??? Handa: this must do simple = 0
-                      if c is a composite character.  */
+                   boyer_moore_ok = 0;
                 }
  
               /* Store this character into the translated pattern.  */
@@ -1229,9 +1254,11 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
         }
        else
         {
+         /* Unibyte buffer.  */
+         charset_base = 0;
           while (--len >= 0)
             {
-             int c, translated, inverse;
+             int c, translated;
  
               /* If we got here and the RE flag is set, it's because we're
                  dealing with a regexp known to be trivial, so the backslash
@@ -1243,22 +1270,6 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
                 }
               c = *base_pat++;
               TRANSLATE (translated, trt, c);
-             TRANSLATE (inverse, inverse_trt, c);
-
-             /* Did this char actually get translated?
-                Would any other char get translated into it?  */
-             if (translated != c || inverse != c)
-               {
-                 /* Keep track of which character set row
-                    contains the characters that need translation.  */
-                 int charset_base_code = c & ~0xff;
-                 if (charset_base == -1)
-                   charset_base = charset_base_code;
-                 else if (charset_base != charset_base_code)
-                   /* If two different rows appear, needing translation,
-                      then we cannot use boyer_moore search.  */
-                   simple = 0;
-               }
               *pat++ = translated;
             }
         }
@@ -1267,7 +1278,7 @@ search_buffer (string, pos, pos_byte, lim, lim_byte, n,
        len = raw_pattern_size;
        pat = base_pat = patbuf;
  
-      if (simple)
+      if (boyer_moore_ok)
         return boyer_moore (n, pat, len, len_byte, trt, inverse_trt,
                             pos, pos_byte, lim, lim_byte,
                             charset_base);
@@ -1612,7 +1623,7 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
               while (! CHAR_HEAD_P (*charstart))
                 charstart--;
               untranslated = STRING_CHAR (charstart, ptr - charstart + 1);
-             if (charset_base == (untranslated & ~0xff))
+             if (charset_base == (untranslated & ~CHAR_FIELD3_MASK))
                 {
                   TRANSLATE (ch, trt, untranslated);
                   if (! CHAR_HEAD_P (*ptr))
@@ -1896,12 +1907,15 @@ boyer_moore (n, base_pat, len, len_byte, trt, inverse_trt,
  }
  
  /* Record beginning BEG_BYTE and end BEG_BYTE + NBYTES
-   for a match just found in the current buffer.  */
+   for the overall match just found in the current buffer.
+   Also clear out the match data for registers 1 and up.  */
  
  static void
  set_search_regs (beg_byte, nbytes)
       int beg_byte, nbytes;
  {
+  int i;
+
    /* Make sure we have registers in which to store
       the match position.  */
    if (search_regs.num_regs == 0)
@@ -1911,6 +1925,13 @@ set_search_regs (beg_byte, nbytes)
        search_regs.num_regs = 2;
      }
  
+  /* Clear out the other registers.  */
+  for (i = 1; i < search_regs.num_regs; i++)
+    {
+      search_regs.start[i] = -1;
+      search_regs.end[i] = -1;
+    }
+
    search_regs.start[0] = BYTE_TO_CHAR (beg_byte);
    search_regs.end[0] = BYTE_TO_CHAR (beg_byte + nbytes);
    XSETBUFFER (last_thing_searched, current_buffer);
@@ -1959,8 +1980,12 @@ wordify (string)
      return build_string ("");
  
    adjust = - punct_count + 5 * (word_count - 1) + 4;
-  val = make_uninit_multibyte_string (len + adjust,
-                                     STRING_BYTES (XSTRING (string)) + adjust);
+  if (STRING_MULTIBYTE (string))
+    val = make_uninit_multibyte_string (len + adjust,
+                                       STRING_BYTES (XSTRING (string))
+                                       + adjust);
+  else
+    val = make_uninit_string (len + adjust);
  
    o = XSTRING (val)->data;
    *o++ = '\\';
@@ -1975,7 +2000,10 @@ wordify (string)
        if (STRING_MULTIBYTE (string))
         FETCH_STRING_CHAR_ADVANCE (c, string, i, i_byte);
        else
-       c = XSTRING (string)->data[i++];
+       {
+         c = XSTRING (string)->data[i++];
+         i_byte++;
+       }
  
        if (SYNTAX (c) == Sword)
         {
@@ -2163,7 +2191,7 @@ since only regular expressions have distinguished subexpressions.")
       Lisp_Object newtext, fixedcase, literal, string, subexp;
  {
    enum { nochange, all_caps, cap_initial } case_action;
-  register int pos, last;
+  register int pos, pos_byte;
    int some_multiletter_word;
    int some_lowercase;
    int some_uppercase;
@@ -2213,18 +2241,16 @@ since only regular expressions have distinguished subexpressions.")
  
    if (NILP (fixedcase))
      {
-      int beg;
        /* Decide how to casify by examining the matched text. */
+      int last;
  
-      if (NILP (string))
-       last = CHAR_TO_BYTE (search_regs.end[sub]);
-      else
-       last = search_regs.end[sub];
+      pos = search_regs.start[sub];
+      last = search_regs.end[sub];
  
        if (NILP (string))
-       beg = CHAR_TO_BYTE (search_regs.start[sub]);
+       pos_byte = CHAR_TO_BYTE (pos);
        else
-       beg = search_regs.start[sub];
+       pos_byte = string_char_to_byte (string, pos);
  
        prevc = '\n';
        case_action = all_caps;
@@ -2236,12 +2262,15 @@ since only regular expressions have distinguished subexpressions.")
        some_nonuppercase_initial = 0;
        some_uppercase = 0;
  
-      for (pos = beg; pos < last; pos++)
+      while (pos < last)
         {
           if (NILP (string))
-           c = FETCH_BYTE (pos);
+           {
+             c = FETCH_CHAR (pos_byte);
+             INC_BOTH (pos, pos_byte);
+           }
           else
-           c = XSTRING (string)->data[pos];
+           FETCH_STRING_CHAR_ADVANCE (c, string, pos, pos_byte);
  
           if (LOWERCASEP (c))
             {
@@ -2300,16 +2329,16 @@ since only regular expressions have distinguished subexpressions.")
          if desired.  */
        if (NILP (literal))
         {
-         int lastpos = -1;
-         int lastpos_byte = -1;
+         int lastpos = 0;
+         int lastpos_byte = 0;
           /* We build up the substituted string in ACCUM.  */
           Lisp_Object accum;
           Lisp_Object middle;
-         int pos_byte;
+         int length = STRING_BYTES (XSTRING (newtext));
  
           accum = Qnil;
  
-         for (pos_byte = 0, pos = 0; pos_byte < STRING_BYTES (XSTRING (newtext));)
+         for (pos_byte = 0, pos = 0; pos_byte < length;)
             {
               int substart = -1;
               int subend;
@@ -2340,10 +2369,10 @@ since only regular expressions have distinguished subexpressions.")
                 }
               if (substart >= 0)
                 {
-                 if (pos - 1 != lastpos + 1)
-                   middle = substring_both (newtext, lastpos + 1,
-                                            lastpos_byte + 1,
-                                            pos - 1, pos_byte - 1);
+                 if (pos - 2 != lastpos)
+                   middle = substring_both (newtext, lastpos,
+                                            lastpos_byte,
+                                            pos - 2, pos_byte - 2);
                   else
                     middle = Qnil;
                   accum = concat3 (accum, middle,
@@ -2355,9 +2384,9 @@ since only regular expressions have distinguished subexpressions.")
                 }
               else if (delbackslash)
                 {
-                 middle = substring_both (newtext, lastpos + 1,
-                                          lastpos_byte + 1,
-                                          pos, pos_byte);
+                 middle = substring_both (newtext, lastpos,
+                                          lastpos_byte,
+                                          pos - 1, pos_byte - 1);
  
                   accum = concat2 (accum, middle);
                   lastpos = pos;
@@ -2365,9 +2394,9 @@ since only regular expressions have distinguished subexpressions.")
                 }
             }
  
-         if (pos != lastpos + 1)
-           middle = substring_both (newtext, lastpos + 1,
-                                    lastpos_byte + 1,
+         if (pos != lastpos)
+           middle = substring_both (newtext, lastpos,
+                                    lastpos_byte,
                                      pos, pos_byte);
           else
             middle = Qnil;
@@ -2385,8 +2414,10 @@ since only regular expressions have distinguished subexpressions.")
      }
  
    /* Record point, the move (quietly) to the start of the match.  */
-  if (PT > search_regs.start[sub])
+  if (PT >= search_regs.end[sub])
      opoint = PT - ZV;
+  else if (PT > search_regs.start[sub])
+    opoint = search_regs.end[sub] - ZV;
    else
      opoint = PT;
  
@@ -2400,39 +2431,118 @@ since only regular expressions have distinguished subexpressions.")
      Finsert_and_inherit (1, &newtext);
    else
      {
-      struct gcpro gcpro1;
-      GCPRO1 (newtext);
-
-      for (pos = 0; pos < XSTRING (newtext)->size; pos++)
+      int length = STRING_BYTES (XSTRING (newtext));
+      unsigned char *substed;
+      int substed_alloc_size, substed_len;
+      int buf_multibyte = !NILP (current_buffer->enable_multibyte_characters);
+      int str_multibyte = STRING_MULTIBYTE (newtext);
+      Lisp_Object rev_tbl;
+
+      rev_tbl= (!buf_multibyte && CHAR_TABLE_P (Vnonascii_translation_table)
+               ? Fchar_table_extra_slot (Vnonascii_translation_table,
+                                         make_number (0))
+               : Qnil);
+
+      substed_alloc_size = length * 2 + 100;
+      substed = (unsigned char *) xmalloc (substed_alloc_size + 1);
+      substed_len = 0;
+
+      /* Go thru NEWTEXT, producing the actual text to insert in
+        SUBSTED while adjusting multibyteness to that of the current
+        buffer.  */
+
+      for (pos_byte = 0, pos = 0; pos_byte < length;)
         {
-         int offset = PT - search_regs.start[sub];
+         unsigned char str[MAX_MULTIBYTE_LENGTH];
+         unsigned char *add_stuff;
+         int add_len;
+         int idx = -1;
+
+         if (str_multibyte)
+           {
+             FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
+             if (!buf_multibyte)
+               c = multibyte_char_to_unibyte (c, rev_tbl);
+           }
+         else
+           {
+             /* Note that we don't have to increment POS.  */
+             c = XSTRING (newtext)->data[pos_byte++];
+             if (buf_multibyte)
+               c = unibyte_char_to_multibyte (c);
+           }
+
+         /* Either set ADD_STUFF and ADD_LEN to the text to put in SUBSTED,
+            or set IDX to a match index, which means put that part
+            of the buffer text into SUBSTED.  */
  
-         c = XSTRING (newtext)->data[pos];
           if (c == '\\')
             {
-             c = XSTRING (newtext)->data[++pos];
+             if (str_multibyte)
+               {
+                 FETCH_STRING_CHAR_ADVANCE (c, newtext, pos, pos_byte);
+                 if (!buf_multibyte && !SINGLE_BYTE_CHAR_P (c))
+                   c = multibyte_char_to_unibyte (c, rev_tbl);
+               }
+             else
+               {
+                 c = XSTRING (newtext)->data[pos_byte++];
+                 if (buf_multibyte)
+                   c = unibyte_char_to_multibyte (c);
+               }
+
               if (c == '&')
-               Finsert_buffer_substring
-                 (Fcurrent_buffer (),
-                  make_number (search_regs.start[sub] + offset),
-                  make_number (search_regs.end[sub] + offset));
+               idx = sub;
               else if (c >= '1' && c <= '9' && c <= search_regs.num_regs + '0')
                 {
                   if (search_regs.start[c - '0'] >= 1)
-                   Finsert_buffer_substring
-                     (Fcurrent_buffer (),
-                      make_number (search_regs.start[c - '0'] + offset),
-                      make_number (search_regs.end[c - '0'] + offset));
+                   idx = c - '0';
                 }
               else if (c == '\\')
-               insert_char (c);
+               add_len = 1, add_stuff = "\\";
               else
-               error ("Invalid use of `\\' in replacement text");
+               {
+                 xfree (substed);
+                 error ("Invalid use of `\\' in replacement text");
+               }
             }
           else
-           insert_char (c);
+           {
+             add_len = CHAR_STRING (c, str);
+             add_stuff = str;
+           }
+
+         /* If we want to copy part of a previous match,
+            set up ADD_STUFF and ADD_LEN to point to it.  */
+         if (idx >= 0)
+           {
+             int begbyte = CHAR_TO_BYTE (search_regs.start[idx]);
+             add_len = CHAR_TO_BYTE (search_regs.end[idx]) - begbyte;
+             if (search_regs.start[idx] < GPT && GPT < search_regs.end[idx])
+               move_gap (search_regs.start[idx]);
+             add_stuff = BYTE_POS_ADDR (begbyte);
+           }
+
+         /* Now the stuff we want to add to SUBSTED
+            is invariably ADD_LEN bytes starting at ADD_STUFF.  */
+
+         /* Make sure SUBSTED is big enough.  */
+         if (substed_len + add_len >= substed_alloc_size)
+           {
+             substed_alloc_size = substed_len + add_len + 500;
+             substed = (unsigned char *) xrealloc (substed,
+                                                   substed_alloc_size + 1);
+           }
+
+         /* Now add to the end of SUBSTED.  */
+         bcopy (add_stuff, substed + substed_len, add_len);
+         substed_len += add_len;
         }
-      UNGCPRO;
+
+      /* Now insert what we accumulated.  */
+      insert_and_inherit (substed, substed_len);
+
+      xfree (substed);
      }
  
    inslen = PT - (search_regs.start[sub]);
@@ -2565,19 +2675,19 @@ to hold all the values, and if INTEGERS is non-nil, no consing is done.")
    /* If REUSE is a list, store as many value elements as will fit
       into the elements of REUSE.  */
    for (i = 0, tail = reuse; CONSP (tail);
-       i++, tail = XCONS (tail)->cdr)
+       i++, tail = XCDR (tail))
      {
        if (i < 2 * len + 2)
-       XCONS (tail)->car = data[i];
+       XCAR (tail) = data[i];
        else
-       XCONS (tail)->car = Qnil;
+       XCAR (tail) = Qnil;
        prev = tail;
      }
  
    /* If we couldn't fit all value elements into REUSE,
       cons up the rest of them and add them to the end of REUSE.  */
    if (i < 2 * len + 2)
-    XCONS (prev)->cdr = Flist (2 * len + 2 - i, data + i);
+    XCDR (prev) = Flist (2 * len + 2 - i, data + i);
  
    return reuse;
  }