(print_string): Don't ignore garbage bytes following a

[gnu-emacs] / src / casefiddle.c
diff --git a/src/casefiddle.c b/src/casefiddle.c

index 7780b29d0e32b51e7153f90068a96e1a3d26e30b..b6d8b21f606674cf31ca72bc2d9f0d856ccbf7fc 100644 (file)
--- a/src/casefiddle.c
+++ b/src/casefiddle.c
@@ -1,5 +1,5 @@
  /* GNU Emacs case conversion functions.
-   Copyright (C) 1985, 1994 Free Software Foundation, Inc.
+   Copyright (C) 1985, 1994, 1997 Free Software Foundation, Inc.
  
  This file is part of GNU Emacs.
  
@@ -47,34 +47,85 @@ casify_object (flag, obj)
      {
        if (INTEGERP (obj))
         {
-         tem = Faref (current_buffer->downcase_table, obj);
-         if (EQ (tem, Qidentity))
-           tem = obj;
+         c = DOWNCASE (XFASTINT (obj));
           if (inword)
-           obj = tem;
-         else if (EQ (tem, obj))
+           XSETFASTINT (obj, c);
+         else if (c == XFASTINT (obj))
             {
-             tem = Faref (current_buffer->upcase_table, obj);
-             if (!EQ (tem, Qidentity))
-               obj = tem;
+             c = UPCASE1 (XFASTINT (obj));
+             XSETFASTINT (obj, c);
             }
           return obj;
         }
+
        if (STRINGP (obj))
         {
+         int multibyte = STRING_MULTIBYTE (obj);
+
           obj = Fcopy_sequence (obj);
-         len = XSTRING (obj)->size;
-         for (i = 0; i < len; i++)
+         len = STRING_BYTES (XSTRING (obj));
+
+         /* Scan all single-byte characters from start of string.  */
+         for (i = 0; i < len;)
             {
               c = XSTRING (obj)->data[i];
+
+             if (multibyte && c >= 0x80)
+               /* A multibyte character can't be handled in this
+                   simple loop.  */
+               break;
               if (inword && flag != CASE_CAPITALIZE_UP)
                 c = DOWNCASE (c);
               else if (!UPPERCASEP (c)
                        && (!inword || flag != CASE_CAPITALIZE_UP))
                 c = UPCASE1 (c);
+             /* If this char won't fit in a single-byte string,
+                fall out to the multibyte case.  */
+             if (multibyte ? ! ASCII_BYTE_P (c)
+                 : ! SINGLE_BYTE_CHAR_P (c))
+               break;
+
               XSTRING (obj)->data[i] = c;
               if ((int) flag >= (int) CASE_CAPITALIZE)
                 inword = SYNTAX (c) == Sword;
+             i++;
+           }
+
+         /* If we didn't do the whole string as single-byte,
+            scan the rest in a more complex way.  */
+         if (i < len)
+           {
+             /* The work is not yet finished because of a multibyte
+                character just encountered.  */
+             int fromlen, tolen, j = i, j_byte = i;
+             char *buf
+               = (char *) alloca ((len - i) * MAX_LENGTH_OF_MULTI_BYTE_FORM
+                                  + i);
+             unsigned char *str, workbuf[4];
+
+             /* Copy data already handled.  */
+             bcopy (XSTRING (obj)->data, buf, i);
+
+             /* From now on, I counts bytes.  */
+             while (i < len)
+               {
+                 c = STRING_CHAR_AND_LENGTH (XSTRING (obj)->data + i,
+                                             len - i, fromlen);
+                 if (inword && flag != CASE_CAPITALIZE_UP)
+                   c = DOWNCASE (c);
+                 else if (!UPPERCASEP (c)
+                          && (!inword || flag != CASE_CAPITALIZE_UP))
+                   c = UPCASE1 (c);
+                 tolen = CHAR_STRING (c, workbuf, str);
+                 bcopy (str, buf + j_byte, tolen);
+                 i += fromlen;
+                 j++;
+                 j_byte += tolen;
+                 if ((int) flag >= (int) CASE_CAPITALIZE)
+                   inword = SYNTAX (c) == Sword;
+               }
+             obj = make_specified_string (buf, j, j_byte,
+                                          STRING_MULTIBYTE (obj));
             }
           return obj;
         }
@@ -131,6 +182,7 @@ The argument object is not altered--the value is a copy.")
  /* flag is CASE_UP, CASE_DOWN or CASE_CAPITALIZE or CASE_CAPITALIZE_UP.
     b and e specify range of buffer to operate on. */
  
+void
  casify_region (flag, b, e)
       enum case_action flag;
       Lisp_Object b, e;
@@ -138,7 +190,9 @@ casify_region (flag, b, e)
    register int i;
    register int c;
    register int inword = flag == CASE_DOWN;
+  register int multibyte = !NILP (current_buffer->enable_multibyte_characters);
    int start, end;
+  int start_byte, end_byte;
    Lisp_Object ch, downch, val;
  
    if (EQ (b, e))
@@ -154,59 +208,52 @@ casify_region (flag, b, e)
    end = XFASTINT (e);
    modify_region (current_buffer, start, end);
    record_change (start, end - start);
+  start_byte = CHAR_TO_BYTE (start);
+  end_byte = CHAR_TO_BYTE (end);
  
-  if (NILP (current_buffer->enable_multibyte_characters))
+  for (i = start_byte; i < end_byte; i++)
      {
-      for (i = start; i < end; i++)
-       {
-         c = FETCH_BYTE (i);
-         if (inword && flag != CASE_CAPITALIZE_UP)
-           c = DOWNCASE (c);
-         else if (!UPPERCASEP (c)
-                  && (!inword || flag != CASE_CAPITALIZE_UP))
-           c = UPCASE1 (c);
-         FETCH_BYTE (i) = c;
-         if ((int) flag >= (int) CASE_CAPITALIZE)
-           inword = SYNTAX (c) == Sword;
-       }
+      c = FETCH_BYTE (i);
+      if (multibyte && c >= 0x80)
+       /* A multibyte character can't be handled in this simple loop.  */
+       break;
+      if (inword && flag != CASE_CAPITALIZE_UP)
+       c = DOWNCASE (c);
+      else if (!UPPERCASEP (c)
+              && (!inword || flag != CASE_CAPITALIZE_UP))
+       c = UPCASE1 (c);
+      FETCH_BYTE (i) = c;
+      if ((int) flag >= (int) CASE_CAPITALIZE)
+       inword = SYNTAX (c) == Sword;
      }
-  else
+  if (i < end_byte)
      {
-      Lisp_Object down, up;
+      /* The work is not yet finished because of a multibyte character
+        just encountered.  */
        int opoint = PT;
+      int opoint_byte = PT_BYTE;
+      int c2;
  
-      down = current_buffer->downcase_table;
-      up = current_buffer->upcase_table;
-      for (i = start; i < end;)
+      while (i < end_byte)
         {
-         c = FETCH_MULTIBYTE_CHAR (i);
-         XSETFASTINT (ch, c);
-         downch = Faref (down, ch);
-         if (EQ (downch, Qidentity))
-           downch = ch;
+         if ((c = FETCH_BYTE (i)) >= 0x80)
+           c = FETCH_MULTIBYTE_CHAR (i);
+         c2 = c;
           if (inword && flag != CASE_CAPITALIZE_UP)
-           val = downch;
-         else if (EQ (downch, ch)
+           c2 = DOWNCASE (c);
+         else if (!UPPERCASEP (c)
                    && (!inword || flag != CASE_CAPITALIZE_UP))
-           {
-             val = Faref (up, ch);
-             if (EQ (val, Qidentity))
-               val = ch;
-           }
-         else
-           val = ch;
-         if (!EQ (val, ch))
+           c2 = UPCASE1 (c);
+         if (c != c2)
             {
               int fromlen, tolen, j;
-             char workbuf[4], *str;
+             unsigned char workbuf[4], *str;
  
-             if (!NATNUMP (val))
-               error ("Inappropriate value found in case table");
               /* Handle the most likely case */
-             if (c < 0400 && XFASTINT (val) < 0400)
-               FETCH_BYTE (i) = XFASTINT (val);
+             if (c < 0400 && c2 < 0400)
+               FETCH_BYTE (i) = c2;
               else if (fromlen = CHAR_STRING (c, workbuf, str),
-                      tolen = CHAR_STRING (XFASTINT (val), workbuf, str),
+                      tolen = CHAR_STRING (c2, workbuf, str),
                        fromlen == tolen)
                 {
                   for (j = 0; j < tolen; ++j)
@@ -221,16 +268,16 @@ casify_region (flag, b, e)
                   else if (tolen > fromlen)
                     {
                       TEMP_SET_PT (i + fromlen);
-                     insert_1 (str + fromlen, tolen - fromlen, 1, 0);
+                     insert_1 (str + fromlen, tolen - fromlen, 1, 0, 0);
                     }
  #endif
                 }
             }
           if ((int) flag >= (int) CASE_CAPITALIZE)
-           inword = SYNTAX (XFASTINT (val)) == Sword;
+           inword = SYNTAX (c2) == Sword;
           INC_POS (i);
         }
-      TEMP_SET_PT (opoint);
+      TEMP_SET_PT_BOTH (opoint, opoint_byte);
      }
  
    signal_after_change (start, end - start, end - start);
@@ -358,6 +405,7 @@ With negative argument, capitalize previous words but do not move.")
    return Qnil;
  }
  \f
+void
  syms_of_casefiddle ()
  {
    Qidentity = intern ("identity");
@@ -375,6 +423,7 @@ syms_of_casefiddle ()
    defsubr (&Scapitalize_word);
  }
  
+void
  keys_of_casefiddle ()
  {
    initial_define_key (control_x_map, Ctl('U'), "upcase-region");