(gnus-start-news-server): Use expand-file-name, not

[gnu-emacs] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index bf3b731a99e4f2f5715ff96fc51d277ccc349d25..f44efa9415e976c1c68b314692766028ed1ad859 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -235,7 +235,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
      else                                                       \
        c = *src, bytes = 1;                                     \
      if (!NILP (translation_table))                             \
-      c = translate_char (translation_table, c, 0, 0, 0);      \
+      c = translate_char (translation_table, c, -1, 0, 0);     \
      src += bytes;                                              \
    } while (0)
  
@@ -337,7 +337,7 @@ Lisp_Object Qbuffer_file_coding_system;
  Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  Lisp_Object Qno_conversion, Qundecided;
  Lisp_Object Qcoding_system_history;
-Lisp_Object Qsafe_charsets;
+Lisp_Object Qsafe_chars;
  Lisp_Object Qvalid_codes;
  
  extern Lisp_Object Qinsert_file_contents, Qwrite_region;
@@ -383,6 +383,9 @@ Lisp_Object Vlatin_extra_code_table;
  /* Flag to inhibit code conversion of end-of-line format.  */
  int inhibit_eol_conversion;
  
+/* Flag to inhibit ISO2022 escape sequence detection.  */
+int inhibit_iso_escape_detection;
+
  /* Flag to make buffer-file-coding-system inherit from process-coding.  */
  int inherit_process_coding_system;
  
@@ -468,6 +471,28 @@ Lisp_Object Vdefault_process_coding_system;
     to avoid infinite recursive call.  */
  static int inhibit_pre_post_conversion;
  
+/* Char-table containing safe coding systems of each character.  */
+Lisp_Object Vchar_coding_system_table;
+Lisp_Object Qchar_coding_system;
+
+/* Return `safe-chars' from the plist in slot 3 of CODING's coding spec,
+   or t if that is not a char-table.  Don't check validity of CODING.  */
+
+Lisp_Object
+coding_safe_chars (coding)
+     struct coding_system *coding;
+{
+  Lisp_Object coding_spec, plist, safe_chars;
+  
+  coding_spec = Fget (coding->symbol, Qcoding_system);
+  plist = XVECTOR (coding_spec)->contents[3];
+  safe_chars = Fplist_get (plist, Qsafe_chars);
+  return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
+}
+
+/* Nonzero if SAFE_CHARS is t or maps C to non-nil.  SAFE_CHARS is used twice.  */
+#define CODING_SAFE_CHAR_P(safe_chars, c) \
+  (EQ ((safe_chars), Qt) || !NILP (CHAR_TABLE_REF ((safe_chars), (c))))
  \f
  /*** 2. Emacs internal format (emacs-mule) handlers ***/
  
@@ -584,13 +609,50 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
    unsigned char *src_base;
  
    coding->produced_char = 0;
-  while (src < src_end)
+  while ((src_base = src) < src_end)
      {
        unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
        int bytes;
  
-      src_base = src;
-      if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+      if (*src == '\r')
+       {
+         int c = *src++;       /* A bare CR is copied as is unless the
+                                  eol type handled below replaces it.  */
+
+         if (coding->eol_type == CODING_EOL_CR)
+           c = '\n';
+         else if (coding->eol_type == CODING_EOL_CRLF)
+           {
+             ONE_MORE_BYTE (c);
+             if (c != '\n')
+               {
+                 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+                   {
+                     coding->result = CODING_FINISH_INCONSISTENT_EOL;
+                     goto label_end_of_loop;
+                   }
+                 src--;        /* Unread the byte that followed the CR.  */
+                 c = '\r';
+               }
+           }
+         *dst++ = c;
+         coding->produced_char++;
+         continue;
+       }
+      else if (*src == '\n')
+       {
+         if ((coding->eol_type == CODING_EOL_CR
+              || coding->eol_type == CODING_EOL_CRLF)
+             && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+           {
+             coding->result = CODING_FINISH_INCONSISTENT_EOL;
+             goto label_end_of_loop;
+           }
+         *dst++ = *src++;
+         coding->produced_char++;
+         continue;
+       }
+      else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
         {
           p = src;
           src += bytes;
@@ -609,6 +671,7 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
        while (bytes--) *dst++ = *p++;
        coding->produced_char++;
      }
+ label_end_of_loop:
    coding->consumed = coding->consumed_char = src_base - source;
    coding->produced = dst - destination;
  }
@@ -795,12 +858,14 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
  
  enum iso_code_class_type iso_code_class[256];
  
-#define CHARSET_OK(idx, charset)                               \
-  (coding_system_table[idx]                                    \
-   && (coding_system_table[idx]->safe_charsets[charset]                \
-       || (CODING_SPEC_ISO_REQUESTED_DESIGNATION               \
-            (coding_system_table[idx], charset)                        \
-           != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
+#define CHARSET_OK(idx, charset, c)                                    \
+  (coding_system_table[idx]                                            \
+   && (charset == CHARSET_ASCII                                                \
+       || (safe_chars = coding_safe_chars (coding_system_table[idx]),  \
+          CODING_SAFE_CHAR_P (safe_chars, c)))                         \
+   && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx],        \
+                                             charset)                  \
+       != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
  
  #define SHIFT_OUT_OK(idx) \
    (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
@@ -828,6 +893,7 @@ detect_coding_iso2022 (src, src_end)
    /* Dummy for ONE_MORE_BYTE.  */
    struct coding_system dummy_coding;
    struct coding_system *coding = &dummy_coding;
+  Lisp_Object safe_chars;
  
    reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
    while (mask && src < src_end)
@@ -836,6 +902,8 @@ detect_coding_iso2022 (src, src_end)
        switch (c)
         {
         case ISO_CODE_ESC:
+         if (inhibit_iso_escape_detection)
+           break;
           single_shifting = 0;
           ONE_MORE_BYTE (c);
           if (c >= '(' && c <= '/')
@@ -886,25 +954,28 @@ detect_coding_iso2022 (src, src_end)
  
           /* We found a valid designation sequence for CHARSET.  */
           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
-         if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
+         c = MAKE_CHAR (charset, 0, 0);
+         if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
             mask_found |= CODING_CATEGORY_MASK_ISO_7;
           else
             mask &= ~CODING_CATEGORY_MASK_ISO_7;
-         if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
+         if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
           else
             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
-         if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
+         if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
             mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
           else
             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
-         if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
+         if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
             mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
           else
             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
           break;
  
         case ISO_CODE_SO:
+         if (inhibit_iso_escape_detection)
+           break;
           single_shifting = 0;
           if (shift_out == 0
               && (reg[1] >= 0
@@ -918,6 +989,8 @@ detect_coding_iso2022 (src, src_end)
           break;
           
         case ISO_CODE_SI:
+         if (inhibit_iso_escape_detection)
+           break;
           single_shifting = 0;
           if (shift_out == 1)
             {
@@ -934,6 +1007,8 @@ detect_coding_iso2022 (src, src_end)
           {
             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
  
+           if (inhibit_iso_escape_detection)
+             break;
             if (c != ISO_CODE_CSI)
               {
                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
@@ -987,8 +1062,6 @@ detect_coding_iso2022 (src, src_end)
             }
           else
             {
-             unsigned char *src_begin = src;
-
               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
@@ -999,7 +1072,7 @@ detect_coding_iso2022 (src, src_end)
               if (!single_shifting
                   && mask & CODING_CATEGORY_MASK_ISO_8_2)
                 {
-                 int i = 0;
+                 int i = 1;
                   while (src < src_end)
                     {
                       ONE_MORE_BYTE (c);
@@ -1034,16 +1107,17 @@ detect_coding_iso2022 (src, src_end)
  /* Set designation state into CODING.  */
  #define DECODE_DESIGNATION(reg, dimension, chars, final_char)             \
    do {                                                                    \
-    int charset;                                                          \
+    int charset, c;                                                       \
                                                                            \
      if (final_char < '0' || final_char >= 128)                            \
        goto label_invalid_code;                                            \
      charset = ISO_CHARSET_TABLE (make_number (dimension),                 \
                                  make_number (chars),                      \
                                  make_number (final_char));                \
+    c = MAKE_CHAR (charset, 0, 0);                                        \
      if (charset >= 0                                                      \
         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
-           || coding->safe_charsets[charset]))                            \
+           || CODING_SAFE_CHAR_P (safe_chars, c)))                        \
        {                                                                           \
         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
             && reg == 0                                                    \
@@ -1070,7 +1144,7 @@ detect_coding_iso2022 (src, src_end)
  /* Allocate a memory block for storing information about compositions.
     The block is chained to the already allocated blocks.  */
  
-static void
+void
  coding_allocate_composition_data (coding, char_offset)
       struct coding_system *coding;
       int char_offset;
@@ -1118,49 +1192,50 @@ coding_allocate_composition_data (coding, char_offset)
  
  /* Handle compositoin start sequence ESC 0, ESC 2, ESC 3, or ESC 4.  */
  
-#define DECODE_COMPOSITION_START(c1)                                   \
-  do {                                                                 \
-    if (coding->composing == COMPOSITION_DISABLED)                     \
-      {                                                                        \
-       *dst++ = ISO_CODE_ESC;                                          \
-       *dst++ = c1 & 0x7f;                                             \
-       coding->produced_char += 2;                                     \
-      }                                                                        \
-    else if (!COMPOSING_P (coding))                                    \
-      {                                                                        \
-       /* This is surely the start of a composition.  We must be sure  \
-           that coding->cmp_data has enough space to store the         \
-           information about the composition.  If not, terminate the   \
-           current decoding loop, allocate one more memory block for   \
-           coding->cmp_data in the calller, then start the decoding    \
-           loop again.  We can't allocate memory here directly because \
-           it may cause buffer/string relocation.  */                  \
-       if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH  \
-           >= COMPOSITION_DATA_SIZE)                                   \
-         {                                                             \
-           coding->result = CODING_FINISH_INSUFFICIENT_CMP;            \
-           goto label_end_of_loop;                                     \
-         }                                                             \
-       coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE           \
-                            : c1 == '2' ? COMPOSITION_WITH_RULE        \
-                            : c1 == '3' ? COMPOSITION_WITH_ALTCHARS    \
-                            : COMPOSITION_WITH_RULE_ALTCHARS);         \
-       CODING_ADD_COMPOSITION_START (coding, coding->produced_char,    \
-                                     coding->composing);               \
-       coding->composition_rule_follows = 0;                           \
-      }                                                                        \
-    else                                                               \
-      {                                                                        \
-       /* We are already handling a composition.  If the method is     \
-           the following two, the codes following the current escape   \
-           sequence are actual characters stored in a buffer.  */      \
-       if (coding->composing == COMPOSITION_WITH_ALTCHARS              \
-           || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)     \
-         {                                                             \
-           coding->composing = COMPOSITION_RELATIVE;                   \
-           coding->composition_rule_follows = 0;                       \
-         }                                                             \
-      }                                                                        \
+#define DECODE_COMPOSITION_START(c1)                                      \
+  do {                                                                    \
+    if (coding->composing == COMPOSITION_DISABLED)                        \
+      {                                                                           \
+       *dst++ = ISO_CODE_ESC;                                             \
+       *dst++ = c1 & 0x7f;                                                \
+       coding->produced_char += 2;                                        \
+      }                                                                           \
+    else if (!COMPOSING_P (coding))                                       \
+      {                                                                           \
+       /* This is surely the start of a composition.  We must be sure     \
+           that coding->cmp_data has enough space to store the            \
+           information about the composition.  If not, terminate the      \
+           current decoding loop, allocate one more memory block for      \
+           coding->cmp_data in the caller, then start the decoding        \
+           loop again.  We can't allocate memory here directly because    \
+           it may cause buffer/string relocation.  */                     \
+       if (!coding->cmp_data                                              \
+           || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
+               >= COMPOSITION_DATA_SIZE))                                 \
+         {                                                                \
+           coding->result = CODING_FINISH_INSUFFICIENT_CMP;               \
+           goto label_end_of_loop;                                        \
+         }                                                                \
+       coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE              \
+                            : c1 == '2' ? COMPOSITION_WITH_RULE           \
+                            : c1 == '3' ? COMPOSITION_WITH_ALTCHARS       \
+                            : COMPOSITION_WITH_RULE_ALTCHARS);            \
+       CODING_ADD_COMPOSITION_START (coding, coding->produced_char,       \
+                                     coding->composing);                  \
+       coding->composition_rule_follows = 0;                              \
+      }                                                                           \
+    else                                                                  \
+      {                                                                           \
+       /* We are already handling a composition.  If the method is        \
+           the following two, the codes following the current escape      \
+           sequence are actual characters stored in a buffer.  */         \
+       if (coding->composing == COMPOSITION_WITH_ALTCHARS                 \
+           || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)        \
+         {                                                                \
+           coding->composing = COMPOSITION_RELATIVE;                      \
+           coding->composition_rule_follows = 0;                          \
+         }                                                                \
+      }                                                                           \
    } while (0)
  
  /* Handle compositoin end sequence ESC 1.  */
@@ -1229,6 +1304,9 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
    unsigned char *src_base;
    int c, charset;
    Lisp_Object translation_table;
+  Lisp_Object safe_chars;
+
+  safe_chars = coding_safe_chars (coding);
  
    if (NILP (Venable_character_translation))
      translation_table = Qnil;
@@ -1432,6 +1510,8 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                 goto label_invalid_code;
               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
               ONE_MORE_BYTE (c1);
+             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+               goto label_invalid_code;
               break;
  
             case 'O':           /* invocation of single-shift-3 */
@@ -1440,6 +1520,8 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                 goto label_invalid_code;
               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
               ONE_MORE_BYTE (c1);
+             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+               goto label_invalid_code;
               break;
  
             case '0': case '2': case '3': case '4': /* start composition */
@@ -1675,16 +1757,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
         *dst++ = c1 | 0x80;                                             \
         break;                                                          \
        }                                                                        \
-    else if (coding->flags & CODING_FLAG_ISO_SAFE                      \
-            && !coding->safe_charsets[charset])                        \
-      {                                                                        \
-       /* We should not encode this character, instead produce one or  \
-          two `?'s.  */                                                \
-       *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
-       if (CHARSET_WIDTH (charset) == 2)                               \
-         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
-       break;                                                          \
-      }                                                                        \
      else                                                               \
        /* Since CHARSET is not yet invoked to any graphic planes, we    \
          must invoke it, or, at first, designate it to some graphic     \
@@ -1718,16 +1790,6 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
         break;                                                          \
        }                                                                        \
-    else if (coding->flags & CODING_FLAG_ISO_SAFE                      \
-            && !coding->safe_charsets[charset])                        \
-      {                                                                        \
-       /* We should not encode this character, instead produce one or  \
-          two `?'s.  */                                                \
-       *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
-       if (CHARSET_WIDTH (charset) == 2)                               \
-         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
-       break;                                                          \
-      }                                                                        \
      else                                                               \
        /* Since CHARSET is not yet invoked to any graphic planes, we    \
          must invoke it, or, at first, designate it to some graphic     \
@@ -1736,35 +1798,47 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
        dst = encode_invocation_designation (charset, coding, dst);      \
    } while (1)
  
-#define ENCODE_ISO_CHARACTER(charset, c1, c2)                          \
+#define ENCODE_ISO_CHARACTER(c)                                        \
+  do {                                                         \
+    int charset, c1, c2;                                       \
+                                                               \
+    SPLIT_CHAR (c, charset, c1, c2);                           \
+    if (CHARSET_DEFINED_P (charset))                           \
+      {                                                                \
+       if (CHARSET_DIMENSION (charset) == 1)                   \
+         {                                                     \
+           if (charset == CHARSET_ASCII                        \
+               && coding->flags & CODING_FLAG_ISO_USE_ROMAN)   \
+             charset = charset_latin_jisx0201;                 \
+           ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1);      \
+         }                                                     \
+       else                                                    \
+         {                                                     \
+           if (charset == charset_jisx0208                     \
+               && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)  \
+             charset = charset_jisx0208_1978;                  \
+           ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2);  \
+         }                                                     \
+      }                                                                \
+    else                                                       \
+      {                                                                \
+       *dst++ = c1;                                            \
+       if (c2 >= 0)                                            \
+         *dst++ = c2;                                          \
+      }                                                                \
+  } while (0)
+
+
+/* Instead of encoding character C, produce one or two `?'s.  */
+
+#define ENCODE_UNSAFE_CHARACTER(c)                                     \
    do {                                                                 \
-    int alt_charset = charset;                                         \
-                                                                       \
-    if (CHARSET_DEFINED_P (charset))                                   \
-      {                                                                        \
-       if (CHARSET_DIMENSION (charset) == 1)                           \
-         {                                                             \
-           if (charset == CHARSET_ASCII                                \
-               && coding->flags & CODING_FLAG_ISO_USE_ROMAN)           \
-             alt_charset = charset_latin_jisx0201;                     \
-           ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1);          \
-         }                                                             \
-       else                                                            \
-         {                                                             \
-           if (charset == charset_jisx0208                             \
-               && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)          \
-             alt_charset = charset_jisx0208_1978;                      \
-           ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2);      \
-         }                                                             \
-      }                                                                        \
-    else                                                               \
-      {                                                                        \
-       *dst++ = c1;                                                    \
-       if (c2 >= 0)                                                    \
-         *dst++ = c2;                                                  \
-      }                                                                        \
+    ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);      \
+    if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1)                          \
+      ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION);    \
    } while (0)
  
+
  /* Produce designation and invocation codes at a place pointed by DST
     to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
     Return new DST.  */
@@ -1988,6 +2062,9 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
    unsigned char *src_base;
    int c;
    Lisp_Object translation_table;
+  Lisp_Object safe_chars;
+
+  safe_chars = coding_safe_chars (coding);
  
    if (NILP (Venable_character_translation))
      translation_table = Qnil;
@@ -2002,8 +2079,6 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
    coding->errors = 0;
    while (1)
      {
-      int charset, c1, c2;
-
        src_base = src;
  
        if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
@@ -2056,8 +2131,11 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                     }
                   else
                     {
-                     SPLIT_CHAR (c, charset, c1, c2);
-                     ENCODE_ISO_CHARACTER (charset, c1, c2);
+                     if (coding->flags & CODING_FLAG_ISO_SAFE
+                         && ! CODING_SAFE_CHAR_P (safe_chars, c))
+                       ENCODE_UNSAFE_CHARACTER (c);
+                     else
+                       ENCODE_ISO_CHARACTER (c);
                       if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
                         coding->composition_rule_follows = 1;
                     }
@@ -2116,17 +2194,17 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
             }
         }
        else if (ASCII_BYTE_P (c))
-       ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1);
+       ENCODE_ISO_CHARACTER (c);
        else if (SINGLE_BYTE_CHAR_P (c))
         {
           *dst++ = c;
           coding->errors++;
         }
+      else if (coding->flags & CODING_FLAG_ISO_SAFE
+              && ! CODING_SAFE_CHAR_P (safe_chars, c))
+       ENCODE_UNSAFE_CHARACTER (c);
        else
-       {
-         SPLIT_CHAR (c, charset, c1, c2);
-         ENCODE_ISO_CHARACTER (charset, c1, c2);
-       }
+       ENCODE_ISO_CHARACTER (c);
  
        coding->consumed_char++;
      }
@@ -2227,10 +2305,15 @@ detect_coding_sjis (src, src_end)
    while (1)
      {
        ONE_MORE_BYTE (c);
-      if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
+      if (c >= 0x81)
         {
-         ONE_MORE_BYTE (c);
-         if (c < 0x40)
+         if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
+           {
+             ONE_MORE_BYTE (c);
+             if (c < 0x40 || c == 0x7F || c > 0xFC)
+               return 0;
+           }
+         else if (c > 0xDF)
             return 0;
         }
      }
@@ -2510,9 +2593,9 @@ encode_coding_sjis_big5 (coding, source, destination,
      translation_table = Qnil;
    else
      {
-      translation_table = coding->translation_table_for_decode;
+      translation_table = coding->translation_table_for_encode;
        if (NILP (translation_table))
-       translation_table = Vstandard_translation_table_for_decode;
+       translation_table = Vstandard_translation_table_for_encode;
      }
  
    while (1)
@@ -2557,6 +2640,8 @@ encode_coding_sjis_big5 (coding, source, destination,
                   ENCODE_SJIS (c1, c2, c1, c2);
                   EMIT_TWO_BYTES (c1, c2);
                 }
+             else if (charset == charset_katakana_jisx0201)
+               EMIT_ONE_BYTE (c1 | 0x80);
               else if (charset == charset_latin_jisx0201)
                 EMIT_ONE_BYTE (c1);
               else
@@ -2759,10 +2844,11 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes)
         }
        src_base = src;
      label_end_of_loop:
+      ;
      }
    else
      {
-      if (src_bytes <= dst_bytes)
+      if (!dst_bytes || src_bytes <= dst_bytes)
         {
           safe_bcopy (src, dst, src_bytes);
           src_base = src_end;
@@ -2794,6 +2880,7 @@ encode_eol (coding, source, destination, src_bytes, dst_bytes)
  
    coding->consumed = src_base - source;
    coding->produced = dst - destination;
+  coding->produced_char = coding->produced;
  }
  
  \f
@@ -2960,23 +3047,6 @@ setup_coding_system (coding_system, coding)
    else
      goto label_invalid_coding_system;
    
-  val = Fplist_get (plist, Qsafe_charsets);
-  if (EQ (val, Qt))
-    {
-      for (i = 0; i <= MAX_CHARSET; i++)
-       coding->safe_charsets[i] = 1;
-    }
-  else
-    {
-      bzero (coding->safe_charsets, MAX_CHARSET + 1);
-      while (CONSP (val))
-       {
-         if ((i = get_charset_id (XCAR (val))) >= 0)
-           coding->safe_charsets[i] = 1;
-         val = XCDR (val);
-       }
-    }
-
    /* If the coding system has non-nil `composition' property, enable
       composition handling.  */
    val = Fplist_get (plist, Qcomposition);
@@ -3131,10 +3201,12 @@ setup_coding_system (coding_system, coding)
         if (reg_bits)
           for (charset = 0; charset <= MAX_CHARSET; charset++)
             {
-             if (CHARSET_VALID_P (charset))
+             if (CHARSET_VALID_P (charset)
+                 && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+                     == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
                 {
                   /* There exist some default graphic registers to be
-                    used CHARSET.  */
+                    used by CHARSET.  */
  
                   /* We had better avoid designating a charset of
                      CHARS96 to REG 0 as far as possible.  */
@@ -3203,6 +3275,7 @@ setup_coding_system (coding_system, coding)
           }
        }
        coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
+      coding->spec.ccl.cr_carryover = 0;
        break;
  
      case 5:
@@ -3844,33 +3917,60 @@ encoding_buffer_size (coding, src_bytes)
    return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
  }
  
-#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
-#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
-#endif
+/* Working buffer for code conversion.  The storage is obtained
+   either from alloca or from xmalloc (see
+   allocate_conversion_buffer); ON_STACK records which one, so that
+   the buffer can later be grown or released the right way.  */
+struct conversion_buffer
+{
+  int size;                    /* number of bytes allocated for DATA.  */
+  int on_stack;                        /* 1 if allocated by alloca.  */
+  unsigned char *data;         /* start of the storage.  */
+};
  
-char *conversion_buffer;
-int conversion_buffer_size;
+/* Don't use alloca for allocating memory space of this many bytes or
+   more, lest we overflow the stack.
+   NOTE(review): the expansion is not parenthesized.  That is
+   harmless in a comparison such as `len < MAX_ALLOCA', but it would
+   misbehave next to an operator of higher precedence than `*'.  */
+#define MAX_ALLOCA 16*1024
  
-/* Return a pointer to a SIZE bytes of buffer to be used for encoding
-   or decoding.  Sufficient memory is allocated automatically.  If we
-   run out of memory, return NULL.  */
+/* Allocate LEN bytes of memory for BUF (struct conversion_buffer,
+   not a pointer to one).  A request smaller than MAX_ALLOCA is
+   served by alloca and flagged with BUF.on_stack = 1; anything else
+   comes from xmalloc with BUF.on_stack = 0.  In both cases BUF.size
+   is set to LEN.
+
+   LEN and BUF are expanded several times and without parentheses,
+   so pass simple expressions that have no side effects.  This is
+   presumably a macro rather than a function so that the alloca
+   storage belongs to the caller's frame -- confirm before turning
+   it into a function.  */
+#define allocate_conversion_buffer(buf, len)           \
+  do {                                                 \
+    if (len < MAX_ALLOCA)                              \
+      {                                                        \
+       buf.data = (unsigned char *) alloca (len);      \
+       buf.on_stack = 1;                               \
+      }                                                        \
+    else                                               \
+      {                                                        \
+       buf.data = (unsigned char *) xmalloc (len);     \
+       buf.on_stack = 0;                               \
+      }                                                        \
+    buf.size = len;                                    \
+  } while (0)
  
-char *
-get_conversion_buffer (size)
-     int size;
+/* Double the allocated memory for *BUF.
+
+   If the storage is on the stack (BUF->on_stack is nonzero), a heap
+   block of twice the size is obtained with xmalloc, the first
+   BUF->size bytes are copied into it, and BUF->on_stack is cleared;
+   the old stack storage is simply left behind.  Otherwise the heap
+   block is grown with xrealloc (presumably keeping its contents, as
+   realloc does).  In both cases BUF->data may change, so callers
+   must not keep pointers into the old storage, and BUF->size is
+   doubled on return.  */
+static void
+extend_conversion_buffer (buf)
+     struct conversion_buffer *buf;
  {
-  if (size > conversion_buffer_size)
+  if (buf->on_stack)
      {
-      char *buf;
-      int real_size = conversion_buffer_size * 2;
-
-      while (real_size < size) real_size *= 2;
-      buf = (char *) xmalloc (real_size);
-      xfree (conversion_buffer);
-      conversion_buffer = buf;
-      conversion_buffer_size = real_size;
+      unsigned char *save = buf->data;
+      buf->data = (unsigned char *) xmalloc (buf->size * 2);
+      bcopy (save, buf->data, buf->size);
+      buf->on_stack = 0;       /* from now on this is heap storage.  */
      }
-  return conversion_buffer;
+  else
+    {
+      buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
+    }
+  buf->size *= 2;
+}
+
+/* Free the allocated memory for BUF if it is not on stack.  Storage
+   flagged as on_stack (i.e. obtained by alloca) is left alone.
+   BUF->data and BUF->size are not reset here.  */
+static void
+free_conversion_buffer (buf)
+     struct conversion_buffer *buf;
+{
+  if (!buf->on_stack)
+    xfree (buf->data);
  }
  
  int
@@ -3884,7 +3984,9 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
    int result;
  
    ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
-
+  if (encodep)
+    ccl->eol_type = coding->eol_type;
+  ccl->multibyte = coding->src_multibyte;
    coding->produced = ccl_driver (ccl, source, destination,
                                  src_bytes, dst_bytes, &(coding->consumed));
    if (encodep)
@@ -3901,20 +4003,156 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
    switch (ccl->status)
      {
      case CCL_STAT_SUSPEND_BY_SRC:
-      result = CODING_FINISH_INSUFFICIENT_SRC;
+      coding->result = CODING_FINISH_INSUFFICIENT_SRC;
        break;
      case CCL_STAT_SUSPEND_BY_DST:
-      result = CODING_FINISH_INSUFFICIENT_DST;
+      coding->result = CODING_FINISH_INSUFFICIENT_DST;
        break;
      case CCL_STAT_QUIT:
      case CCL_STAT_INVALID_CMD:
-      result = CODING_FINISH_INTERRUPT;
+      coding->result = CODING_FINISH_INTERRUPT;
        break;
      default:
-      result = CODING_FINISH_NORMAL;
+      coding->result = CODING_FINISH_NORMAL;
        break;
      }
-  return result;
+  return coding->result;
+}
+
+/* Decode EOL format of the text at PTR of BYTES length destructively
+   according to CODING->eol_type.  This is called after the CCL
+   program produced a decoded text at PTR.  If we do CRLF->LF
+   conversion, update CODING->produced and CODING->produced_char.
+
+   If CODING->eol_type is still undecided on entry, it is determined
+   here by detect_eol_type on this text (an inconsistent result is
+   treated as LF).  When a type was found, CODING->symbol is replaced
+   by the corresponding element of the 3-element vector stored in its
+   Qeol_type property, if there is one.  In either case
+   CODING_MODE_INHIBIT_INCONSISTENT_EOL is turned on in CODING->mode.
+
+   While that flag is on, meeting a line ending that contradicts the
+   chosen format (a bare CR or bare LF for CRLF, an LF for CR) makes
+   this function put back the bytes it has converted so far, switch
+   CODING->eol_type to CODING_EOL_LF and restore the symbol saved on
+   entry.
+
+   For CRLF, a CR in the last byte of a block that is not the last
+   one is not converted: it is excluded from CODING->produced and
+   CODING->produced_char and remembered in
+   CODING->spec.ccl.cr_carryover.  That field is always reset to 0
+   first.  For LF or a still undecided type, nothing is changed.  */
+
+static void
+decode_eol_post_ccl (coding, ptr, bytes)
+     struct coding_system *coding;
+     unsigned char *ptr;
+     int bytes;
+{
+  Lisp_Object val, saved_coding_symbol;
+  unsigned char *pend = ptr + bytes;
+  int dummy;
+
+  /* Remember the current coding system symbol.  We set it back when
+     an inconsistent EOL is found so that `last-coding-system-used' is
+     set to the coding system that doesn't specify EOL conversion.  */
+  saved_coding_symbol = coding->symbol;
+
+  coding->spec.ccl.cr_carryover = 0;
+  if (coding->eol_type == CODING_EOL_UNDECIDED)
+    {
+      /* Here, to avoid the call of setup_coding_system, we directly
+        call detect_eol_type.  */
+      coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
+      if (coding->eol_type == CODING_EOL_INCONSISTENT)
+       coding->eol_type = CODING_EOL_LF;
+      if (coding->eol_type != CODING_EOL_UNDECIDED)
+       {
+         val = Fget (coding->symbol, Qeol_type);
+         if (VECTORP (val) && XVECTOR (val)->size == 3)
+           coding->symbol = XVECTOR (val)->contents[coding->eol_type];
+       }
+      coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
+    }
+
+  if (coding->eol_type == CODING_EOL_LF
+      || coding->eol_type == CODING_EOL_UNDECIDED)
+    {
+      /* We have nothing to do.  */
+      ptr = pend;
+    }
+  else if (coding->eol_type == CODING_EOL_CRLF)
+    {
+      unsigned char *pstart = ptr, *p = ptr;
+
+      if (! (coding->mode & CODING_MODE_LAST_BLOCK)
+         && *(pend - 1) == '\r')
+       {
+         /* If the last character is CR, we can't handle it here
+            because LF will be in the not-yet-decoded source text.
+            Record that the CR is not yet processed.
+            NOTE(review): the test above reads pend[-1], so it
+            assumes BYTES > 0 -- confirm that callers guarantee
+            this.  */
+         coding->spec.ccl.cr_carryover = 1;
+         coding->produced--;
+         coding->produced_char--;
+         pend--;
+       }
+      while (ptr < pend)
+       {
+         if (*ptr == '\r')
+           {
+             if (ptr + 1 < pend && *(ptr + 1) == '\n')
+               {
+                 *p++ = '\n';
+                 ptr += 2;
+               }
+             else
+               {
+                 if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+                   goto undo_eol_conversion;
+                 *p++ = *ptr++;
+               }
+           }
+         else if (*ptr == '\n'
+                  && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+           goto undo_eol_conversion;
+         else
+           *p++ = *ptr++;
+         continue;
+
+       undo_eol_conversion:
+         /* We have encountered an inconsistent EOL format at PTR.
+            Convert all LFs before PTR back to CRLFs.  This works
+            backward from PTR so that the text is expanded in place:
+            P walks the converted bytes, PTR the positions they are
+            moved back to.  */
+         for (p--, ptr--; p >= pstart; p--)
+           {
+             if (*p == '\n')
+               *ptr-- = '\n', *ptr-- = '\r';
+             else
+               *ptr-- = *p;
+           }
+         /*  If carryover is recorded, cancel it because we don't
+             convert CRLF anymore.  */
+         if (coding->spec.ccl.cr_carryover)
+           {
+             coding->spec.ccl.cr_carryover = 0;
+             coding->produced++;
+             coding->produced_char++;
+             pend++;
+           }
+         p = ptr = pend;
+         coding->eol_type = CODING_EOL_LF;
+         coding->symbol = saved_coding_symbol;
+       }
+      if (p < pend)
+       {
+         /* As each two-byte sequence CRLF was converted to LF, (PEND
+            - P) is the number of deleted characters.  */
+         coding->produced -= pend - p;
+         coding->produced_char -= pend - p;
+       }
+    }
+  else                 /* i.e. coding->eol_type == CODING_EOL_CR */
+    {
+      unsigned char *p = ptr;
+
+      for (; ptr < pend; ptr++)
+       {
+         if (*ptr == '\r')
+           *ptr = '\n';
+         else if (*ptr == '\n'
+                  && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+           {
+             for (; p < ptr; p++)
+               {
+                 if (*p == '\n')
+                   *p = '\r';
+               }
+             ptr = pend;
+             coding->eol_type = CODING_EOL_LF;
+             coding->symbol = saved_coding_symbol;
+           }
+       }
+    }
  }
  
  /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
@@ -3932,7 +4170,8 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
    if (coding->type == coding_type_undecided)
      detect_coding (coding, source, src_bytes);
  
-  if (coding->eol_type == CODING_EOL_UNDECIDED)
+  if (coding->eol_type == CODING_EOL_UNDECIDED
+      && coding->type != coding_type_ccl)
      detect_eol (coding, source, src_bytes);
  
    coding->produced = coding->produced_char = 0;
@@ -3963,8 +4202,20 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
        break;
  
      case coding_type_ccl:
-      ccl_coding_driver (coding, source, destination,
+      if (coding->spec.ccl.cr_carryover)
+       {
+         /* Set the CR which is not processed by the previous call of
+            decode_eol_post_ccl in DESTINATION.  */
+         *destination = '\r';
+         coding->produced++;
+         coding->produced_char++;
+         dst_bytes--;
+       }
+      ccl_coding_driver (coding, source,
+                        destination + coding->spec.ccl.cr_carryover,
                          src_bytes, dst_bytes, 0);
+      if (coding->eol_type != CODING_EOL_LF)
+       decode_eol_post_ccl (coding, destination, coding->produced);
        break;
  
      default:
@@ -3972,6 +4223,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
      }
  
    if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
+      && coding->mode & CODING_MODE_LAST_BLOCK
        && coding->consumed == src_bytes)
      coding->result = CODING_FINISH_NORMAL;
  
@@ -3982,7 +4234,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
        unsigned char *dst = destination + coding->produced;
  
        src_bytes -= coding->consumed;
-     coding->errors++;
+      coding->errors++;
        if (COMPOSING_P (coding))
         DECODE_COMPOSITION_END ('1');
        while (src_bytes--)
@@ -3993,6 +4245,7 @@ decode_coding (coding, source, destination, src_bytes, dst_bytes)
         }
        coding->consumed = coding->consumed_char = src - source;
        coding->produced = dst - destination;
+      coding->result = CODING_FINISH_NORMAL;
      }
  
    if (!coding->dst_multibyte)
@@ -4050,11 +4303,8 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes)
        encode_eol (coding, source, destination, src_bytes, dst_bytes);
      }
  
-  if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
-      && coding->consumed == src_bytes)
-    coding->result = CODING_FINISH_NORMAL;
-
-  if (coding->mode & CODING_MODE_LAST_BLOCK)
+  if (coding->mode & CODING_MODE_LAST_BLOCK
+      && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
      {
        unsigned char *src = source + coding->consumed;
        unsigned char *src_end = src + src_bytes;
@@ -4075,8 +4325,13 @@ encode_coding (coding, source, destination, src_bytes, dst_bytes)
           coding->consumed = src_bytes;
         }
        coding->produced = coding->produced_char = dst - destination;
+      coding->result = CODING_FINISH_NORMAL;
      }
  
+  if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
+      && coding->consumed == src_bytes)
+    coding->result = CODING_FINISH_NORMAL;
+
    return coding->result;
  }
  
@@ -4127,7 +4382,7 @@ shrink_decoding_region (beg, end, coding, str)
         if (!NILP (CHAR_TABLE_REF (translation_table, i)))
           break;
        if (i < 128)
-       /* Some ASCII character should be tranlsated.  We give up
+       /* Some ASCII character should be translated.  We give up
            shrinking.  */
         return;
      }
@@ -4148,6 +4403,9 @@ shrink_decoding_region (beg, end, coding, str)
        endp_orig = endp = begp + *end - *beg;
      }
  
+  eol_conversion = (coding->eol_type == CODING_EOL_CR
+                   || coding->eol_type == CODING_EOL_CRLF);
+
    switch (coding->type)
      {
      case coding_type_sjis:
@@ -4464,7 +4722,7 @@ coding_save_composition (coding, from, to, obj)
     CODING->cmp_data points to a memory block for the informaiton.  OBJ
     is a buffer or a string, defaults to the current buffer.  */
  
-static void
+void
  coding_restore_composition (coding, obj)
       struct coding_system *coding;
       Lisp_Object obj;
@@ -4481,7 +4739,8 @@ coding_restore_composition (coding, obj)
      {
        int i;
  
-      for (i = 0; i < cmp_data->used; i += cmp_data->data[i])
+      for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
+          i += cmp_data->data[i])
         {
           int *data = cmp_data->data + i;
           enum composition_method method = (enum composition_method) data[3];
@@ -4552,6 +4811,7 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
    if (replace)
      {
        int saved_from = from;
+      int saved_inhibit_modification_hooks;
  
        prepare_to_modify_buffer (from, to, &from);
        if (saved_from != from)
@@ -4560,6 +4820,14 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
           from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
           len_byte = to_byte - from_byte;
         }
+
+      /* The code conversion routine can not preserve text properties
+        for now.  So, we must remove all text properties in the
+        region.  Here, we must suppress all modification hooks.  */
+      saved_inhibit_modification_hooks = inhibit_modification_hooks;
+      inhibit_modification_hooks = 1;
+      Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
+      inhibit_modification_hooks = saved_inhibit_modification_hooks;
      }
  
    if (! encodep && CODING_REQUIRE_DETECTION (coding))
@@ -4572,13 +4840,17 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
         {
           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
           if (coding->type == coding_type_undecided)
-           /* It seems that the text contains only ASCII, but we
-              should not left it undecided because the deeper
-              decoding routine (decode_coding) tries to detect the
-              encodings again in vain.  */
-           coding->type = coding_type_emacs_mule;
+           {
+             /* It seems that the text contains only ASCII, but we
+                should not leave it undecided because the deeper
+                decoding routine (decode_coding) tries to detect the
+                encodings again in vain.  */
+             coding->type = coding_type_emacs_mule;
+             coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
+           }
         }
-      if (coding->eol_type == CODING_EOL_UNDECIDED)
+      if (coding->eol_type == CODING_EOL_UNDECIDED
+         && coding->type != coding_type_ccl)
         {
           saved_coding_symbol = coding->symbol;
           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
@@ -4648,41 +4920,31 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
      }
  
    /* Try to skip the heading and tailing ASCIIs.  */
-  {
-    int from_byte_orig = from_byte, to_byte_orig = to_byte;
-
-    if (from < GPT && GPT < to)
-      move_gap_both (from, from_byte);
-    SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
-    if (from_byte == to_byte
-       && (encodep || NILP (coding->post_read_conversion))
-       && ! CODING_REQUIRE_FLUSHING (coding))
-      {
-       coding->produced = len_byte;
-       coding->produced_char = len;
-       if (!replace)
-         /* We must record and adjust for this new text now.  */
-         adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
-       return 0;
-      }
+  if (coding->type != coding_type_ccl)
+    {
+      int from_byte_orig = from_byte, to_byte_orig = to_byte;
  
-    head_skip = from_byte - from_byte_orig;
-    tail_skip = to_byte_orig - to_byte;
-    total_skip = head_skip + tail_skip;
-    from += head_skip;
-    to -= tail_skip;
-    len -= total_skip; len_byte -= total_skip;
-  }
+      if (from < GPT && GPT < to)
+       move_gap_both (from, from_byte);
+      SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
+      if (from_byte == to_byte
+         && (encodep || NILP (coding->post_read_conversion))
+         && ! CODING_REQUIRE_FLUSHING (coding))
+       {
+         coding->produced = len_byte;
+         coding->produced_char = len;
+         if (!replace)
+           /* We must record and adjust for this new text now.  */
+           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
+         return 0;
+       }
  
-  /* The code conversion routine can not preserve text properties for
-     now.  So, we must remove all text properties in the region.
-     Here, we must suppress all modification hooks.  */
-  if (replace)
-    {
-      int saved_inhibit_modification_hooks = inhibit_modification_hooks;
-      inhibit_modification_hooks = 1;
-      Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
-      inhibit_modification_hooks = saved_inhibit_modification_hooks;
+      head_skip = from_byte - from_byte_orig;
+      tail_skip = to_byte_orig - to_byte;
+      total_skip = head_skip + tail_skip;
+      from += head_skip;
+      to -= tail_skip;
+      len -= total_skip; len_byte -= total_skip;
      }
  
    /* For converion, we must put the gap before the text in addition to
@@ -5000,9 +5262,12 @@ run_pre_post_conversion_on_str (str, coding, encodep)
    if (encodep)
      call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
    else
-    call1 (coding->post_read_conversion, make_number (Z - BEG));
+    {
+      TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
+      call1 (coding->post_read_conversion, make_number (Z - BEG));
+    }
    inhibit_pre_post_conversion = 0;
-  str = make_buffer_string (BEG, Z, 0);
+  str = make_buffer_string (BEG, Z, 1);
    return unbind_to (count, str);
  }
  
@@ -5013,11 +5278,15 @@ decode_coding_string (str, coding, nocopy)
       int nocopy;
  {
    int len;
-  char *buf;
+  struct conversion_buffer buf;
    int from, to, to_byte;
    struct gcpro gcpro1;
    Lisp_Object saved_coding_symbol;
    int result;
+  int require_decoding;
+  int shrinked_bytes = 0;
+  Lisp_Object newstr;
+  int consumed, consumed_char, produced, produced_char;
  
    from = 0;
    to = XSTRING (str)->size;
@@ -5033,7 +5302,8 @@ decode_coding_string (str, coding, nocopy)
           if (coding->type == coding_type_undecided)
             coding->type = coding_type_emacs_mule;
         }
-      if (coding->eol_type == CODING_EOL_UNDECIDED)
+      if (coding->eol_type == CODING_EOL_UNDECIDED
+         && coding->type != coding_type_ccl)
         {
           saved_coding_symbol = coding->symbol;
           detect_eol (coding, XSTRING (str)->data, to_byte);
@@ -5045,74 +5315,124 @@ decode_coding_string (str, coding, nocopy)
         }
      }
  
-  if (! CODING_REQUIRE_DECODING (coding))
-    {
-      if (!STRING_MULTIBYTE (str))
-       {
-         str = Fstring_as_multibyte (str);
-         nocopy = 1;
-       }
-      return (nocopy ? str : Fcopy_sequence (str));
-    }
+  coding->src_multibyte = 0;
+  coding->dst_multibyte = (coding->type != coding_type_no_conversion
+                          && coding->type != coding_type_raw_text);
+  require_decoding = CODING_REQUIRE_DECODING (coding);
  
    if (STRING_MULTIBYTE (str))
      {
        /* Decoding routines expect the source text to be unibyte.  */
        str = Fstring_as_unibyte (str);
+      to_byte = STRING_BYTES (XSTRING (str));
        nocopy = 1;
-      coding->src_multibyte = 0;
      }
-  coding->dst_multibyte = 1;
-
-  if (coding->composing != COMPOSITION_DISABLED)
-    coding_allocate_composition_data (coding, from);
  
    /* Try to skip the heading and tailing ASCIIs.  */
-  {
-    int from_orig = from;
+  if (require_decoding && coding->type != coding_type_ccl)
+    {
+      SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+                               0);
+      if (from == to_byte)
+       require_decoding = 0;
+      shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+    }
  
-    SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
-                             0);
-    if (from == to_byte)
+  if (!require_decoding)
+    {
+      coding->consumed = STRING_BYTES (XSTRING (str));
+      coding->consumed_char = XSTRING (str)->size;
+      if (coding->dst_multibyte)
+       {
+         str = Fstring_as_multibyte (str);
+         nocopy = 1;
+       }
+      coding->produced = STRING_BYTES (XSTRING (str));
+      coding->produced_char = XSTRING (str)->size;
        return (nocopy ? str : Fcopy_sequence (str));
-  }
+    }
  
+  if (coding->composing != COMPOSITION_DISABLED)
+    coding_allocate_composition_data (coding, from);
    len = decoding_buffer_size (coding, to_byte - from);
-  len += from + STRING_BYTES (XSTRING (str)) - to_byte;
-  GCPRO1 (str);
-  buf = get_conversion_buffer (len);
-  UNGCPRO;
+  allocate_conversion_buffer (buf, len);
  
-  if (from > 0)
-    bcopy (XSTRING (str)->data, buf, from);
-  result = decode_coding (coding, XSTRING (str)->data + from,
-                        buf + from, to_byte - from, len);
-  if (result == CODING_FINISH_INCONSISTENT_EOL)
+  consumed = consumed_char = produced = produced_char = 0;
+  while (1)
      {
-      /* We simply try to decode the whole string again but without
-         eol-conversion this time.  */
-      coding->eol_type = CODING_EOL_LF;
-      coding->symbol = saved_coding_symbol;
-      coding_free_composition_data (coding);
-      return decode_coding_string (str, coding, nocopy);
+      result = decode_coding (coding, XSTRING (str)->data + from + consumed,
+                             buf.data + produced, to_byte - from - consumed,
+                             buf.size - produced);
+      consumed += coding->consumed;
+      consumed_char += coding->consumed_char;
+      produced += coding->produced;
+      produced_char += coding->produced_char;
+      if (result == CODING_FINISH_NORMAL
+         || (result == CODING_FINISH_INSUFFICIENT_SRC
+             && coding->consumed == 0))
+       break;
+      if (result == CODING_FINISH_INSUFFICIENT_CMP)
+       coding_allocate_composition_data (coding, from + produced_char);
+      else if (result == CODING_FINISH_INSUFFICIENT_DST)
+       extend_conversion_buffer (&buf);
+      else if (result == CODING_FINISH_INCONSISTENT_EOL)
+       {
+         /* Recover the original EOL format.  */
+         if (coding->eol_type == CODING_EOL_CR)
+           {
+             unsigned char *p;
+             for (p = buf.data; p < buf.data + produced; p++)
+               if (*p == '\n') *p = '\r';
+           }
+         else if (coding->eol_type == CODING_EOL_CRLF)
+           {
+             int num_eol = 0;
+             unsigned char *p0, *p1;
+             for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
+               if (*p0 == '\n') num_eol++;
+             if (produced + num_eol >= buf.size)
+               extend_conversion_buffer (&buf);
+             for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
+               {
+                 *--p1 = *--p0;
+                 if (*p0 == '\n') *--p1 = '\r';
+               }
+             produced += num_eol;
+             produced_char += num_eol;
+           } 
+         coding->eol_type = CODING_EOL_LF;
+         coding->symbol = saved_coding_symbol;
+       }
      }
  
-  bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
-        STRING_BYTES (XSTRING (str)) - to_byte);
+  coding->consumed = consumed;
+  coding->consumed_char = consumed_char;
+  coding->produced = produced;
+  coding->produced_char = produced_char;
  
-  len = from + STRING_BYTES (XSTRING (str)) - to_byte;
-  str = make_multibyte_string (buf, len + coding->produced_char,
-                              len + coding->produced);
+  if (coding->dst_multibyte)
+    newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
+                                          produced + shrinked_bytes);
+  else
+    newstr = make_uninit_string (produced + shrinked_bytes);
+  if (from > 0)
+    bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
+  bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+  if (shrinked_bytes > from)
+    bcopy (XSTRING (str)->data + to_byte,
+          XSTRING (newstr)->data + from + produced,
+          shrinked_bytes - from);
+  free_conversion_buffer (&buf);
  
    if (coding->cmp_data && coding->cmp_data->used)
-    coding_restore_composition (coding, str);
+    coding_restore_composition (coding, newstr);
    coding_free_composition_data (coding);
  
    if (SYMBOLP (coding->post_read_conversion)
        && !NILP (Ffboundp (coding->post_read_conversion)))
-    str = run_pre_post_conversion_on_str (str, 0);
+    newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
  
-  return str;
+  return newstr;
  }
  
  Lisp_Object
@@ -5122,67 +5442,95 @@ encode_coding_string (str, coding, nocopy)
       int nocopy;
  {
    int len;
-  char *buf;
+  struct conversion_buffer buf;
    int from, to, to_byte;
    struct gcpro gcpro1;
    Lisp_Object saved_coding_symbol;
    int result;
+  int shrinked_bytes = 0;
+  Lisp_Object newstr;
+  int consumed, consumed_char, produced, produced_char;
  
    if (SYMBOLP (coding->pre_write_conversion)
        && !NILP (Ffboundp (coding->pre_write_conversion)))
-    str = run_pre_post_conversion_on_str (str, 1);
+    str = run_pre_post_conversion_on_str (str, coding, 1);
  
    from = 0;
    to = XSTRING (str)->size;
    to_byte = STRING_BYTES (XSTRING (str));
  
    saved_coding_symbol = Qnil;
+
+  /* Encoding routines determine the multibyteness of the source text
+     by coding->src_multibyte.  */
+  coding->src_multibyte = STRING_MULTIBYTE (str);
+  coding->dst_multibyte = 0;
    if (! CODING_REQUIRE_ENCODING (coding))
      {
+      coding->consumed = STRING_BYTES (XSTRING (str));
+      coding->consumed_char = XSTRING (str)->size;
        if (STRING_MULTIBYTE (str))
         {
           str = Fstring_as_unibyte (str);
           nocopy = 1;
         }
+      coding->produced = STRING_BYTES (XSTRING (str));
+      coding->produced_char = XSTRING (str)->size;
        return (nocopy ? str : Fcopy_sequence (str));
      }
  
-  /* Encoding routines determine the multibyteness of the source text
-     by coding->src_multibyte.  */
-  coding->src_multibyte = STRING_MULTIBYTE (str);
-  coding->dst_multibyte = 0;
-
    if (coding->composing != COMPOSITION_DISABLED)
      coding_save_composition (coding, from, to, str);
  
    /* Try to skip the heading and tailing ASCIIs.  */
-  {
-    int from_orig = from;
-
-    SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
-                             1);
-    if (from == to_byte)
-      return (nocopy ? str : Fcopy_sequence (str));
-  }
+  if (coding->type != coding_type_ccl)
+    {
+      SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+                               1);
+      if (from == to_byte)
+       return (nocopy ? str : Fcopy_sequence (str));
+      shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+    }
  
    len = encoding_buffer_size (coding, to_byte - from);
-  len += from + STRING_BYTES (XSTRING (str)) - to_byte;
-  GCPRO1 (str);
-  buf = get_conversion_buffer (len);
-  UNGCPRO;
+  allocate_conversion_buffer (buf, len);
+
+  consumed = consumed_char = produced = produced_char = 0;
+  while (1)
+    {
+      result = encode_coding (coding, XSTRING (str)->data + from + consumed,
+                             buf.data + produced, to_byte - from - consumed,
+                             buf.size - produced);
+      consumed += coding->consumed;
+      consumed_char += coding->consumed_char;
+      produced += coding->produced;
+      produced_char += coding->produced_char;
+      if (result == CODING_FINISH_NORMAL
+         || (result == CODING_FINISH_INSUFFICIENT_SRC
+             && coding->consumed == 0))
+       break;
+      /* Now result should be CODING_FINISH_INSUFFICIENT_DST.  */
+      extend_conversion_buffer (&buf);
+    }
  
+  coding->consumed = consumed;
+  coding->consumed_char = consumed_char;
+  coding->produced = produced;
+  coding->produced_char = produced_char;
+
+  newstr = make_uninit_string (produced + shrinked_bytes);
    if (from > 0)
-    bcopy (XSTRING (str)->data, buf, from);
-  result = encode_coding (coding, XSTRING (str)->data + from,
-                         buf + from, to_byte - from, len);
-  bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
-        STRING_BYTES (XSTRING (str)) - to_byte);
-
-  len = from + STRING_BYTES (XSTRING (str)) - to_byte;
-  str = make_unibyte_string (buf, len + coding->produced);
+    bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
+  bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+  if (shrinked_bytes > from)
+    bcopy (XSTRING (str)->data + to_byte,
+          XSTRING (newstr)->data + from + produced,
+          shrinked_bytes - from);
+
+  free_conversion_buffer (&buf);
    coding_free_composition_data (coding);
  
-  return str;
+  return newstr;
  }
  
  \f
@@ -5369,6 +5717,160 @@ highest priority.")
                                !NILP (highest));
  }
  
+/* Return a list of the elements of L1 that are also found in L2 by Fmemq.  */
+
+static Lisp_Object
+intersection (l1, l2)
+     Lisp_Object l1, l2;
+{
+  Lisp_Object val;              /* Result list, built up by consing.  */
+
+  for (val = Qnil; CONSP (l1); l1 = XCDR (l1))  /* Stop at first non-cons.  */
+    {
+      if (!NILP (Fmemq (XCAR (l1), l2)))
+       val = Fcons (XCAR (l1), val);    /* Prepend, so L1's order is reversed.  */
+    }
+  return val;                   /* Qnil if no element of L1 was found in L2.  */
+}
+
+
+/*  Subroutine for Ffind_coding_systems_region_internal.
+
+    Scan the multibyte text between P and PEND and return the list of
+    coding systems that can safely encode it.  SAFE_CODINGS is the
+    result so far: t means no restriction has been found yet, a list
+    holds the remaining candidates, and nil means none is left.
+
+    WORK_TABLE is a copy of the char-table Vchar_coding_system_table.  An
+    element of WORK_TABLE is set to t once the element is looked up, so
+    each element is intersected at most once.
+    If a non-ASCII single byte char is found, set
+    *single_byte_char_found to 1.  */
+
+static Lisp_Object
+find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
+     unsigned char *p, *pend;
+     Lisp_Object safe_codings, work_table;
+     int *single_byte_char_found;
+{
+  int c, len, idx;
+  Lisp_Object val;
+
+  while (p < pend)
+    {
+      c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+      p += len;                 /* LEN is used to advance P past C.  */
+      if (ASCII_BYTE_P (c))
+       /* We can ignore ASCII characters here.  */
+       continue;
+      if (SINGLE_BYTE_CHAR_P (c))
+       *single_byte_char_found = 1;
+      if (NILP (safe_codings))
+       continue;                /* No candidate left; only the flag above matters.  */
+      /* Check the safe coding systems for C.  */
+      val = char_table_ref_and_index (work_table, c, &idx);
+      if (EQ (val, Qt))
+       /* This element was already checked.  Ignore it.  */
+       continue;
+      /* Remember that we checked this element.  */
+      CHAR_TABLE_SET (work_table, make_number (idx), Qt);
+
+      /* If C has its own list and we already hold a list from earlier
+        characters, narrow to their intersection.  Otherwise take VAL
+        as is (a nil VAL leaves no candidate).  */
+      if (!EQ (safe_codings, Qt) && !NILP (val))
+       val = intersection (safe_codings, val);
+      safe_codings = val;
+    }
+  return safe_codings;
+}
+
+
+/* Return a list of coding systems that safely encode the text between
+   START and END, or the string START (END is then unused).  If the
+   text contains only ASCII or is unibyte, return t.  */
+
+DEFUN ("find-coding-systems-region-internal",
+       Ffind_coding_systems_region_internal,
+       Sfind_coding_systems_region_internal, 2, 2, 0,
+  "Internal use only.")
+  (start, end)
+     Lisp_Object start, end;
+{
+  Lisp_Object work_table, safe_codings;
+  int non_ascii_p = 0;
+  int single_byte_char_found = 0;
+  unsigned char *p1, *p1end, *p2, *p2end, *p;   /* Two byte ranges to scan.  */
+  Lisp_Object args[2];  /* NOTE(review): unused; shadowed by the inner ARGS below.  */
+
+  if (STRINGP (start))
+    {
+      if (!STRING_MULTIBYTE (start))
+       return Qt;               /* Unibyte string.  */
+      p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
+      p2 = p2end = p1end;       /* No second segment.  */
+      if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
+       non_ascii_p = 1;         /* Size and byte length differ.  */
+    }
+  else
+    {
+      int from, to, stop;
+
+      CHECK_NUMBER_COERCE_MARKER (start, 0);
+      CHECK_NUMBER_COERCE_MARKER (end, 1);
+      if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
+       args_out_of_range (start, end);
+      if (NILP (current_buffer->enable_multibyte_characters))
+       return Qt;               /* Unibyte buffer.  */
+      from = CHAR_TO_BYTE (XINT (start));
+      to = CHAR_TO_BYTE (XINT (end));
+      stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;  /* Split at GPT_BYTE if strictly inside.  */
+      p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
+      if (stop == to)
+       p2 = p2end = p1end;      /* No second segment.  */
+      else
+       p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
+      if (XINT (end) - XINT (start) != to - from)
+       non_ascii_p = 1;         /* Char and byte spans differ.  */
+    }
+
+  if (!non_ascii_p)
+    {
+      /* We are sure that the text contains no multibyte character.
+        Check if it contains eight-bit-graphic.  */
+      p = p1;                   /* NOTE(review): redundant with the for-init below.  */
+      for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);        /* Skip ASCII bytes.  */
+      if (p == p1end)
+       {
+         for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);      /* Same for segment 2.  */
+         if (p == p2end)
+           return Qt;           /* Both segments are pure ASCII.  */
+       }
+    }
+
+  /* The text contains non-ASCII characters.  Scan both segments.  */
+  work_table = Fcopy_sequence (Vchar_coding_system_table);
+  safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
+                                   &single_byte_char_found);
+  if (p2 < p2end)
+    safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
+                                     &single_byte_char_found);
+
+  if (!single_byte_char_found)
+    {
+      /* Append generic coding systems (extra slot 0 of the table).  */
+      Lisp_Object args[2];
+      args[0] = safe_codings;
+      args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
+                                       make_number (0));
+      safe_codings = Fappend (2, args);
+    }
+  else
+    safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));  /* Prepend these two instead.  */
+  return safe_codings;
+}
+
+
  Lisp_Object
  code_convert_region1 (start, end, coding_system, encodep)
       Lisp_Object start, end, coding_system;
@@ -5854,12 +6356,6 @@ This function is internal use only.")
  \f
  /*** 9. Post-amble ***/
  
-void
-init_coding ()
-{
-  conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
-}
-
  void
  init_coding_once ()
  {
@@ -5900,8 +6396,6 @@ init_coding_once ()
    iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
    iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
  
-  conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
-
    setup_coding_system (Qnil, &keyboard_coding);
    setup_coding_system (Qnil, &terminal_coding);
    setup_coding_system (Qnil, &safe_terminal_coding);
@@ -6023,8 +6517,18 @@ syms_of_coding ()
    Qtranslation_table_for_encode = intern ("translation-table-for-encode");
    staticpro (&Qtranslation_table_for_encode);
  
-  Qsafe_charsets = intern ("safe-charsets");
-  staticpro (&Qsafe_charsets);
+  Qsafe_chars = intern ("safe-chars");
+  staticpro (&Qsafe_chars);
+
+  Qchar_coding_system = intern ("char-coding-system");
+  staticpro (&Qchar_coding_system);
+
+  /* Intern this now in case it isn't already done.
+     Setting this variable twice is harmless.
+     But don't staticpro it here--that is done in alloc.c.  */
+  Qchar_table_extra_slots = intern ("char-table-extra-slots");
+  Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
+  Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
  
    Qvalid_codes = intern ("valid-codes");
    staticpro (&Qvalid_codes);
@@ -6041,6 +6545,7 @@ syms_of_coding ()
    defsubr (&Scheck_coding_system);
    defsubr (&Sdetect_coding_region);
    defsubr (&Sdetect_coding_string);
+  defsubr (&Sfind_coding_systems_region_internal);
    defsubr (&Sdecode_coding_region);
    defsubr (&Sencode_coding_region);
    defsubr (&Sdecode_coding_string);
@@ -6178,6 +6683,7 @@ See also the function `find-operation-coding-system'.");
      "Coding system to use with system messages.");
    Vlocale_coding_system = Qnil;
  
+  /* The eol mnemonics are reset in startup.el system-dependently.  */
    DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
      "*String displayed in mode line for UNIX-like (LF) end-of-line format.");
    eol_mnemonic_unix = build_string (":");
@@ -6243,6 +6749,38 @@ coding system used in each operation can't encode the text.\n\
  The default value is `select-safe-coding-system' (which see).");
    Vselect_safe_coding_system_function = Qnil;
  
+  DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
+    "Char-table containing safe coding systems of each characters.\n\
+Each element doesn't include such generic coding systems that can\n\
+encode any characters.   They are in the first extra slot.");
+  Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
+
+  DEFVAR_BOOL ("inhibit-iso-escape-detection",
+              &inhibit_iso_escape_detection,
+    "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
+\n\
+By default, on reading a file, Emacs tries to detect how the text is\n\
+encoded.  This code detection is sensitive to escape sequences.  If\n\
+the sequence is valid as ISO2022, the code is determined as one of\n\
+the ISO2022 encodings, and the file is decoded by the corresponding\n\
+coding system (e.g. `iso-2022-7bit').\n\
+\n\
+However, there may be a case that you want to read escape sequences in\n\
+a file as is.  In such a case, you can set this variable to non-nil.\n\
+Then, as the code detection ignores any escape sequences, no file is\n\
+detected as encoded in some ISO2022 encoding.  The result is that all\n\
+escape sequences become visible in a buffer.\n\
+\n\
+The default value is nil, and it is strongly recommended not to change\n\
+it.  That is because many Emacs Lisp source files that contain\n\
+non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
+in Emacs's distribution, and they won't be decoded correctly on\n\
+reading if you suppress escape sequence detection.\n\
+\n\
+The other way to read escape sequences in a file without decoding is\n\
+to explicitly specify some coding system that doesn't use ISO2022's\n\
+escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
+  inhibit_iso_escape_detection = 0;
  }
  
  char *
@@ -6266,3 +6804,4 @@ emacs_strerror (error_number)
  }
  
  #endif /* emacs */
+