]> code.delx.au - gnu-emacs/blobdiff - src/coding.c
(is_simple_dialog, simple_dialog_show): New functions.
[gnu-emacs] / src / coding.c
index a43b5cb81875ef5bb738cd0f4ce5cab9552941cd..81e8e4e10bc9e99efe2bceab2aedd1b9f81c8579 100644 (file)
@@ -625,6 +625,7 @@ enum coding_category
    | CATEGORY_MASK_ISO_7_ELSE          \
    | CATEGORY_MASK_ISO_8_ELSE          \
    | CATEGORY_MASK_UTF_8               \
+   | CATEGORY_MASK_UTF_16_AUTO         \
    | CATEGORY_MASK_UTF_16_BE           \
    | CATEGORY_MASK_UTF_16_LE           \
    | CATEGORY_MASK_UTF_16_BE_NOSIG     \
@@ -657,7 +658,8 @@ enum coding_category
      | CATEGORY_MASK_ISO_ELSE)
 
 #define CATEGORY_MASK_UTF_16           \
-  (CATEGORY_MASK_UTF_16_BE             \
+  (CATEGORY_MASK_UTF_16_AUTO           \
+   | CATEGORY_MASK_UTF_16_BE           \
    | CATEGORY_MASK_UTF_16_LE           \
    | CATEGORY_MASK_UTF_16_BE_NOSIG     \
    | CATEGORY_MASK_UTF_16_LE_NOSIG)
@@ -898,7 +900,7 @@ static INLINE void produce_charset P_ ((struct coding_system *, int *,
 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 static int decode_coding P_ ((struct coding_system *));
 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
-                                                     struct coding_system *, 
+                                                     struct coding_system *,
                                                      int *, EMACS_INT *));
 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
                                                  struct coding_system *,
@@ -955,6 +957,11 @@ record_conversion_result (struct coding_system *coding,
   } while (0)
 
 
+/* If there are at least BYTES length of room at dst, allocate memory
+   for coding->destination and update dst and dst_end.  We don't have
+   to take care of coding->source which will be relocated.  It is
+   handled by calling coding_set_source in encode_coding.  */
+
 #define ASSURE_DESTINATION(bytes)                              \
   do {                                                         \
     if (dst + (bytes) >= dst_end)                              \
@@ -967,6 +974,66 @@ record_conversion_result (struct coding_system *coding,
   } while (0)
 
 
+/* Store multibyte form of the character C in P, and advance P to the
+   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
+   never calls MAYBE_UNIFY_CHAR.  */
+
+#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)     \
+  do {                                         \
+    if ((c) <= MAX_1_BYTE_CHAR)                        \
+      *(p)++ = (c);                            \
+    else if ((c) <= MAX_2_BYTE_CHAR)           \
+      *(p)++ = (0xC0 | ((c) >> 6)),            \
+       *(p)++ = (0x80 | ((c) & 0x3F));         \
+    else if ((c) <= MAX_3_BYTE_CHAR)           \
+      *(p)++ = (0xE0 | ((c) >> 12)),           \
+       *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
+       *(p)++ = (0x80 | ((c) & 0x3F));         \
+    else if ((c) <= MAX_4_BYTE_CHAR)           \
+      *(p)++ = (0xF0 | (c >> 18)),             \
+       *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
+       *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
+       *(p)++ = (0x80 | (c & 0x3F));           \
+    else if ((c) <= MAX_5_BYTE_CHAR)           \
+      *(p)++ = 0xF8,                           \
+       *(p)++ = (0x80 | ((c >> 18) & 0x0F)),   \
+       *(p)++ = (0x80 | ((c >> 12) & 0x3F)),   \
+       *(p)++ = (0x80 | ((c >> 6) & 0x3F)),    \
+       *(p)++ = (0x80 | (c & 0x3F));           \
+    else                                       \
+      (p) += BYTE8_STRING ((c) - 0x3FFF80, p); \
+  } while (0)
+
+
+/* Return the character code of character whose multibyte form is at
+   P, and advance P to the end of the multibyte form.  This is like
+   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
+
+#define STRING_CHAR_ADVANCE_NO_UNIFY(p)                                \
+  (!((p)[0] & 0x80)                                            \
+   ? *(p)++                                                    \
+   : ! ((p)[0] & 0x20)                                         \
+   ? ((p) += 2,                                                        \
+      ((((p)[-2] & 0x1F) << 6)                                 \
+       | ((p)[-1] & 0x3F)                                      \
+       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))   \
+   : ! ((p)[0] & 0x10)                                         \
+   ? ((p) += 3,                                                        \
+      ((((p)[-3] & 0x0F) << 12)                                        \
+       | (((p)[-2] & 0x3F) << 6)                               \
+       | ((p)[-1] & 0x3F)))                                    \
+   : ! ((p)[0] & 0x08)                                         \
+   ? ((p) += 4,                                                        \
+      ((((p)[-4] & 0xF) << 18)                                 \
+       | (((p)[-3] & 0x3F) << 12)                              \
+       | (((p)[-2] & 0x3F) << 6)                               \
+       | ((p)[-1] & 0x3F)))                                    \
+   : ((p) += 5,                                                        \
+      ((((p)[-4] & 0x3F) << 18)                                        \
+       | (((p)[-3] & 0x3F) << 12)                              \
+       | (((p)[-2] & 0x3F) << 6)                               \
+       | ((p)[-1] & 0x3F))))
+
 
 static void
 coding_set_source (coding)
@@ -1032,20 +1099,23 @@ coding_alloc_by_realloc (coding, bytes)
 }
 
 static void
-coding_alloc_by_making_gap (coding, offset, bytes)
+coding_alloc_by_making_gap (coding, gap_head_used, bytes)
      struct coding_system *coding;
-     EMACS_INT offset, bytes;
+     EMACS_INT gap_head_used, bytes;
 {
-  if (BUFFERP (coding->dst_object)
-      && EQ (coding->src_object, coding->dst_object))
+  if (EQ (coding->src_object, coding->dst_object))
     {
-      EMACS_INT add = offset + (coding->src_bytes - coding->consumed);
+      /* The gap may contain the produced data at the head and not-yet
+        consumed data at the tail.  To preserve those data, we at
+        first make the gap size to zero, then increase the gap
+        size.  */
+      EMACS_INT add = GAP_SIZE;
 
-      GPT += offset, GPT_BYTE += offset;
-      GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
+      GPT += gap_head_used, GPT_BYTE += gap_head_used;
+      GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
       make_gap (bytes);
       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
-      GPT -= offset, GPT_BYTE -= offset;
+      GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
     }
   else
     {
@@ -1068,7 +1138,11 @@ alloc_destination (coding, nbytes, dst)
   EMACS_INT offset = dst - coding->destination;
 
   if (BUFFERP (coding->dst_object))
-    coding_alloc_by_making_gap (coding, offset, nbytes);
+    {
+      struct buffer *buf = XBUFFER (coding->dst_object);
+
+      coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
+    }
   else
     coding_alloc_by_realloc (coding, nbytes);
   record_conversion_result (coding, CODING_RESULT_SUCCESS);
@@ -1225,6 +1299,8 @@ decode_coding_utf_8 (coding)
   int consumed_chars = 0, consumed_chars_base;
   int multibytep = coding->src_multibyte;
   Lisp_Object attr, charset_list;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attr, charset_list);
 
@@ -1238,13 +1314,18 @@ decode_coding_utf_8 (coding)
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c1);
+      if (byte_after_cr >= 0)
+       c1 = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c1);
       if (c1 < 0)
        {
          c = - c1;
        }
       else if (UTF_8_1_OCTET_P(c1))
        {
+         if (eol_crlf && c1 == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
          c = c1;
        }
       else
@@ -1353,7 +1434,7 @@ encode_coding_utf_8 (coding)
            }
          else
            {
-             CHAR_STRING_ADVANCE (c, pend);
+             CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
              for (p = str; p < pend; p++)
                EMIT_ONE_BYTE (*p);
            }
@@ -1370,7 +1451,7 @@ encode_coding_utf_8 (coding)
          if (CHAR_BYTE8_P (c))
            *dst++ = CHAR_TO_BYTE8 (c);
          else
-           dst += CHAR_STRING (c, dst);
+           CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
          produced_chars++;
        }
     }
@@ -1434,11 +1515,44 @@ detect_coding_utf_16 (coding, detect_info)
                                | CATEGORY_MASK_UTF_16_BE_NOSIG
                                | CATEGORY_MASK_UTF_16_LE_NOSIG);
     }
-  else if (c1 >= 0 && c2 >= 0)
+  else
     {
+      /* We check the dispersion of Eth and Oth bytes where E is even and
+        O is odd.  If both are high, we assume binary data.*/
+      unsigned char e[256], o[256];
+      unsigned e_num = 1, o_num = 1;
+
+      memset (e, 0, 256);
+      memset (o, 0, 256);
+      e[c1] = 1;
+      o[c2] = 1;
+
       detect_info->rejected
        |= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
+
+      while (1)
+       {
+         ONE_MORE_BYTE (c1);
+         ONE_MORE_BYTE (c2);
+         if (! e[c1])
+           {
+             e[c1] = 1;
+             e_num++;
+             if (e_num >= 128)
+               break;
+           }
+         if (! o[c2])
+           {
+             o[c1] = 1;
+             o_num++;
+             if (o_num >= 128)
+               break;
+           }
+       }
+      detect_info->rejected |= CATEGORY_MASK_UTF_16;
+      return 0;
     }
+
  no_more_source:
   return 1;
 }
@@ -1458,6 +1572,8 @@ decode_coding_utf_16 (coding)
   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
   int surrogate = CODING_UTF_16_SURROGATE (coding);
   Lisp_Object attr, charset_list;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr1 = -1, byte_after_cr2 = -1;
 
   CODING_GET_INFO (coding, attr, charset_list);
 
@@ -1497,13 +1613,19 @@ decode_coding_utf_16 (coding)
       if (charbuf + 2 >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c1);
+      if (byte_after_cr1 >= 0)
+       c1 = byte_after_cr1, byte_after_cr1 = -1;
+      else
+       ONE_MORE_BYTE (c1);
       if (c1 < 0)
        {
          *charbuf++ = -c1;
          continue;
        }
-      ONE_MORE_BYTE (c2);
+      if (byte_after_cr2 >= 0)
+       c2 = byte_after_cr2, byte_after_cr2 = -1;
+      else
+       ONE_MORE_BYTE (c2);
       if (c2 < 0)
        {
          *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
@@ -1512,6 +1634,7 @@ decode_coding_utf_16 (coding)
        }
       c = (endian == utf_16_big_endian
           ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
+
       if (surrogate)
        {
          if (! UTF_16_LOW_SURROGATE_P (c))
@@ -1540,7 +1663,14 @@ decode_coding_utf_16 (coding)
          if (UTF_16_HIGH_SURROGATE_P (c))
            CODING_UTF_16_SURROGATE (coding) = surrogate = c;
          else
-           *charbuf++ = c;
+           {
+             if (eol_crlf && c == '\r')
+               {
+                 ONE_MORE_BYTE (byte_after_cr1);
+                 ONE_MORE_BYTE (byte_after_cr2);
+               }
+             *charbuf++ = c;
+           }
        }
     }
 
@@ -1831,7 +1961,7 @@ detect_coding_emacs_mule (coding, detect_info)
          /* Perhaps the start of composite character.  We simple skip
             it because analyzing it is too heavy for detecting.  But,
             at least, we check that the composite character
-            constitues of more than 4 bytes.  */
+            constitutes of more than 4 bytes.  */
          const unsigned char *src_base;
 
        repeat:
@@ -2072,6 +2202,8 @@ decode_coding_emacs_mule (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
 
@@ -2085,7 +2217,10 @@ decode_coding_emacs_mule (coding)
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+       c = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c);
       if (c < 0)
        {
          *charbuf++ = -c;
@@ -2093,6 +2228,8 @@ decode_coding_emacs_mule (coding)
        }
       else if (c < 0x80)
        {
+         if (eol_crlf && c == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
          *charbuf++ = c;
          char_offset++;
        }
@@ -2945,6 +3082,8 @@ decode_coding_iso_2022 (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   setup_iso_safe_charsets (attrs);
@@ -2962,7 +3101,10 @@ decode_coding_iso_2022 (coding)
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c1);
+      if (byte_after_cr >= 0)
+       c1 = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c1);
       if (c1 < 0)
        goto invalid_code;
 
@@ -3021,6 +3163,8 @@ decode_coding_iso_2022 (coding)
          break;
 
        case ISO_control_0:
+         if (eol_crlf && c1 == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
          MAYBE_FINISH_COMPOSITION ();
          charset = CHARSET_FROM_ID (charset_ascii);
          break;
@@ -4091,6 +4235,8 @@ decode_coding_sjis (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
 
@@ -4111,11 +4257,18 @@ decode_coding_sjis (coding)
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+       c = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c);
       if (c < 0)
        goto invalid_code;
       if (c < 0x80)
-       charset = charset_roman;
+       {
+         if (eol_crlf && c == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
+         charset = charset_roman;
+       }
       else if (c == 0x80 || c == 0xA0)
        goto invalid_code;
       else if (c >= 0xA1 && c <= 0xDF)
@@ -4193,6 +4346,8 @@ decode_coding_big5 (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   val = charset_list;
@@ -4210,12 +4365,19 @@ decode_coding_big5 (coding)
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+       c = byte_after_cr, byte_after_cr = -1;
+      else
+       ONE_MORE_BYTE (c);
 
       if (c < 0)
        goto invalid_code;
       if (c < 0x80)
-       charset = charset_roman;
+       {
+         if (eol_crlf && c == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
+         charset = charset_roman;
+       }
       else
        {
          /* BIG5 -> Big5 */
@@ -4590,7 +4752,7 @@ encode_coding_ccl (coding)
       else
        {
          ASSURE_DESTINATION (ccl.produced);
-         for (i = 0; i < ccl.produced; i++)    
+         for (i = 0; i < ccl.produced; i++)
            *dst++ = destination_charbuf[i] & 0xFF;
          produced_chars += ccl.produced;
        }
@@ -4632,10 +4794,19 @@ static void
 decode_coding_raw_text (coding)
      struct coding_system *coding;
 {
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+
   coding->chars_at_source = 1;
-  coding->consumed_char = 0;
-  coding->consumed = 0;
-  record_conversion_result (coding, CODING_RESULT_SUCCESS);
+  coding->consumed_char = coding->src_chars;
+  coding->consumed = coding->src_bytes;
+  if (eol_crlf && coding->source[coding->src_bytes - 1] == '\r')
+    {
+      coding->consumed_char--;
+      coding->consumed--;
+      record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
+    }
+  else
+    record_conversion_result (coding, CODING_RESULT_SUCCESS);
 }
 
 static int
@@ -4770,7 +4941,7 @@ detect_coding_charset (coding, detect_info)
              if (src == src_end)
                goto too_short;
              ONE_MORE_BYTE (c);
-             if (c < charset->code_space[(dim - 1 - idx) * 2] 
+             if (c < charset->code_space[(dim - 1 - idx) * 2]
                  || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
                break;
            }
@@ -4829,6 +5000,8 @@ decode_coding_charset (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   valids = AREF (attrs, coding_attr_charset_valids);
@@ -4848,7 +5021,17 @@ decode_coding_charset (coding)
       if (charbuf >= charbuf_end)
        break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+       {
+         c = byte_after_cr;
+         byte_after_cr = -1;
+       }
+      else
+       {
+         ONE_MORE_BYTE (c);
+         if (eol_crlf && c == '\r')
+           ONE_MORE_BYTE (byte_after_cr);
+       }
       if (c < 0)
        goto invalid_code;
       code = c;
@@ -5529,32 +5712,53 @@ detect_coding (coding)
     {
       int c, i;
       struct coding_detection_info detect_info;
+      int null_byte_found = 0, eight_bit_found = 0;
 
       detect_info.checked = detect_info.found = detect_info.rejected = 0;
-      for (i = 0, src = coding->source; src < src_end; i++, src++)
+      coding->head_ascii = -1;
+      for (src = coding->source; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
-           break;
-         if (c < 0x20
-             && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-             && ! inhibit_iso_escape_detection
-             && ! detect_info.checked)
            {
-             coding->head_ascii = src - (coding->source + coding->consumed);
-             if (detect_coding_iso_2022 (coding, &detect_info))
+             eight_bit_found = 1;
+             if (coding->head_ascii < 0)
+               coding->head_ascii = src - coding->source;
+             if (null_byte_found)
+               break;
+           }
+         else if (c < 0x20)
+           {
+             if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+                 && ! inhibit_iso_escape_detection
+                 && ! detect_info.checked)
                {
-                 /* We have scanned the whole data.  */
-                 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
-                   /* We didn't find an 8-bit code.  */
-                   src = src_end;
-                 break;
+                 if (coding->head_ascii < 0)
+                   coding->head_ascii = src - coding->source;
+                 if (detect_coding_iso_2022 (coding, &detect_info))
+                   {
+                     /* We have scanned the whole data.  */
+                     if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+                       /* We didn't find an 8-bit code.  We may have
+                          found a null-byte, but it's very rare that
+                          a binary file confirm to ISO-2022.  */
+                       src = src_end;
+                     break;
+                   }
+               }
+             else if (! c)
+               {
+                 null_byte_found = 1;
+                 if (eight_bit_found)
+                   break;
                }
            }
        }
-      coding->head_ascii = src - (coding->source + coding->consumed);
+      if (coding->head_ascii < 0)
+       coding->head_ascii = src - coding->source;
 
-      if (coding->head_ascii < coding->src_bytes
+      if (null_byte_found || eight_bit_found
+         || coding->head_ascii < coding->src_bytes
          || detect_info.found)
        {
          enum coding_category category;
@@ -5570,48 +5774,58 @@ detect_coding (coding)
                  break;
              }
          else
-           for (i = 0; i < coding_category_raw_text; i++)
-             {
-               category = coding_priorities[i];
-               this = coding_categories + category;
-               if (this->id < 0)
-                 {
-                   /* No coding system of this category is defined.  */
-                   detect_info.rejected |= (1 << category);
-                 }
-               else if (category >= coding_category_raw_text)
-                 continue;
-               else if (detect_info.checked & (1 << category))
-                 {
-                   if (detect_info.found & (1 << category))
-                     break;
-                 }
-               else if ((*(this->detector)) (coding, &detect_info)
-                        && detect_info.found & (1 << category))
-                 {
-                   if (category == coding_category_utf_16_auto)
-                     {
-                       if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-                         category = coding_category_utf_16_le;
-                       else
-                         category = coding_category_utf_16_be;
-                     }
-                   break;
-                 }
-             }
-         
-         if (i < coding_category_raw_text)
-           setup_coding_system (CODING_ID_NAME (this->id), coding);
-         else if (detect_info.rejected == CATEGORY_MASK_ANY)
-           setup_coding_system (Qraw_text, coding);
-         else if (detect_info.rejected)
-           for (i = 0; i < coding_category_raw_text; i++)
-             if (! (detect_info.rejected & (1 << coding_priorities[i])))
+           {
+             if (null_byte_found)
                {
-                 this = coding_categories + coding_priorities[i];
-                 setup_coding_system (CODING_ID_NAME (this->id), coding);
-                 break;
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                }
+             for (i = 0; i < coding_category_raw_text; i++)
+               {
+                 category = coding_priorities[i];
+                 this = coding_categories + category;
+                 if (this->id < 0)
+                   {
+                     /* No coding system of this category is defined.  */
+                     detect_info.rejected |= (1 << category);
+                   }
+                 else if (category >= coding_category_raw_text)
+                   continue;
+                 else if (detect_info.checked & (1 << category))
+                   {
+                     if (detect_info.found & (1 << category))
+                       break;
+                   }
+                 else if ((*(this->detector)) (coding, &detect_info)
+                          && detect_info.found & (1 << category))
+                   {
+                     if (category == coding_category_utf_16_auto)
+                       {
+                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+                           category = coding_category_utf_16_le;
+                         else
+                           category = coding_category_utf_16_be;
+                       }
+                     break;
+                   }
+               }
+
+             if (i < coding_category_raw_text)
+               setup_coding_system (CODING_ID_NAME (this->id), coding);
+             else if (null_byte_found)
+               setup_coding_system (Qno_conversion, coding);
+             else if ((detect_info.rejected & CATEGORY_MASK_ANY)
+                      == CATEGORY_MASK_ANY)
+               setup_coding_system (Qraw_text, coding);
+             else if (detect_info.rejected)
+               for (i = 0; i < coding_category_raw_text; i++)
+                 if (! (detect_info.rejected & (1 << coding_priorities[i])))
+                   {
+                     this = coding_categories + coding_priorities[i];
+                     setup_coding_system (CODING_ID_NAME (this->id), coding);
+                     break;
+                   }
+           }
        }
     }
   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
@@ -5641,7 +5855,7 @@ decode_eol (coding)
 {
   Lisp_Object eol_type;
   unsigned char *p, *pbeg, *pend;
-  
+
   eol_type = CODING_ID_EOL_TYPE (coding->id);
   if (EQ (eol_type, Qunix))
     return;
@@ -5880,19 +6094,21 @@ produce_chars (coding, translation_table, last_block)
 {
   unsigned char *dst = coding->destination + coding->produced;
   unsigned char *dst_end = coding->destination + coding->dst_bytes;
-  int produced;
-  int produced_chars = 0;
+  EMACS_INT produced;
+  EMACS_INT produced_chars = 0;
   int carryover = 0;
 
   if (! coding->chars_at_source)
     {
-      /* Characters are in coding->charbuf.  */
+      /* Source characters are in coding->charbuf.  */
       int *buf = coding->charbuf;
       int *buf_end = buf + coding->charbuf_used;
 
-      if (BUFFERP (coding->src_object)
-         && EQ (coding->src_object, coding->dst_object))
-       dst_end = ((unsigned char *) coding->source) + coding->consumed;
+      if (EQ (coding->src_object, coding->dst_object))
+       {
+         coding_set_source (coding);
+         dst_end = ((unsigned char *) coding->source) + coding->consumed;
+       }
 
       while (buf < buf_end)
        {
@@ -5919,7 +6135,13 @@ produce_chars (coding, translation_table, last_block)
                                           buf_end - buf
                                           + MAX_MULTIBYTE_LENGTH * to_nchars,
                                           dst);
-                 dst_end = coding->destination + coding->dst_bytes;
+                 if (EQ (coding->src_object, coding->dst_object))
+                   {
+                     coding_set_source (coding);
+                     dst_end = ((unsigned char *) coding->source) + coding->consumed;
+                   }
+                 else
+                   dst_end = coding->destination + coding->dst_bytes;
                }
 
              for (i = 0; i < to_nchars; i++)
@@ -5928,7 +6150,7 @@ produce_chars (coding, translation_table, last_block)
                    c = XINT (AREF (trans, i));
                  if (coding->dst_multibyte
                      || ! CHAR_BYTE8_P (c))
-                   CHAR_STRING_ADVANCE (c, dst);
+                   CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
                  else
                    *dst++ = CHAR_TO_BYTE8 (c);
                }
@@ -5945,18 +6167,18 @@ produce_chars (coding, translation_table, last_block)
     }
   else
     {
+      /* Source characters are at coding->source.  */
       const unsigned char *src = coding->source;
-      const unsigned char *src_end = src + coding->src_bytes;
-      Lisp_Object eol_type;
-
-      eol_type = CODING_ID_EOL_TYPE (coding->id);
+      const unsigned char *src_end = src + coding->consumed;
 
+      if (EQ (coding->dst_object, coding->src_object))
+       dst_end = (unsigned char *) src;
       if (coding->src_multibyte != coding->dst_multibyte)
        {
          if (coding->src_multibyte)
            {
              int multibytep = 1;
-             int consumed_chars;
+             EMACS_INT consumed_chars;
 
              while (1)
                {
@@ -5964,37 +6186,23 @@ produce_chars (coding, translation_table, last_block)
                  int c;
 
                  ONE_MORE_BYTE (c);
-                 if (c == '\r')
+                 if (dst == dst_end)
                    {
-                     if (EQ (eol_type, Qdos))
+                     if (EQ (coding->src_object, coding->dst_object))
+                       dst_end = (unsigned char *) src;
+                     if (dst == dst_end)
                        {
-                         if (src == src_end)
-                           {
-                             record_conversion_result
-                               (coding, CODING_RESULT_INSUFFICIENT_SRC);
-                             goto no_more_source;
-                           }
-                         if (*src == '\n')
-                           c = *src++;
+                         EMACS_INT offset = src - coding->source;
+
+                         dst = alloc_destination (coding, src_end - src + 1,
+                                                  dst);
+                         dst_end = coding->destination + coding->dst_bytes;
+                         coding_set_source (coding);
+                         src = coding->source + offset;
+                         src_end = coding->source + coding->src_bytes;
+                         if (EQ (coding->src_object, coding->dst_object))
+                           dst_end = (unsigned char *) src;
                        }
-                     else if (EQ (eol_type, Qmac))
-                       c = '\n';
-                   }
-                 if (dst == dst_end)
-                   {
-                     coding->consumed = src - coding->source;
-
-                   if (EQ (coding->src_object, coding->dst_object))
-                     dst_end = (unsigned char *) src;
-                   if (dst == dst_end)
-                     {
-                       dst = alloc_destination (coding, src_end - src + 1,
-                                                dst);
-                       dst_end = coding->destination + coding->dst_bytes;
-                       coding_set_source (coding);
-                       src = coding->source + coding->consumed;
-                       src_end = coding->source + coding->src_bytes;
-                     }
                    }
                  *dst++ = c;
                  produced_chars++;
@@ -6008,31 +6216,26 @@ produce_chars (coding, translation_table, last_block)
                int multibytep = 1;
                int c = *src++;
 
-               if (c == '\r')
-                 {
-                   if (EQ (eol_type, Qdos))
-                     {
-                       if (src < src_end
-                           && *src == '\n')
-                         c = *src++;
-                     }
-                   else if (EQ (eol_type, Qmac))
-                     c = '\n';
-                 }
                if (dst >= dst_end - 1)
                  {
-                   coding->consumed = src - coding->source;
-
                    if (EQ (coding->src_object, coding->dst_object))
                      dst_end = (unsigned char *) src;
                    if (dst >= dst_end - 1)
                      {
-                       dst = alloc_destination (coding, src_end - src + 2,
-                                                dst);
+                       EMACS_INT offset = src - coding->source;
+                       EMACS_INT more_bytes;
+
+                       if (EQ (coding->src_object, coding->dst_object))
+                         more_bytes = ((src_end - src) / 2) + 2;
+                       else
+                         more_bytes = src_end - src + 2;
+                       dst = alloc_destination (coding, more_bytes, dst);
                        dst_end = coding->destination + coding->dst_bytes;
                        coding_set_source (coding);
-                       src = coding->source + coding->consumed;
+                       src = coding->source + offset;
                        src_end = coding->source + coding->src_bytes;
+                       if (EQ (coding->src_object, coding->dst_object))
+                         dst_end = (unsigned char *) src;
                      }
                  }
                EMIT_ONE_BYTE (c);
@@ -6042,7 +6245,7 @@ produce_chars (coding, translation_table, last_block)
        {
          if (!EQ (coding->src_object, coding->dst_object))
            {
-             int require = coding->src_bytes - coding->dst_bytes;
+             EMACS_INT require = coding->src_bytes - coding->dst_bytes;
 
              if (require > 0)
                {
@@ -6054,28 +6257,10 @@ produce_chars (coding, translation_table, last_block)
                  src_end = coding->source + coding->src_bytes;
                }
            }
-         produced_chars = coding->src_chars;
+         produced_chars = coding->consumed_char;
          while (src < src_end)
-           {
-             int c = *src++;
-
-             if (c == '\r')
-               {
-                 if (EQ (eol_type, Qdos))
-                   {
-                     if (src < src_end
-                         && *src == '\n')
-                       c = *src++;
-                     produced_chars--;
-                   }
-                 else if (EQ (eol_type, Qmac))
-                   c = '\n';
-               }
-             *dst++ = c;
-           }
+           *dst++ = *src++;
        }
-      coding->consumed = coding->src_bytes;
-      coding->consumed_char = coding->src_chars;
     }
 
   produced = dst - (coding->destination + coding->produced);
@@ -6538,12 +6723,12 @@ consume_chars (coding, translation_table, max_lookup)
          if (coding->encoder == encode_coding_raw_text)
            c = *src++, pos++;
          else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
-           c = STRING_CHAR_ADVANCE (src), pos += bytes;
+           c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
          else
            c = BYTE8_TO_CHAR (*src), src++, pos++;
        }
       else
-       c = STRING_CHAR_ADVANCE (src), pos++;
+       c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
        c = '\n';
       if (! EQ (eol_type, Qunix))
@@ -6693,7 +6878,7 @@ make_conversion_work_buffer (multibyte)
     }
   current = current_buffer;
   set_buffer_internal (XBUFFER (workbuf));
-  Ferase_buffer ();      
+  Ferase_buffer ();
   current_buffer->undo_list = Qt;
   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
   set_buffer_internal (current);
@@ -6891,6 +7076,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
            }
          saved_pt = PT, saved_pt_byte = PT_BYTE;
          TEMP_SET_PT_BOTH (from, from_byte);
+         current_buffer->text->inhibit_shrinking = 1;
          del_range_both (from, from_byte, to, to_byte, 1);
          coding->src_pos = -chars;
          coding->src_pos_byte = -bytes;
@@ -6910,10 +7096,10 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
       || (! NILP (CODING_ATTR_POST_READ (attrs))
          && NILP (dst_object)))
     {
-      coding->dst_object = code_conversion_save (1, 1);
+      coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
+      coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
       coding->dst_pos = BEG;
       coding->dst_pos_byte = BEG_BYTE;
-      coding->dst_multibyte = 1;
     }
   else if (BUFFERP (dst_object))
     {
@@ -6928,6 +7114,9 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
     {
       code_conversion_save (0, 0);
       coding->dst_object = Qnil;
+      /* Most callers presume this will return a multibyte result, and they
+        won't use `binary' or `raw-text' anyway, so let's not worry about
+        CODING_FOR_UNIBYTE.  */
       coding->dst_multibyte = 1;
     }
 
@@ -6984,6 +7173,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
         As we have moved PT while replacing the original buffer
         contents, we must recover it now.  */
       set_buffer_internal (XBUFFER (src_object));
+      current_buffer->text->inhibit_shrinking = 0;
       if (saved_pt < from)
        TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
       else if (saved_pt < from + chars)
@@ -7348,6 +7538,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
   int id;
   struct coding_detection_info detect_info;
   enum coding_category base_category;
+  int null_byte_found = 0, eight_bit_found = 0;
 
   if (NILP (coding_system))
     coding_system = Qundecided;
@@ -7373,33 +7564,54 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
       struct coding_system *this;
       int c, i;
 
+      coding.head_ascii = -1;
       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
-      for (i = 0; src < src_end; i++, src++)
+      for (; src < src_end; src++)
        {
          c = *src;
          if (c & 0x80)
-           break;
-         if (c < 0x20
-             && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-             && ! inhibit_iso_escape_detection)
            {
-             coding.head_ascii = src - coding.source;
-             if (detect_coding_iso_2022 (&coding, &detect_info))
+             eight_bit_found = 1;
+             if (coding.head_ascii < 0)
+               coding.head_ascii = src - coding.source;
+             if (null_byte_found)
+               break;
+           }
+         if (c < 0x20)
+           {
+             if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+                 && ! inhibit_iso_escape_detection
+                 && ! detect_info.checked)
                {
-                 /* We have scanned the whole data.  */
-                 if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
-                   /* We didn't find an 8-bit code.  */
-                   src = src_end;
-                 break;
+                 if (coding.head_ascii < 0)
+                   coding.head_ascii = src - coding.source;
+                 if (detect_coding_iso_2022 (&coding, &detect_info))
+                   {
+                     /* We have scanned the whole data.  */
+                     if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+                       /* We didn't find an 8-bit code.  We may have
+                          found a null-byte, but it's very rare that
+                          a binary file confirm to ISO-2022.  */
+                       src = src_end;
+                     break;
+                   }
+               }
+             else if (! c)
+               {
+                 null_byte_found = 1;
+                 if (eight_bit_found)
+                   break;
                }
            }
        }
-      coding.head_ascii = src - coding.source;
+      if (coding.head_ascii < 0)
+       coding.head_ascii = src - coding.source;
 
-      if (src < src_end
+      if (null_byte_found || eight_bit_found
+         || coding.head_ascii < coding.src_bytes
          || detect_info.found)
        {
-         if (src == src_end)
+         if (coding.head_ascii == coding.src_bytes)
            /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
            for (i = 0; i < coding_category_raw_text; i++)
              {
@@ -7409,44 +7621,48 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
                  break;
              }
          else
-           for (i = 0; i < coding_category_raw_text; i++)
-             {
-               category = coding_priorities[i];
-               this = coding_categories + category;
+           {
+             if (null_byte_found)
+               {
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_16;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
+               }
+             for (i = 0; i < coding_category_raw_text; i++)
+               {
+                 category = coding_priorities[i];
+                 this = coding_categories + category;
 
-               if (this->id < 0)
-                 {
-                   /* No coding system of this category is defined.  */
-                   detect_info.rejected |= (1 << category);
-                 }
-               else if (category >= coding_category_raw_text)
-                 continue;
-               else if (detect_info.checked & (1 << category))
-                 {
-                   if (highest
-                       && (detect_info.found & (1 << category)))
-                     break;
-                 }
-               else
-                 {
-                   if ((*(this->detector)) (&coding, &detect_info)
-                       && highest
-                       && (detect_info.found & (1 << category)))
-                     {
-                       if (category == coding_category_utf_16_auto)
-                         {
-                           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-                             category = coding_category_utf_16_le;
-                           else
-                             category = coding_category_utf_16_be;
-                         }
+                 if (this->id < 0)
+                   {
+                     /* No coding system of this category is defined.  */
+                     detect_info.rejected |= (1 << category);
+                   }
+                 else if (category >= coding_category_raw_text)
+                   continue;
+                 else if (detect_info.checked & (1 << category))
+                   {
+                     if (highest
+                         && (detect_info.found & (1 << category)))
                        break;
-                     }
-                 }
-             }
+                   }
+                 else if ((*(this->detector)) (&coding, &detect_info)
+                          && highest
+                          && (detect_info.found & (1 << category)))
+                   {
+                     if (category == coding_category_utf_16_auto)
+                       {
+                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+                           category = coding_category_utf_16_le;
+                         else
+                           category = coding_category_utf_16_be;
+                       }
+                     break;
+                   }
+               }
+           }
        }
 
-      if (detect_info.rejected == CATEGORY_MASK_ANY)
+      if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
        {
          detect_info.found = CATEGORY_MASK_RAW_TEXT;
          id = coding_categories[coding_category_raw_text].id;
@@ -7535,8 +7751,13 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
     if (VECTORP (eol_type))
       {
        if (detect_info.found & ~CATEGORY_MASK_UTF_16)
-         normal_eol = detect_eol (coding.source, src_bytes,
-                                  coding_category_raw_text);
+         {
+           if (null_byte_found)
+             normal_eol = EOL_SEEN_LF;
+           else
+             normal_eol = detect_eol (coding.source, src_bytes,
+                                      coding_category_raw_text);
+         }
        if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
                                 | CATEGORY_MASK_UTF_16_BE_NOSIG))
          utf_16_be_eol = detect_eol (coding.source, src_bytes,
@@ -8502,7 +8723,7 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
   operation = args[0];
   if (!SYMBOLP (operation)
       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
-    error ("Invalid first arguement");
+    error ("Invalid first argument");
   if (nargs < 1 + XINT (target_idx))
     error ("Too few arguments for operation: %s",
           SDATA (SYMBOL_NAME (operation)));
@@ -9121,7 +9342,7 @@ usage: (define-coding-system-internal ...)  */)
     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
                                CODING_ATTR_PLIST (attrs)));
   CODING_ATTR_PLIST (attrs)
-    = Fcons (QCascii_compatible_p, 
+    = Fcons (QCascii_compatible_p,
             Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
                    CODING_ATTR_PLIST (attrs)));