X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/13818c30785d1253412c4e08c61417eb81a98c5b..9f2554de935574cb1168b8de6fb3b38079bc4b80:/src/coding.c

diff --git a/src/coding.c b/src/coding.c
index 8bac5c5ae8..e292f80859 100644
--- a/src/coding.c
+++ b/src/coding.c
@@ -11,10 +11,10 @@
 
 This file is part of GNU Emacs.
 
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
 
 GNU Emacs is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -22,9 +22,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING.  If not, write to
-the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA.  */
+along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 
 /*** TABLE OF CONTENTS ***
 
@@ -316,7 +314,7 @@ Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5;
 Lisp_Object Qbig, Qlittle;
 Lisp_Object Qcoding_system_history;
 Lisp_Object Qvalid_codes;
-Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
+Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
 Lisp_Object QCdecode_translation_table, QCencode_translation_table;
 Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
 Lisp_Object QCascii_compatible_p;
@@ -548,6 +546,9 @@ enum iso_code_class_type
    character is prohibited by CODING_ISO_FLAG_SAFE.  */
 #define CODING_INHIBIT_CHARACTER_SUBSTITUTION  '?'
 
+/* UTF-8 section */
+#define CODING_UTF_8_BOM(coding)	\
+  ((coding)->spec.utf_8_bom)
 
 /* UTF-16 section */
 #define CODING_UTF_16_BOM(coding)	\
@@ -578,7 +579,9 @@ enum coding_category
     coding_category_iso_8_2,
     coding_category_iso_7_else,
     coding_category_iso_8_else,
-    coding_category_utf_8,
+    coding_category_utf_8_auto,
+    coding_category_utf_8_nosig,
+    coding_category_utf_8_sig,
     coding_category_utf_16_auto,
     coding_category_utf_16_be,
     coding_category_utf_16_le,
@@ -602,7 +605,9 @@ enum coding_category
 #define CATEGORY_MASK_ISO_8_2		(1 << coding_category_iso_8_2)
 #define CATEGORY_MASK_ISO_7_ELSE	(1 << coding_category_iso_7_else)
 #define CATEGORY_MASK_ISO_8_ELSE	(1 << coding_category_iso_8_else)
-#define CATEGORY_MASK_UTF_8		(1 << coding_category_utf_8)
+#define CATEGORY_MASK_UTF_8_AUTO	(1 << coding_category_utf_8_auto)
+#define CATEGORY_MASK_UTF_8_NOSIG	(1 << coding_category_utf_8_nosig)
+#define CATEGORY_MASK_UTF_8_SIG		(1 << coding_category_utf_8_sig)
 #define CATEGORY_MASK_UTF_16_AUTO	(1 << coding_category_utf_16_auto)
 #define CATEGORY_MASK_UTF_16_BE		(1 << coding_category_utf_16_be)
 #define CATEGORY_MASK_UTF_16_LE		(1 << coding_category_utf_16_le)
@@ -624,7 +629,10 @@ enum coding_category
    | CATEGORY_MASK_ISO_8_2		\
    | CATEGORY_MASK_ISO_7_ELSE		\
    | CATEGORY_MASK_ISO_8_ELSE		\
-   | CATEGORY_MASK_UTF_8		\
+   | CATEGORY_MASK_UTF_8_AUTO		\
+   | CATEGORY_MASK_UTF_8_NOSIG		\
+   | CATEGORY_MASK_UTF_8_SIG		\
+   | CATEGORY_MASK_UTF_16_AUTO		\
    | CATEGORY_MASK_UTF_16_BE		\
    | CATEGORY_MASK_UTF_16_LE		\
    | CATEGORY_MASK_UTF_16_BE_NOSIG	\
@@ -657,11 +665,16 @@ enum coding_category
      | CATEGORY_MASK_ISO_ELSE)
 
 #define CATEGORY_MASK_UTF_16		\
-  (CATEGORY_MASK_UTF_16_BE		\
+  (CATEGORY_MASK_UTF_16_AUTO		\
+   | CATEGORY_MASK_UTF_16_BE		\
    | CATEGORY_MASK_UTF_16_LE		\
    | CATEGORY_MASK_UTF_16_BE_NOSIG	\
    | CATEGORY_MASK_UTF_16_LE_NOSIG)
 
+#define CATEGORY_MASK_UTF_8	\
+  (CATEGORY_MASK_UTF_8_AUTO	\
+   | CATEGORY_MASK_UTF_8_NOSIG	\
+   | CATEGORY_MASK_UTF_8_SIG)
 
 /* List of symbols `coding-category-xxx' ordered by priority.  This
    variable is exposed to Emacs Lisp.  */
@@ -898,7 +911,7 @@ static INLINE void produce_charset P_ ((struct coding_system *, int *,
 static void produce_annotation P_ ((struct coding_system *, EMACS_INT));
 static int decode_coding P_ ((struct coding_system *));
 static INLINE int *handle_composition_annotation P_ ((EMACS_INT, EMACS_INT,
-						      struct coding_system *, 
+						      struct coding_system *,
 						      int *, EMACS_INT *));
 static INLINE int *handle_charset_annotation P_ ((EMACS_INT, EMACS_INT,
 						  struct coding_system *,
@@ -955,6 +968,11 @@ record_conversion_result (struct coding_system *coding,
   } while (0)
 
 
+/* If BYTES or fewer bytes of room remain at dst, allocate memory
+   for coding->destination and update dst and dst_end.  We don't have
+   to take care of coding->source which will be relocated.  It is
+   handled by calling coding_set_source in encode_coding.  */
+
 #define ASSURE_DESTINATION(bytes)				\
   do {								\
     if (dst + (bytes) >= dst_end)				\
@@ -967,6 +985,66 @@ record_conversion_result (struct coding_system *coding,
   } while (0)
 
 
+/* Store multibyte form of the character C in P, and advance P to the
+   end of the multibyte form.  This is like CHAR_STRING_ADVANCE but it
+   never calls MAYBE_UNIFY_CHAR.  C and P are evaluated more than once.  */
+
+#define CHAR_STRING_ADVANCE_NO_UNIFY(c, p)	\
+  do {						\
+    if ((c) <= MAX_1_BYTE_CHAR)			\
+      *(p)++ = (c);				\
+    else if ((c) <= MAX_2_BYTE_CHAR)		\
+      *(p)++ = (0xC0 | ((c) >> 6)),		\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+    else if ((c) <= MAX_3_BYTE_CHAR)		\
+      *(p)++ = (0xE0 | ((c) >> 12)),		\
+	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),	\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+    else if ((c) <= MAX_4_BYTE_CHAR)		\
+      *(p)++ = (0xF0 | ((c) >> 18)),		\
+	*(p)++ = (0x80 | (((c) >> 12) & 0x3F)),	\
+	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),	\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+    else if ((c) <= MAX_5_BYTE_CHAR)		\
+      *(p)++ = 0xF8,				\
+	*(p)++ = (0x80 | (((c) >> 18) & 0x0F)),	\
+	*(p)++ = (0x80 | (((c) >> 12) & 0x3F)),	\
+	*(p)++ = (0x80 | (((c) >> 6) & 0x3F)),	\
+	*(p)++ = (0x80 | ((c) & 0x3F));		\
+    else					\
+      (p) += BYTE8_STRING ((c) - 0x3FFF80, p);	\
+  } while (0)
+
+
+/* Return the character code of character whose multibyte form is at
+   P, and advance P to the end of the multibyte form.  This is like
+   STRING_CHAR_ADVANCE, but it never calls MAYBE_UNIFY_CHAR.  */
+
+#define STRING_CHAR_ADVANCE_NO_UNIFY(p)				\
+  (!((p)[0] & 0x80)						\
+   ? *(p)++							\
+   : ! ((p)[0] & 0x20)						\
+   ? ((p) += 2,							\
+      ((((p)[-2] & 0x1F) << 6)					\
+       | ((p)[-1] & 0x3F)					\
+       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))	\
+   : ! ((p)[0] & 0x10)						\
+   ? ((p) += 3,							\
+      ((((p)[-3] & 0x0F) << 12)					\
+       | (((p)[-2] & 0x3F) << 6)				\
+       | ((p)[-1] & 0x3F)))					\
+   : ! ((p)[0] & 0x08)						\
+   ? ((p) += 4,							\
+      ((((p)[-4] & 0xF) << 18)					\
+       | (((p)[-3] & 0x3F) << 12)				\
+       | (((p)[-2] & 0x3F) << 6)				\
+       | ((p)[-1] & 0x3F)))					\
+   : ((p) += 5,							\
+      ((((p)[-4] & 0x3F) << 18)					\
+       | (((p)[-3] & 0x3F) << 12)				\
+       | (((p)[-2] & 0x3F) << 6)				\
+       | ((p)[-1] & 0x3F))))
+
 
 static void
 coding_set_source (coding)
@@ -1032,20 +1110,23 @@ coding_alloc_by_realloc (coding, bytes)
 }
 
 static void
-coding_alloc_by_making_gap (coding, offset, bytes)
+coding_alloc_by_making_gap (coding, gap_head_used, bytes)
      struct coding_system *coding;
-     EMACS_INT offset, bytes;
+     EMACS_INT gap_head_used, bytes;
 {
-  if (BUFFERP (coding->dst_object)
-      && EQ (coding->src_object, coding->dst_object))
+  if (EQ (coding->src_object, coding->dst_object))
     {
-      EMACS_INT add = offset + (coding->src_bytes - coding->consumed);
+      /* The gap may contain the produced data at the head and not-yet
+	 consumed data at the tail.  To preserve those data, we at
+	 first make the gap size to zero, then increase the gap
+	 size.  */
+      EMACS_INT add = GAP_SIZE;
 
-      GPT += offset, GPT_BYTE += offset;
-      GAP_SIZE -= add; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
+      GPT += gap_head_used, GPT_BYTE += gap_head_used;
+      GAP_SIZE = 0; ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
       make_gap (bytes);
       GAP_SIZE += add; ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
-      GPT -= offset, GPT_BYTE -= offset;
+      GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
     }
   else
     {
@@ -1068,7 +1149,11 @@ alloc_destination (coding, nbytes, dst)
   EMACS_INT offset = dst - coding->destination;
 
   if (BUFFERP (coding->dst_object))
-    coding_alloc_by_making_gap (coding, offset, nbytes);
+    {
+      struct buffer *buf = XBUFFER (coding->dst_object);
+
+      coding_alloc_by_making_gap (coding, dst - BUF_GPT_ADDR (buf), nbytes);
+    }
   else
     coding_alloc_by_realloc (coding, nbytes);
   record_conversion_result (coding, CODING_RESULT_SUCCESS);
@@ -1142,6 +1227,11 @@ alloc_destination (coding, nbytes, dst)
 #define UTF_8_4_OCTET_LEADING_P(c) (((c) & 0xF8) == 0xF0)
 #define UTF_8_5_OCTET_LEADING_P(c) (((c) & 0xFC) == 0xF8)
 
+#define UTF_BOM 0xFEFF
+#define UTF_8_BOM_1 0xEF
+#define UTF_8_BOM_2 0xBB
+#define UTF_8_BOM_3 0xBF
+
 static int
 detect_coding_utf_8 (coding, detect_info)
      struct coding_system *coding;
@@ -1151,6 +1241,7 @@ detect_coding_utf_8 (coding, detect_info)
   const unsigned char *src_end = coding->source + coding->src_bytes;
   int multibytep = coding->src_multibyte;
   int consumed_chars = 0;
+  int bom_found = 0;
   int found = 0;
 
   detect_info->checked |= CATEGORY_MASK_UTF_8;
@@ -1170,7 +1261,7 @@ detect_coding_utf_8 (coding, detect_info)
 	break;
       if (UTF_8_2_OCTET_LEADING_P (c))
 	{
-	  found = CATEGORY_MASK_UTF_8;
+	  found = 1;
 	  continue;
 	}
       ONE_MORE_BYTE (c2);
@@ -1178,7 +1269,10 @@ detect_coding_utf_8 (coding, detect_info)
 	break;
       if (UTF_8_3_OCTET_LEADING_P (c))
 	{
-	  found = CATEGORY_MASK_UTF_8;
+	  found = 1;
+	  if (src_base == coding->source
+	      && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
+	    bom_found = 1;
 	  continue;
 	}
       ONE_MORE_BYTE (c3);
@@ -1186,7 +1280,7 @@ detect_coding_utf_8 (coding, detect_info)
 	break;
       if (UTF_8_4_OCTET_LEADING_P (c))
 	{
-	  found = CATEGORY_MASK_UTF_8;
+	  found = 1;
 	  continue;
 	}
       ONE_MORE_BYTE (c4);
@@ -1194,7 +1288,7 @@ detect_coding_utf_8 (coding, detect_info)
 	break;
       if (UTF_8_5_OCTET_LEADING_P (c))
 	{
-	  found = CATEGORY_MASK_UTF_8;
+	  found = 1;
 	  continue;
 	}
       break;
@@ -1208,7 +1302,17 @@ detect_coding_utf_8 (coding, detect_info)
       detect_info->rejected |= CATEGORY_MASK_UTF_8;
       return 0;
     }
-  detect_info->found |= found;
+  if (bom_found)
+    {
+      /* The first character 0xFEFF doesn't necessarily mean a BOM.  */
+      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
+    }
+  else
+    {
+      detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
+      if (found)
+	detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
+    }
   return 1;
 }
 
@@ -1222,12 +1326,48 @@ decode_coding_utf_8 (coding)
   const unsigned char *src_base;
   int *charbuf = coding->charbuf + coding->charbuf_used;
   int *charbuf_end = coding->charbuf + coding->charbuf_size;
-  int consumed_chars = 0, consumed_chars_base;
+  int consumed_chars = 0, consumed_chars_base = 0;
   int multibytep = coding->src_multibyte;
+  enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
   Lisp_Object attr, charset_list;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attr, charset_list);
 
+  if (bom != utf_without_bom)
+    {
+      int c1, c2, c3;
+
+      src_base = src;
+      ONE_MORE_BYTE (c1);
+      if (! UTF_8_3_OCTET_LEADING_P (c1))
+	src = src_base;
+      else
+	{
+	  ONE_MORE_BYTE (c2);
+	  if (! UTF_8_EXTRA_OCTET_P (c2))
+	    src = src_base;
+	  else
+	    {
+	      ONE_MORE_BYTE (c3);
+	      if (! UTF_8_EXTRA_OCTET_P (c3))
+		src = src_base;
+	      else
+		{
+		  if ((c1 != UTF_8_BOM_1)
+		      || (c2 != UTF_8_BOM_2) || (c3 != UTF_8_BOM_3))
+		    src = src_base;
+		  else
+		    CODING_UTF_8_BOM (coding) = utf_without_bom;
+		}
+	    }
+	}
+    }
+  CODING_UTF_8_BOM (coding) = utf_without_bom;
+
+
+
   while (1)
     {
       int c, c1, c2, c3, c4, c5;
@@ -1238,13 +1378,18 @@ decode_coding_utf_8 (coding)
       if (charbuf >= charbuf_end)
 	break;
 
-      ONE_MORE_BYTE (c1);
+      if (byte_after_cr >= 0)
+	c1 = byte_after_cr, byte_after_cr = -1;
+      else
+	ONE_MORE_BYTE (c1);
       if (c1 < 0)
 	{
 	  c = - c1;
 	}
       else if (UTF_8_1_OCTET_P(c1))
 	{
+	  if (eol_crlf && c1 == '\r')
+	    ONE_MORE_BYTE (byte_after_cr);
 	  c = c1;
 	}
       else
@@ -1336,6 +1481,13 @@ encode_coding_utf_8 (coding)
   int produced_chars = 0;
   int c;
 
+  if (CODING_UTF_8_BOM (coding) == utf_with_bom)
+    {
+      ASSURE_DESTINATION (3);
+      EMIT_THREE_BYTES (UTF_8_BOM_1, UTF_8_BOM_2, UTF_8_BOM_3);
+      CODING_UTF_8_BOM (coding) = utf_without_bom;
+    }
+
   if (multibytep)
     {
       int safe_room = MAX_MULTIBYTE_LENGTH * 2;
@@ -1353,7 +1505,7 @@ encode_coding_utf_8 (coding)
 	    }
 	  else
 	    {
-	      CHAR_STRING_ADVANCE (c, pend);
+	      CHAR_STRING_ADVANCE_NO_UNIFY (c, pend);
 	      for (p = str; p < pend; p++)
 		EMIT_ONE_BYTE (*p);
 	    }
@@ -1370,7 +1522,7 @@ encode_coding_utf_8 (coding)
 	  if (CHAR_BYTE8_P (c))
 	    *dst++ = CHAR_TO_BYTE8 (c);
 	  else
-	    dst += CHAR_STRING (c, dst);
+	    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
 	  produced_chars++;
 	}
     }
@@ -1434,11 +1586,44 @@ detect_coding_utf_16 (coding, detect_info)
 				| CATEGORY_MASK_UTF_16_BE_NOSIG
 				| CATEGORY_MASK_UTF_16_LE_NOSIG);
     }
-  else if (c1 >= 0 && c2 >= 0)
+  else
     {
+      /* We check the dispersion of the bytes at even (E) and odd (O)
+	 offsets.  If both are high, we assume binary data.  */
+      unsigned char e[256], o[256];
+      unsigned e_num = 1, o_num = 1;
+
+      memset (e, 0, 256);
+      memset (o, 0, 256);
+      e[c1] = 1;
+      o[c2] = 1;
+
       detect_info->rejected
 	|= (CATEGORY_MASK_UTF_16_BE | CATEGORY_MASK_UTF_16_LE);
+
+      while (1)
+	{
+	  ONE_MORE_BYTE (c1);
+	  ONE_MORE_BYTE (c2);
+	  if (! e[c1])
+	    {
+	      e[c1] = 1;	/* New distinct even-offset byte.  */
+	      e_num++;
+	      if (e_num >= 128)
+		break;
+	    }
+	  if (! o[c2])
+	    {
+	      o[c2] = 1;	/* New distinct odd-offset byte.  */
+	      o_num++;
+	      if (o_num >= 128)
+		break;
+	    }
+	}
+      detect_info->rejected |= CATEGORY_MASK_UTF_16;
+      return 0;
     }
+
  no_more_source:
   return 1;
 }
@@ -1452,16 +1637,18 @@ decode_coding_utf_16 (coding)
   const unsigned char *src_base;
   int *charbuf = coding->charbuf + coding->charbuf_used;
   int *charbuf_end = coding->charbuf + coding->charbuf_size;
-  int consumed_chars = 0, consumed_chars_base;
+  int consumed_chars = 0, consumed_chars_base = 0;
   int multibytep = coding->src_multibyte;
-  enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
+  enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
   enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
   int surrogate = CODING_UTF_16_SURROGATE (coding);
   Lisp_Object attr, charset_list;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr1 = -1, byte_after_cr2 = -1;
 
   CODING_GET_INFO (coding, attr, charset_list);
 
-  if (bom == utf_16_with_bom)
+  if (bom == utf_with_bom)
     {
       int c, c1, c2;
 
@@ -1478,13 +1665,13 @@ decode_coding_utf_16 (coding)
 	  src = src_base;
 	  coding->errors++;
 	}
-      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+      CODING_UTF_16_BOM (coding) = utf_without_bom;
     }
-  else if (bom == utf_16_detect_bom)
+  else if (bom == utf_detect_bom)
     {
       /* We have already tried to detect BOM and failed in
 	 detect_coding.  */
-      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+      CODING_UTF_16_BOM (coding) = utf_without_bom;
     }
 
   while (1)
@@ -1497,13 +1684,19 @@ decode_coding_utf_16 (coding)
       if (charbuf + 2 >= charbuf_end)
 	break;
 
-      ONE_MORE_BYTE (c1);
+      if (byte_after_cr1 >= 0)
+	c1 = byte_after_cr1, byte_after_cr1 = -1;
+      else
+	ONE_MORE_BYTE (c1);
       if (c1 < 0)
 	{
 	  *charbuf++ = -c1;
 	  continue;
 	}
-      ONE_MORE_BYTE (c2);
+      if (byte_after_cr2 >= 0)
+	c2 = byte_after_cr2, byte_after_cr2 = -1;
+      else
+	ONE_MORE_BYTE (c2);
       if (c2 < 0)
 	{
 	  *charbuf++ = ASCII_BYTE_P (c1) ? c1 : BYTE8_TO_CHAR (c1);
@@ -1512,6 +1705,7 @@ decode_coding_utf_16 (coding)
 	}
       c = (endian == utf_16_big_endian
 	   ? ((c1 << 8) | c2) : ((c2 << 8) | c1));
+
       if (surrogate)
 	{
 	  if (! UTF_16_LOW_SURROGATE_P (c))
@@ -1540,7 +1734,14 @@ decode_coding_utf_16 (coding)
 	  if (UTF_16_HIGH_SURROGATE_P (c))
 	    CODING_UTF_16_SURROGATE (coding) = surrogate = c;
 	  else
-	    *charbuf++ = c;
+	    {
+	      if (eol_crlf && c == '\r')
+		{
+		  ONE_MORE_BYTE (byte_after_cr1);
+		  ONE_MORE_BYTE (byte_after_cr2);
+		}
+	      *charbuf++ = c;
+	    }
 	}
     }
 
@@ -1560,7 +1761,7 @@ encode_coding_utf_16 (coding)
   unsigned char *dst = coding->destination + coding->produced;
   unsigned char *dst_end = coding->destination + coding->dst_bytes;
   int safe_room = 8;
-  enum utf_16_bom_type bom = CODING_UTF_16_BOM (coding);
+  enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
   int big_endian = CODING_UTF_16_ENDIAN (coding) == utf_16_big_endian;
   int produced_chars = 0;
   Lisp_Object attrs, charset_list;
@@ -1568,14 +1769,14 @@ encode_coding_utf_16 (coding)
 
   CODING_GET_INFO (coding, attrs, charset_list);
 
-  if (bom != utf_16_without_bom)
+  if (bom != utf_without_bom)
     {
       ASSURE_DESTINATION (safe_room);
       if (big_endian)
 	EMIT_TWO_BYTES (0xFE, 0xFF);
       else
 	EMIT_TWO_BYTES (0xFF, 0xFE);
-      CODING_UTF_16_BOM (coding) = utf_16_without_bom;
+      CODING_UTF_16_BOM (coding) = utf_without_bom;
     }
 
   while (charbuf < charbuf_end)
@@ -1711,7 +1912,7 @@ emacs_mule_char (coding, src, nbytes, nchars, id)
     {
       if (c >= 0xA0)
 	{
-	  /* Old style component character of a compostion.  */
+	  /* Old style component character of a composition.  */
 	  if (c == 0xA0)
 	    {
 	      ONE_MORE_BYTE (c);
@@ -1831,7 +2032,7 @@ detect_coding_emacs_mule (coding, detect_info)
 	  /* Perhaps the start of composite character.  We simple skip
 	     it because analyzing it is too heavy for detecting.  But,
 	     at least, we check that the composite character
-	     constitues of more than 4 bytes.  */
+	     constitutes of more than 4 bytes.  */
 	  const unsigned char *src_base;
 
 	repeat:
@@ -2072,6 +2273,8 @@ decode_coding_emacs_mule (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
 
@@ -2085,7 +2288,10 @@ decode_coding_emacs_mule (coding)
       if (charbuf >= charbuf_end)
 	break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+	c = byte_after_cr, byte_after_cr = -1;
+      else
+	ONE_MORE_BYTE (c);
       if (c < 0)
 	{
 	  *charbuf++ = -c;
@@ -2093,6 +2299,8 @@ decode_coding_emacs_mule (coding)
 	}
       else if (c < 0x80)
 	{
+	  if (eol_crlf && c == '\r')
+	    ONE_MORE_BYTE (byte_after_cr);
 	  *charbuf++ = c;
 	  char_offset++;
 	}
@@ -2241,8 +2449,10 @@ encode_coding_emacs_mule (coding)
 	  if (preferred_charset_id >= 0)
 	    {
 	      charset = CHARSET_FROM_ID (preferred_charset_id);
-	      if (! CHAR_CHARSET_P (c, charset))
-		charset = char_charset (c, charset_list, NULL);
+	      if (CHAR_CHARSET_P (c, charset))
+		code = ENCODE_CHAR (charset, c);
+	      else
+		charset = char_charset (c, charset_list, &code);
 	    }
 	  else
 	    charset = char_charset (c, charset_list, &code);
@@ -2550,6 +2760,7 @@ detect_coding_iso_2022 (coding, detect_info)
   int i;
   int rejected = 0;
   int found = 0;
+  int composition_count = -1;
 
   detect_info->checked |= CATEGORY_MASK_ISO;
 
@@ -2558,6 +2769,8 @@ detect_coding_iso_2022 (coding, detect_info)
       struct coding_system *this = &(coding_categories[i]);
       Lisp_Object attrs, val;
 
+      if (this->id < 0)
+	continue;
       attrs = CODING_ID_ATTRS (this->id);
       if (CODING_ISO_FLAGS (this) & CODING_ISO_FLAG_FULL_SUPPORT
 	  && ! EQ (CODING_ATTR_SAFE_CHARSETS (attrs), Viso_2022_charset_list))
@@ -2616,10 +2829,20 @@ detect_coding_iso_2022 (coding, detect_info)
 	      rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
 	      break;
 	    }
+	  else if (c == '1')
+	    {
+	      /* End of composition.  */
+	      if (composition_count < 0
+		  || composition_count > MAX_COMPOSITION_COMPONENTS)
+		/* Invalid */
+		break;
+	      composition_count = -1;
+	      found |= CATEGORY_MASK_ISO;
+	    }
 	  else if (c >= '0' && c <= '4')
 	    {
 	      /* ESC <Fp> for start/end composition.  */
-	      found |= CATEGORY_MASK_ISO;
+	      composition_count = 0;
 	      break;
 	    }
 	  else
@@ -2690,6 +2913,8 @@ detect_coding_iso_2022 (coding, detect_info)
 	    continue;
 	  if (c < 0x80)
 	    {
+	      if (composition_count >= 0)
+		composition_count++;
 	      single_shifting = 0;
 	      break;
 	    }
@@ -2714,9 +2939,17 @@ detect_coding_iso_2022 (coding, detect_info)
 		    }
 
 		  if (i & 1 && src < src_end)
-		    rejected |= CATEGORY_MASK_ISO_8_2;
+		    {
+		      rejected |= CATEGORY_MASK_ISO_8_2;
+		      if (composition_count >= 0)
+			composition_count += i;
+		    }
 		  else
-		    found |= CATEGORY_MASK_ISO_8_2;
+		    {
+		      found |= CATEGORY_MASK_ISO_8_2;
+		      if (composition_count >= 0)
+			composition_count += i / 2;
+		    }
 		}
 	      break;
 	    }
@@ -2833,6 +3066,8 @@ detect_coding_iso_2022 (coding, detect_info)
 	    break;							\
 	if (p == src_end - 1)						\
 	  {								\
+	    if (coding->mode & CODING_MODE_LAST_BLOCK)			\
+	      goto invalid_code;					\
 	    /* The current composition doesn't end in the current	\
 	       source.  */						\
 	    record_conversion_result					\
@@ -2945,6 +3180,8 @@ decode_coding_iso_2022 (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   setup_iso_safe_charsets (attrs);
@@ -2962,7 +3199,10 @@ decode_coding_iso_2022 (coding)
       if (charbuf >= charbuf_end)
 	break;
 
-      ONE_MORE_BYTE (c1);
+      if (byte_after_cr >= 0)
+	c1 = byte_after_cr, byte_after_cr = -1;
+      else
+	ONE_MORE_BYTE (c1);
       if (c1 < 0)
 	goto invalid_code;
 
@@ -2975,10 +3215,15 @@ decode_coding_iso_2022 (coding)
 	      if (composition_state == COMPOSING_RULE
 		  || composition_state == COMPOSING_COMPONENT_RULE)
 		{
-		  DECODE_COMPOSITION_RULE (c1);
-		  components[component_idx++] = c1;
-		  composition_state--;
-		  continue;
+		  if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
+		    {
+		      DECODE_COMPOSITION_RULE (c1);
+		      components[component_idx++] = c1;
+		      composition_state--;
+		      continue;
+		    }
+		  /* Too long composition.  */
+		  MAYBE_FINISH_COMPOSITION ();
 		}
 	    }
 	  if (charset_id_0 < 0
@@ -2995,10 +3240,14 @@ decode_coding_iso_2022 (coding)
 	      if (composition_state == COMPOSING_RULE
 		  || composition_state == COMPOSING_COMPONENT_RULE)
 		{
-		  DECODE_COMPOSITION_RULE (c1);
-		  components[component_idx++] = c1;
-		  composition_state--;
-		  continue;
+		  if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
+		    {
+		      DECODE_COMPOSITION_RULE (c1);
+		      components[component_idx++] = c1;
+		      composition_state--;
+		      continue;
+		    }
+		  MAYBE_FINISH_COMPOSITION ();
 		}
 	    }
 	  if (charset_id_0 < 0)
@@ -3021,6 +3270,8 @@ decode_coding_iso_2022 (coding)
 	  break;
 
 	case ISO_control_0:
+	  if (eol_crlf && c1 == '\r')
+	    ONE_MORE_BYTE (byte_after_cr);
 	  MAYBE_FINISH_COMPOSITION ();
 	  charset = CHARSET_FROM_ID (charset_ascii);
 	  break;
@@ -3354,11 +3605,20 @@ decode_coding_iso_2022 (coding)
 	}
       else
 	{
-	  components[component_idx++] = c;
-	  if (method == COMPOSITION_WITH_RULE
-	      || (method == COMPOSITION_WITH_RULE_ALTCHARS
-		  && composition_state == COMPOSING_COMPONENT_CHAR))
-	    composition_state++;
+	  if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
+	    {
+	      components[component_idx++] = c;
+	      if (method == COMPOSITION_WITH_RULE
+		  || (method == COMPOSITION_WITH_RULE_ALTCHARS
+		      && composition_state == COMPOSING_COMPONENT_CHAR))
+		composition_state++;
+	    }
+	  else
+	    {
+	      MAYBE_FINISH_COMPOSITION ();
+	      *charbuf++ = c;
+	      char_offset++;
+	    }
 	}
       continue;
 
@@ -4091,6 +4351,8 @@ decode_coding_sjis (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
 
@@ -4111,11 +4373,18 @@ decode_coding_sjis (coding)
       if (charbuf >= charbuf_end)
 	break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+	c = byte_after_cr, byte_after_cr = -1;
+      else
+	ONE_MORE_BYTE (c);
       if (c < 0)
 	goto invalid_code;
       if (c < 0x80)
-	charset = charset_roman;
+	{
+	  if (eol_crlf && c == '\r')
+	    ONE_MORE_BYTE (byte_after_cr);
+	  charset = charset_roman;
+	}
       else if (c == 0x80 || c == 0xA0)
 	goto invalid_code;
       else if (c >= 0xA1 && c <= 0xDF)
@@ -4193,6 +4462,8 @@ decode_coding_big5 (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   val = charset_list;
@@ -4210,12 +4481,19 @@ decode_coding_big5 (coding)
       if (charbuf >= charbuf_end)
 	break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+	c = byte_after_cr, byte_after_cr = -1;
+      else
+	ONE_MORE_BYTE (c);
 
       if (c < 0)
 	goto invalid_code;
       if (c < 0x80)
-	charset = charset_roman;
+	{
+	  if (eol_crlf && c == '\r')
+	    ONE_MORE_BYTE (byte_after_cr);
+	  charset = charset_roman;
+	}
       else
 	{
 	  /* BIG5 -> Big5 */
@@ -4590,7 +4868,7 @@ encode_coding_ccl (coding)
       else
 	{
 	  ASSURE_DESTINATION (ccl.produced);
-	  for (i = 0; i < ccl.produced; i++)	
+	  for (i = 0; i < ccl.produced; i++)
 	    *dst++ = destination_charbuf[i] & 0xFF;
 	  produced_chars += ccl.produced;
 	}
@@ -4632,10 +4910,19 @@ static void
 decode_coding_raw_text (coding)
      struct coding_system *coding;
 {
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+
   coding->chars_at_source = 1;
-  coding->consumed_char = 0;
-  coding->consumed = 0;
-  record_conversion_result (coding, CODING_RESULT_SUCCESS);
+  coding->consumed_char = coding->src_chars;
+  coding->consumed = coding->src_bytes;
+  if (eol_crlf && coding->src_bytes > 0	/* Source may be empty.  */
+      && coding->source[coding->src_bytes - 1] == '\r')
+    {
+      coding->consumed_char--, coding->consumed--;  /* Unconsume the CR.  */
+      record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_SRC);
+    }
+  else
+    record_conversion_result (coding, CODING_RESULT_SUCCESS);
 }
 
 static int
@@ -4702,7 +4989,6 @@ encode_coding_raw_text (coding)
 		*dst++ = CHAR_TO_BYTE8 (c);
 	      else
 		CHAR_STRING_ADVANCE (c, dst);
-	      produced_chars++;
 	    }
 	}
       else
@@ -4710,8 +4996,8 @@ encode_coding_raw_text (coding)
 	  ASSURE_DESTINATION (charbuf_end - charbuf);
 	  while (charbuf < charbuf_end && dst < dst_end)
 	    *dst++ = *charbuf++;
-	  produced_chars = dst - (coding->destination + coding->dst_bytes);
 	}
+      produced_chars = dst - (coding->destination + coding->produced);
     }
   record_conversion_result (coding, CODING_RESULT_SUCCESS);
   coding->produced_char += produced_chars;
@@ -4732,16 +5018,20 @@ detect_coding_charset (coding, detect_info)
   const unsigned char *src_end = coding->source + coding->src_bytes;
   int multibytep = coding->src_multibyte;
   int consumed_chars = 0;
-  Lisp_Object attrs, valids;
+  Lisp_Object attrs, valids, name;
   int found = 0;
   int head_ascii = coding->head_ascii;
+  int check_latin_extra = 0;
 
   detect_info->checked |= CATEGORY_MASK_CHARSET;
 
   coding = &coding_categories[coding_category_charset];
   attrs = CODING_ID_ATTRS (coding->id);
   valids = AREF (attrs, coding_attr_charset_valids);
-
+  name = CODING_ID_NAME (coding->id);
+  if (VECTORP (Vlatin_extra_code_table)
+      && ! strncmp ((char *) SDATA (SYMBOL_NAME (name)), "iso-8859-", 9))
+    check_latin_extra = 1;
   if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
     src += head_ascii;
 
@@ -4760,7 +5050,13 @@ detect_coding_charset (coding, detect_info)
       if (NILP (val))
 	break;
       if (c >= 0x80)
-	found = CATEGORY_MASK_CHARSET;
+	{
+	  if (c < 0xA0
+	      && check_latin_extra
+	      && NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+	    break;
+	  found = CATEGORY_MASK_CHARSET;
+	}
       if (INTEGERP (val))
 	{
 	  charset = CHARSET_FROM_ID (XFASTINT (val));
@@ -4770,7 +5066,7 @@ detect_coding_charset (coding, detect_info)
 	      if (src == src_end)
 		goto too_short;
 	      ONE_MORE_BYTE (c);
-	      if (c < charset->code_space[(dim - 1 - idx) * 2] 
+	      if (c < charset->code_space[(dim - 1 - idx) * 2]
 		  || c > charset->code_space[(dim - 1 - idx) * 2 + 1])
 		break;
 	    }
@@ -4829,6 +5125,8 @@ decode_coding_charset (coding)
   int char_offset = coding->produced_char;
   int last_offset = char_offset;
   int last_id = charset_ascii;
+  int eol_crlf = EQ (CODING_ID_EOL_TYPE (coding->id), Qdos);
+  int byte_after_cr = -1;
 
   CODING_GET_INFO (coding, attrs, charset_list);
   valids = AREF (attrs, coding_attr_charset_valids);
@@ -4848,13 +5146,23 @@ decode_coding_charset (coding)
       if (charbuf >= charbuf_end)
 	break;
 
-      ONE_MORE_BYTE (c);
+      if (byte_after_cr >= 0)
+	{
+	  c = byte_after_cr;
+	  byte_after_cr = -1;
+	}
+      else
+	{
+	  ONE_MORE_BYTE (c);
+	  if (eol_crlf && c == '\r')
+	    ONE_MORE_BYTE (byte_after_cr);
+	}
       if (c < 0)
 	goto invalid_code;
       code = c;
 
       val = AREF (valids, c);
-      if (NILP (val))
+      if (! INTEGERP (val) && ! CONSP (val))
 	goto invalid_code;
       if (INTEGERP (val))
 	{
@@ -5090,18 +5398,24 @@ setup_coding_system (coding_system, coding)
     }
   else if (EQ (coding_type, Qutf_8))
     {
+      val = AREF (attrs, coding_attr_utf_bom);
+      CODING_UTF_8_BOM (coding) = (CONSP (val) ? utf_detect_bom
+				   : EQ (val, Qt) ? utf_with_bom
+				   : utf_without_bom);
       coding->detector = detect_coding_utf_8;
       coding->decoder = decode_coding_utf_8;
       coding->encoder = encode_coding_utf_8;
       coding->common_flags
 	|= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
+      if (CODING_UTF_8_BOM (coding) == utf_detect_bom)
+	coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
     }
   else if (EQ (coding_type, Qutf_16))
     {
-      val = AREF (attrs, coding_attr_utf_16_bom);
-      CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_16_detect_bom
-				    : EQ (val, Qt) ? utf_16_with_bom
-				    : utf_16_without_bom);
+      val = AREF (attrs, coding_attr_utf_bom);
+      CODING_UTF_16_BOM (coding) = (CONSP (val) ? utf_detect_bom
+				    : EQ (val, Qt) ? utf_with_bom
+				    : utf_without_bom);
       val = AREF (attrs, coding_attr_utf_16_endian);
       CODING_UTF_16_ENDIAN (coding) = (EQ (val, Qbig) ? utf_16_big_endian
 				       : utf_16_little_endian);
@@ -5111,7 +5425,7 @@ setup_coding_system (coding_system, coding)
       coding->encoder = encode_coding_utf_16;
       coding->common_flags
 	|= (CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK);
-      if (CODING_UTF_16_BOM (coding) == utf_16_detect_bom)
+      if (CODING_UTF_16_BOM (coding) == utf_detect_bom)
 	coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
     }
   else if (EQ (coding_type, Qccl))
@@ -5522,6 +5836,7 @@ detect_coding (coding)
   coding_set_source (coding);
 
   src_end = coding->source + coding->src_bytes;
+  coding->head_ascii = 0;
 
   /* If we have not yet decided the text encoding type, detect it
      now.  */
@@ -5529,32 +5844,55 @@ detect_coding (coding)
     {
       int c, i;
       struct coding_detection_info detect_info;
+      int null_byte_found = 0, eight_bit_found = 0;
 
       detect_info.checked = detect_info.found = detect_info.rejected = 0;
-      for (i = 0, src = coding->source; src < src_end; i++, src++)
+      for (src = coding->source; src < src_end; src++)
 	{
 	  c = *src;
 	  if (c & 0x80)
-	    break;
-	  if (c < 0x20
-	      && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-	      && ! inhibit_iso_escape_detection
-	      && ! detect_info.checked)
 	    {
-	      coding->head_ascii = src - (coding->source + coding->consumed);
-	      if (detect_coding_iso_2022 (coding, &detect_info))
+	      eight_bit_found = 1;
+	      if (null_byte_found)
+		break;
+	    }
+	  else if (c < 0x20)
+	    {
+	      if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+		  && ! inhibit_iso_escape_detection
+		  && ! detect_info.checked)
 		{
-		  /* We have scanned the whole data.  */
-		  if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
-		    /* We didn't find an 8-bit code.  */
-		    src = src_end;
-		  break;
+		  if (detect_coding_iso_2022 (coding, &detect_info))
+		    {
+		      /* We have scanned the whole data.  */
+		      if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+			{
+			  /* We didn't find an 8-bit code.  We may
+			     have found a null-byte, but it's very
+			     rare that a binary file conforms to
+			     ISO-2022.  */
+			  src = src_end;
+			  coding->head_ascii = src - coding->source;
+			}
+		      detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
+		      break;
+		    }
 		}
+	      else if (! c)
+		{
+		  null_byte_found = 1;
+		  if (eight_bit_found)
+		    break;
+		}
+	      if (! eight_bit_found)
+		coding->head_ascii++;
 	    }
+	  else if (! eight_bit_found)
+	    coding->head_ascii++;
 	}
-      coding->head_ascii = src - (coding->source + coding->consumed);
 
-      if (coding->head_ascii < coding->src_bytes
+      if (null_byte_found || eight_bit_found
+	  || coding->head_ascii < coding->src_bytes
 	  || detect_info.found)
 	{
 	  enum coding_category category;
@@ -5570,39 +5908,49 @@ detect_coding (coding)
 		  break;
 	      }
 	  else
-	    for (i = 0; i < coding_category_raw_text; i++)
-	      {
-		category = coding_priorities[i];
-		this = coding_categories + category;
-		if (this->id < 0)
-		  {
-		    /* No coding system of this category is defined.  */
-		    detect_info.rejected |= (1 << category);
-		  }
-		else if (category >= coding_category_raw_text)
-		  continue;
-		else if (detect_info.checked & (1 << category))
-		  {
-		    if (detect_info.found & (1 << category))
+	    {
+	      if (null_byte_found)
+		{
+		  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
+		  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
+		}
+	      for (i = 0; i < coding_category_raw_text; i++)
+		{
+		  category = coding_priorities[i];
+		  this = coding_categories + category;
+		  if (this->id < 0)
+		    {
+		      /* No coding system of this category is defined.  */
+		      detect_info.rejected |= (1 << category);
+		    }
+		  else if (category >= coding_category_raw_text)
+		    continue;
+		  else if (detect_info.checked & (1 << category))
+		    {
+		      if (detect_info.found & (1 << category))
+			break;
+		    }
+		  else if ((*(this->detector)) (coding, &detect_info)
+			   && detect_info.found & (1 << category))
+		    {
+		      if (category == coding_category_utf_16_auto)
+			{
+			  if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+			    category = coding_category_utf_16_le;
+			  else
+			    category = coding_category_utf_16_be;
+			}
 		      break;
-		  }
-		else if ((*(this->detector)) (coding, &detect_info)
-			 && detect_info.found & (1 << category))
-		  {
-		    if (category == coding_category_utf_16_auto)
-		      {
-			if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-			  category = coding_category_utf_16_le;
-			else
-			  category = coding_category_utf_16_be;
-		      }
-		    break;
-		  }
-	      }
-	  
+		    }
+		}
+	    }
+
 	  if (i < coding_category_raw_text)
 	    setup_coding_system (CODING_ID_NAME (this->id), coding);
-	  else if (detect_info.rejected == CATEGORY_MASK_ANY)
+	  else if (null_byte_found)
+	    setup_coding_system (Qno_conversion, coding);
+	  else if ((detect_info.rejected & CATEGORY_MASK_ANY)
+		   == CATEGORY_MASK_ANY)
 	    setup_coding_system (Qraw_text, coding);
 	  else if (detect_info.rejected)
 	    for (i = 0; i < coding_category_raw_text; i++)
@@ -5614,6 +5962,25 @@ detect_coding (coding)
 		}
 	}
     }
+  else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
+	   == coding_category_utf_8_auto)
+    {
+      Lisp_Object coding_systems;
+      struct coding_detection_info detect_info;
+
+      coding_systems
+	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
+      detect_info.found = detect_info.rejected = 0;
+      coding->head_ascii = 0;
+      if (CONSP (coding_systems)
+	  && detect_coding_utf_8 (coding, &detect_info))
+	{
+	  if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
+	    setup_coding_system (XCAR (coding_systems), coding);
+	  else
+	    setup_coding_system (XCDR (coding_systems), coding);
+	}
+    }
   else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
 	   == coding_category_utf_16_auto)
     {
@@ -5621,8 +5988,9 @@ detect_coding (coding)
       struct coding_detection_info detect_info;
 
       coding_systems
-	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_16_bom);
+	= AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
       detect_info.found = detect_info.rejected = 0;
+      coding->head_ascii = 0;
       if (CONSP (coding_systems)
 	  && detect_coding_utf_16 (coding, &detect_info))
 	{
@@ -5641,7 +6009,7 @@ decode_eol (coding)
 {
   Lisp_Object eol_type;
   unsigned char *p, *pbeg, *pend;
-  
+
   eol_type = CODING_ID_EOL_TYPE (coding->id);
   if (EQ (eol_type, Qunix))
     return;
@@ -5880,19 +6248,21 @@ produce_chars (coding, translation_table, last_block)
 {
   unsigned char *dst = coding->destination + coding->produced;
   unsigned char *dst_end = coding->destination + coding->dst_bytes;
-  int produced;
-  int produced_chars = 0;
+  EMACS_INT produced;
+  EMACS_INT produced_chars = 0;
   int carryover = 0;
 
   if (! coding->chars_at_source)
     {
-      /* Characters are in coding->charbuf.  */
+      /* Source characters are in coding->charbuf.  */
       int *buf = coding->charbuf;
       int *buf_end = buf + coding->charbuf_used;
 
-      if (BUFFERP (coding->src_object)
-	  && EQ (coding->src_object, coding->dst_object))
-	dst_end = ((unsigned char *) coding->source) + coding->consumed;
+      if (EQ (coding->src_object, coding->dst_object))
+	{
+	  coding_set_source (coding);
+	  dst_end = ((unsigned char *) coding->source) + coding->consumed;
+	}
 
       while (buf < buf_end)
 	{
@@ -5919,7 +6289,13 @@ produce_chars (coding, translation_table, last_block)
 					   buf_end - buf
 					   + MAX_MULTIBYTE_LENGTH * to_nchars,
 					   dst);
-		  dst_end = coding->destination + coding->dst_bytes;
+		  if (EQ (coding->src_object, coding->dst_object))
+		    {
+		      coding_set_source (coding);
+		      dst_end = ((unsigned char *) coding->source) + coding->consumed;
+		    }
+		  else
+		    dst_end = coding->destination + coding->dst_bytes;
 		}
 
 	      for (i = 0; i < to_nchars; i++)
@@ -5928,7 +6304,7 @@ produce_chars (coding, translation_table, last_block)
 		    c = XINT (AREF (trans, i));
 		  if (coding->dst_multibyte
 		      || ! CHAR_BYTE8_P (c))
-		    CHAR_STRING_ADVANCE (c, dst);
+		    CHAR_STRING_ADVANCE_NO_UNIFY (c, dst);
 		  else
 		    *dst++ = CHAR_TO_BYTE8 (c);
 		}
@@ -5945,18 +6321,18 @@ produce_chars (coding, translation_table, last_block)
     }
   else
     {
+      /* Source characters are at coding->source.  */
       const unsigned char *src = coding->source;
-      const unsigned char *src_end = src + coding->src_bytes;
-      Lisp_Object eol_type;
-
-      eol_type = CODING_ID_EOL_TYPE (coding->id);
+      const unsigned char *src_end = src + coding->consumed;
 
+      if (EQ (coding->dst_object, coding->src_object))
+	dst_end = (unsigned char *) src;
       if (coding->src_multibyte != coding->dst_multibyte)
 	{
 	  if (coding->src_multibyte)
 	    {
 	      int multibytep = 1;
-	      int consumed_chars;
+	      EMACS_INT consumed_chars = 0;
 
 	      while (1)
 		{
@@ -5964,37 +6340,23 @@ produce_chars (coding, translation_table, last_block)
 		  int c;
 
 		  ONE_MORE_BYTE (c);
-		  if (c == '\r')
+		  if (dst == dst_end)
 		    {
-		      if (EQ (eol_type, Qdos))
+		      if (EQ (coding->src_object, coding->dst_object))
+			dst_end = (unsigned char *) src;
+		      if (dst == dst_end)
 			{
-			  if (src == src_end)
-			    {
-			      record_conversion_result
-				(coding, CODING_RESULT_INSUFFICIENT_SRC);
-			      goto no_more_source;
-			    }
-			  if (*src == '\n')
-			    c = *src++;
+			  EMACS_INT offset = src - coding->source;
+
+			  dst = alloc_destination (coding, src_end - src + 1,
+						   dst);
+			  dst_end = coding->destination + coding->dst_bytes;
+			  coding_set_source (coding);
+			  src = coding->source + offset;
+			  src_end = coding->source + coding->src_bytes;
+			  if (EQ (coding->src_object, coding->dst_object))
+			    dst_end = (unsigned char *) src;
 			}
-		      else if (EQ (eol_type, Qmac))
-			c = '\n';
-		    }
-		  if (dst == dst_end)
-		    {
-		      coding->consumed = src - coding->source;
-
-		    if (EQ (coding->src_object, coding->dst_object))
-		      dst_end = (unsigned char *) src;
-		    if (dst == dst_end)
-		      {
-			dst = alloc_destination (coding, src_end - src + 1,
-						 dst);
-			dst_end = coding->destination + coding->dst_bytes;
-			coding_set_source (coding);
-			src = coding->source + coding->consumed;
-			src_end = coding->source + coding->src_bytes;
-		      }
 		    }
 		  *dst++ = c;
 		  produced_chars++;
@@ -6008,31 +6370,26 @@ produce_chars (coding, translation_table, last_block)
 		int multibytep = 1;
 		int c = *src++;
 
-		if (c == '\r')
-		  {
-		    if (EQ (eol_type, Qdos))
-		      {
-			if (src < src_end
-			    && *src == '\n')
-			  c = *src++;
-		      }
-		    else if (EQ (eol_type, Qmac))
-		      c = '\n';
-		  }
 		if (dst >= dst_end - 1)
 		  {
-		    coding->consumed = src - coding->source;
-
 		    if (EQ (coding->src_object, coding->dst_object))
 		      dst_end = (unsigned char *) src;
 		    if (dst >= dst_end - 1)
 		      {
-			dst = alloc_destination (coding, src_end - src + 2,
-						 dst);
+			EMACS_INT offset = src - coding->source;
+			EMACS_INT more_bytes;
+
+			if (EQ (coding->src_object, coding->dst_object))
+			  more_bytes = ((src_end - src) / 2) + 2;
+			else
+			  more_bytes = src_end - src + 2;
+			dst = alloc_destination (coding, more_bytes, dst);
 			dst_end = coding->destination + coding->dst_bytes;
 			coding_set_source (coding);
-			src = coding->source + coding->consumed;
+			src = coding->source + offset;
 			src_end = coding->source + coding->src_bytes;
+			if (EQ (coding->src_object, coding->dst_object))
+			  dst_end = (unsigned char *) src;
 		      }
 		  }
 		EMIT_ONE_BYTE (c);
@@ -6042,7 +6399,7 @@ produce_chars (coding, translation_table, last_block)
 	{
 	  if (!EQ (coding->src_object, coding->dst_object))
 	    {
-	      int require = coding->src_bytes - coding->dst_bytes;
+	      EMACS_INT require = coding->src_bytes - coding->dst_bytes;
 
 	      if (require > 0)
 		{
@@ -6054,28 +6411,10 @@ produce_chars (coding, translation_table, last_block)
 		  src_end = coding->source + coding->src_bytes;
 		}
 	    }
-	  produced_chars = coding->src_chars;
+	  produced_chars = coding->consumed_char;
 	  while (src < src_end)
-	    {
-	      int c = *src++;
-
-	      if (c == '\r')
-		{
-		  if (EQ (eol_type, Qdos))
-		    {
-		      if (src < src_end
-			  && *src == '\n')
-			c = *src++;
-		      produced_chars--;
-		    }
-		  else if (EQ (eol_type, Qmac))
-		    c = '\n';
-		}
-	      *dst++ = c;
-	    }
+	    *dst++ = *src++;
 	}
-      coding->consumed = coding->src_bytes;
-      coding->consumed_char = coding->src_chars;
     }
 
   produced = dst - (coding->destination + coding->produced);
@@ -6538,12 +6877,12 @@ consume_chars (coding, translation_table, max_lookup)
 	  if (coding->encoder == encode_coding_raw_text)
 	    c = *src++, pos++;
 	  else if ((bytes = MULTIBYTE_LENGTH (src, src_end)) > 0)
-	    c = STRING_CHAR_ADVANCE (src), pos += bytes;
+	    c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos += bytes;
 	  else
 	    c = BYTE8_TO_CHAR (*src), src++, pos++;
 	}
       else
-	c = STRING_CHAR_ADVANCE (src), pos++;
+	c = STRING_CHAR_ADVANCE_NO_UNIFY (src), pos++;
       if ((c == '\r') && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
 	c = '\n';
       if (! EQ (eol_type, Qunix))
@@ -6686,14 +7025,18 @@ make_conversion_work_buffer (multibyte)
     }
   else
     {
-      name = Vcode_conversion_workbuf_name;
-      workbuf = Fget_buffer_create (name);
-      if (NILP (Vcode_conversion_reused_workbuf))
-	Vcode_conversion_reused_workbuf = workbuf;
+      if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
+	Vcode_conversion_reused_workbuf
+	  = Fget_buffer_create (Vcode_conversion_workbuf_name);
+      workbuf = Vcode_conversion_reused_workbuf;
     }
   current = current_buffer;
   set_buffer_internal (XBUFFER (workbuf));
-  Ferase_buffer ();      
+  /* We can't allow modification hooks to run in the work buffer.  For
+     instance, directory_files_internal assumes that file decoding
+     doesn't compile new regexps.  */
+  Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
+  Ferase_buffer ();
   current_buffer->undo_list = Qt;
   current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
   set_buffer_internal (current);
@@ -6852,11 +7195,11 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
   EMACS_INT chars = to - from;
   EMACS_INT bytes = to_byte - from_byte;
   Lisp_Object attrs;
-  Lisp_Object buffer;
   int saved_pt = -1, saved_pt_byte;
   int need_marker_adjustment = 0;
+  Lisp_Object old_deactivate_mark;
 
-  buffer = Fcurrent_buffer ();
+  old_deactivate_mark = Vdeactivate_mark;
 
   if (NILP (dst_object))
     {
@@ -6891,6 +7234,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 	    }
 	  saved_pt = PT, saved_pt_byte = PT_BYTE;
 	  TEMP_SET_PT_BOTH (from, from_byte);
+	  current_buffer->text->inhibit_shrinking = 1;
 	  del_range_both (from, from_byte, to, to_byte, 1);
 	  coding->src_pos = -chars;
 	  coding->src_pos_byte = -bytes;
@@ -6910,10 +7254,10 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
       || (! NILP (CODING_ATTR_POST_READ (attrs))
 	  && NILP (dst_object)))
     {
-      coding->dst_object = code_conversion_save (1, 1);
+      coding->dst_multibyte = !CODING_FOR_UNIBYTE (coding);
+      coding->dst_object = code_conversion_save (1, coding->dst_multibyte);
       coding->dst_pos = BEG;
       coding->dst_pos_byte = BEG_BYTE;
-      coding->dst_multibyte = 1;
     }
   else if (BUFFERP (dst_object))
     {
@@ -6928,6 +7272,9 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
     {
       code_conversion_save (0, 0);
       coding->dst_object = Qnil;
+      /* Most callers presume this will return a multibyte result, and they
+	 won't use `binary' or `raw-text' anyway, so let's not worry about
+	 CODING_FOR_UNIBYTE.  */
       coding->dst_multibyte = 1;
     }
 
@@ -6938,12 +7285,13 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 
   if (! NILP (CODING_ATTR_POST_READ (attrs)))
     {
-      struct gcpro gcpro1, gcpro2;
+      struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
       EMACS_INT prev_Z = Z, prev_Z_BYTE = Z_BYTE;
       Lisp_Object val;
 
       TEMP_SET_PT_BOTH (coding->dst_pos, coding->dst_pos_byte);
-      GCPRO2 (coding->src_object, coding->dst_object);
+      GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
+	      old_deactivate_mark);
       val = safe_call1 (CODING_ATTR_POST_READ (attrs),
 			make_number (coding->produced_char));
       UNGCPRO;
@@ -6961,8 +7309,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
       set_buffer_internal (XBUFFER (coding->dst_object));
       if (dst_bytes < coding->produced)
 	{
-	  destination
-	    = (unsigned char *) xrealloc (destination, coding->produced);
+	  destination = xrealloc (destination, coding->produced);
 	  if (! destination)
 	    {
 	      record_conversion_result (coding,
@@ -6984,6 +7331,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 	 As we have moved PT while replacing the original buffer
 	 contents, we must recover it now.  */
       set_buffer_internal (XBUFFER (src_object));
+      current_buffer->text->inhibit_shrinking = 0;
       if (saved_pt < from)
 	TEMP_SET_PT_BOTH (saved_pt, saved_pt_byte);
       else if (saved_pt < from + chars)
@@ -7019,6 +7367,7 @@ decode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 	}
     }
 
+  Vdeactivate_mark = old_deactivate_mark;
   unbind_to (count, coding->dst_object);
 }
 
@@ -7035,12 +7384,12 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
   EMACS_INT chars = to - from;
   EMACS_INT bytes = to_byte - from_byte;
   Lisp_Object attrs;
-  Lisp_Object buffer;
   int saved_pt = -1, saved_pt_byte;
   int need_marker_adjustment = 0;
   int kill_src_buffer = 0;
+  Lisp_Object old_deactivate_mark;
 
-  buffer = Fcurrent_buffer ();
+  old_deactivate_mark = Vdeactivate_mark;
 
   coding->src_object = src_object;
   coding->src_chars = chars;
@@ -7082,11 +7431,15 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 
       {
 	Lisp_Object args[3];
+	struct gcpro gcpro1, gcpro2, gcpro3, gcpro4, gcpro5;
 
+	GCPRO5 (coding->src_object, coding->dst_object, src_object, dst_object,
+		old_deactivate_mark);
 	args[0] = CODING_ATTR_PRE_WRITE (attrs);
 	args[1] = make_number (BEG);
 	args[2] = make_number (Z);
 	safe_call (3, args);
+	UNGCPRO;
       }
       if (XBUFFER (coding->src_object) != current_buffer)
 	kill_src_buffer = 1;
@@ -7137,8 +7490,13 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 	}
       else
 	{
-	  coding->dst_pos = BUF_PT (XBUFFER (dst_object));
-	  coding->dst_pos_byte = BUF_PT_BYTE (XBUFFER (dst_object));
+	  struct buffer *current = current_buffer;
+
+	  set_buffer_temp (XBUFFER (dst_object));
+	  coding->dst_pos = PT;
+	  coding->dst_pos_byte = PT_BYTE;
+	  move_gap_both (coding->dst_pos, coding->dst_pos_byte);
+	  set_buffer_temp (current);
 	}
       coding->dst_multibyte
 	= ! NILP (XBUFFER (dst_object)->enable_multibyte_characters);
@@ -7217,6 +7575,8 @@ encode_coding_object (coding, src_object, from, from_byte, to, to_byte,
 
   if (kill_src_buffer)
     Fkill_buffer (coding->src_object);
+
+  Vdeactivate_mark = old_deactivate_mark;
   unbind_to (count, Qnil);
 }
 
@@ -7237,14 +7597,14 @@ DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
        doc: /* Return t if OBJECT is nil or a coding-system.
 See the documentation of `define-coding-system' for information
 about coding-system objects.  */)
-     (obj)
-     Lisp_Object obj;
+     (object)
+     Lisp_Object object;
 {
-  if (NILP (obj)
-      || CODING_SYSTEM_ID (obj) >= 0)
+  if (NILP (object)
+      || CODING_SYSTEM_ID (object) >= 0)
     return Qt;
-  if (! SYMBOLP (obj)
-      || NILP (Fget (obj, Qcoding_system_define_form)))
+  if (! SYMBOLP (object)
+      || NILP (Fget (object, Qcoding_system_define_form)))
     return Qnil;
   return Qt;
 }
@@ -7336,11 +7696,12 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
 {
   const unsigned char *src_end = src + src_bytes;
   Lisp_Object attrs, eol_type;
-  Lisp_Object val;
+  Lisp_Object val = Qnil;
   struct coding_system coding;
   int id;
   struct coding_detection_info detect_info;
   enum coding_category base_category;
+  int null_byte_found = 0, eight_bit_found = 0;
 
   if (NILP (coding_system))
     coding_system = Qundecided;
@@ -7355,6 +7716,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
   coding.src_multibyte = multibytep;
   coding.consumed = 0;
   coding.mode |= CODING_MODE_LAST_BLOCK;
+  coding.head_ascii = 0;
 
   detect_info.checked = detect_info.found = detect_info.rejected = 0;
 
@@ -7367,32 +7729,55 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
       int c, i;
 
       /* Skip all ASCII bytes except for a few ISO2022 controls.  */
-      for (i = 0; src < src_end; i++, src++)
+      for (; src < src_end; src++)
 	{
 	  c = *src;
 	  if (c & 0x80)
-	    break;
-	  if (c < 0x20
-	      && (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-	      && ! inhibit_iso_escape_detection)
 	    {
-	      coding.head_ascii = src - coding.source;
-	      if (detect_coding_iso_2022 (&coding, &detect_info))
+	      eight_bit_found = 1;
+	      if (null_byte_found)
+		break;
+	    }
+	  else if (c < 0x20)
+	    {
+	      if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
+		  && ! inhibit_iso_escape_detection
+		  && ! detect_info.checked)
 		{
-		  /* We have scanned the whole data.  */
-		  if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
-		    /* We didn't find an 8-bit code.  */
-		    src = src_end;
-		  break;
+		  if (detect_coding_iso_2022 (&coding, &detect_info))
+		    {
+		      /* We have scanned the whole data.  */
+		      if (! (detect_info.rejected & CATEGORY_MASK_ISO_7_ELSE))
+			{
+			  /* We didn't find an 8-bit code.  We may
+			     have found a null-byte, but it's very
+			     rare that a binary file conforms to
+			     ISO-2022.  */
+			  src = src_end;
+			  coding.head_ascii = src - coding.source;
+			}
+		      detect_info.rejected |= ~CATEGORY_MASK_ISO_ESCAPE;
+		      break;
+		    }
+		}
+	      else if (! c)
+		{
+		  null_byte_found = 1;
+		  if (eight_bit_found)
+		    break;
 		}
+	      if (! eight_bit_found)
+		coding.head_ascii++;
 	    }
+	  else if (! eight_bit_found)
+	    coding.head_ascii++;
 	}
-      coding.head_ascii = src - coding.source;
 
-      if (src < src_end
+      if (null_byte_found || eight_bit_found
+	  || coding.head_ascii < coding.src_bytes
 	  || detect_info.found)
 	{
-	  if (src == src_end)
+	  if (coding.head_ascii == coding.src_bytes)
 	    /* As all bytes are 7-bit, we can ignore non-ISO-2022 codings.  */
 	    for (i = 0; i < coding_category_raw_text; i++)
 	      {
@@ -7402,44 +7787,48 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
 		  break;
 	      }
 	  else
-	    for (i = 0; i < coding_category_raw_text; i++)
-	      {
-		category = coding_priorities[i];
-		this = coding_categories + category;
+	    {
+	      if (null_byte_found)
+		{
+		  detect_info.checked |= ~CATEGORY_MASK_UTF_16;
+		  detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
+		}
+	      for (i = 0; i < coding_category_raw_text; i++)
+		{
+		  category = coding_priorities[i];
+		  this = coding_categories + category;
 
-		if (this->id < 0)
-		  {
-		    /* No coding system of this category is defined.  */
-		    detect_info.rejected |= (1 << category);
-		  }
-		else if (category >= coding_category_raw_text)
-		  continue;
-		else if (detect_info.checked & (1 << category))
-		  {
-		    if (highest
-			&& (detect_info.found & (1 << category)))
-		      break;
-		  }
-		else
-		  {
-		    if ((*(this->detector)) (&coding, &detect_info)
-			&& highest
-			&& (detect_info.found & (1 << category)))
-		      {
-			if (category == coding_category_utf_16_auto)
-			  {
-			    if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-			      category = coding_category_utf_16_le;
-			    else
-			      category = coding_category_utf_16_be;
-			  }
+		  if (this->id < 0)
+		    {
+		      /* No coding system of this category is defined.  */
+		      detect_info.rejected |= (1 << category);
+		    }
+		  else if (category >= coding_category_raw_text)
+		    continue;
+		  else if (detect_info.checked & (1 << category))
+		    {
+		      if (highest
+			  && (detect_info.found & (1 << category)))
 			break;
-		      }
-		  }
-	      }
+		    }
+		  else if ((*(this->detector)) (&coding, &detect_info)
+			   && highest
+			   && (detect_info.found & (1 << category)))
+		    {
+		      if (category == coding_category_utf_16_auto)
+			{
+			  if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+			    category = coding_category_utf_16_le;
+			  else
+			    category = coding_category_utf_16_be;
+			}
+		      break;
+		    }
+		}
+	    }
 	}
 
-      if (detect_info.rejected == CATEGORY_MASK_ANY)
+      if ((detect_info.rejected & CATEGORY_MASK_ANY) == CATEGORY_MASK_ANY)
 	{
 	  detect_info.found = CATEGORY_MASK_RAW_TEXT;
 	  id = coding_categories[coding_category_raw_text].id;
@@ -7472,7 +7861,6 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
 	{
 	  int mask = detect_info.rejected | detect_info.found;
 	  int found = 0;
-	  val = Qnil;
 
 	  for (i = coding_category_raw_text - 1; i >= 0; i--)
 	    {
@@ -7497,6 +7885,19 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
 	  detect_info.found |= found;
 	}
     }
+  else if (base_category == coding_category_utf_8_auto)
+    {
+      if (detect_coding_utf_8 (&coding, &detect_info))
+	{
+	  struct coding_system *this;
+
+	  if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
+	    this = coding_categories + coding_category_utf_8_sig;
+	  else
+	    this = coding_categories + coding_category_utf_8_nosig;
+	  val = Fcons (make_number (this->id), Qnil);
+	}
+    }
   else if (base_category == coding_category_utf_16_auto)
     {
       if (detect_coding_utf_16 (&coding, &detect_info))
@@ -7522,14 +7923,19 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
 
   /* Then, detect eol-format if necessary.  */
   {
-    int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
+    int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
     Lisp_Object tail;
 
     if (VECTORP (eol_type))
       {
 	if (detect_info.found & ~CATEGORY_MASK_UTF_16)
-	  normal_eol = detect_eol (coding.source, src_bytes,
-				   coding_category_raw_text);
+	  {
+	    if (null_byte_found)
+	      normal_eol = EOL_SEEN_LF;
+	    else
+	      normal_eol = detect_eol (coding.source, src_bytes,
+				       coding_category_raw_text);
+	  }
 	if (detect_info.found & (CATEGORY_MASK_UTF_16_BE
 				 | CATEGORY_MASK_UTF_16_BE_NOSIG))
 	  utf_16_be_eol = detect_eol (coding.source, src_bytes,
@@ -7583,7 +7989,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep,
       }
   }
 
-  return (highest ? XCAR (val) : val);
+  return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
 }
 
 
@@ -7593,9 +7999,9 @@ DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
 Return a list of possible coding systems ordered by priority.
 
 If only ASCII characters are found (except for such ISO-2022 control
-characters ISO-2022 as ESC), it returns a list of single element
-`undecided' or its subsidiary coding system according to a detected
-end-of-line format.
+characters as ESC), it returns a list of single element `undecided'
+or its subsidiary coding system according to a detected end-of-line
+format.
 
 If optional argument HIGHEST is non-nil, return the coding system of
 highest priority.  */)
@@ -7630,9 +8036,9 @@ DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
 Return a list of possible coding systems ordered by priority.
 
 If only ASCII characters are found (except for such ISO-2022 control
-characters ISO-2022 as ESC), it returns a list of single element
-`undecided' or its subsidiary coding system according to a detected
-end-of-line format.
+characters as ESC), it returns a list of single element `undecided'
+or its subsidiary coding system according to a detected end-of-line
+format.
 
 If optional argument HIGHEST is non-nil, return the coding system of
 highest priority.  */)
@@ -7800,7 +8206,7 @@ DEFUN ("unencodable-char-position", Funencodable_char_position,
        Sunencodable_char_position, 3, 5, 0,
        doc: /*
 Return position of first un-encodable character in a region.
-START and END specfiy the region and CODING-SYSTEM specifies the
+START and END specify the region and CODING-SYSTEM specifies the
 encoding to check.  Return nil if CODING-SYSTEM does encode the region.
 
 If optional 4th argument COUNT is non-nil, it specifies at most how
@@ -7913,7 +8319,7 @@ START and END are buffer positions specifying the region.
 CODING-SYSTEM-LIST is a list of coding systems to check.
 
 The value is an alist ((CODING-SYSTEM POS0 POS1 ...) ...), where
-CODING-SYSTEM is a member of CODING-SYSTEM-LIst and can't encode the
+CODING-SYSTEM is a member of CODING-SYSTEM-LIST and can't encode the
 whole region, POS0, POS1, ... are buffer positions where non-encodable
 characters are found.
 
@@ -8082,13 +8488,14 @@ START and END are buffer positions.
 
 Optional 4th arguments DESTINATION specifies where the decoded text goes.
 If nil, the region between START and END is replaced by the decoded text.
-If buffer, the decoded text is inserted in the buffer.
-If t, the decoded text is returned.
+If buffer, the decoded text is inserted in that buffer after point (point
+does not move).
+In those cases, the length of the decoded text is returned.
+If DESTINATION is t, the decoded text is returned.
 
 This function sets `last-coding-system-used' to the precise coding system
 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.)
-It returns the length of the decoded text.  */)
+not fully specified.)  */)
      (start, end, coding_system, destination)
      Lisp_Object start, end, coding_system, destination;
 {
@@ -8098,18 +8505,20 @@ It returns the length of the decoded text.  */)
 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
        3, 4, "r\nzCoding system: ",
        doc: /* Encode the current region by specified coding system.
-When called from a program, takes three arguments:
-START, END, and CODING-SYSTEM.  START and END are buffer positions.
+When called from a program, takes four arguments:
+        START, END, CODING-SYSTEM and DESTINATION.
+START and END are buffer positions.
 
 Optional 4th arguments DESTINATION specifies where the encoded text goes.
 If nil, the region between START and END is replace by the encoded text.
-If buffer, the encoded text is inserted in the buffer.
-If t, the encoded text is returned.
+If buffer, the encoded text is inserted in that buffer after point (point
+does not move).
+In those cases, the length of the encoded text is returned.
+If DESTINATION is t, the encoded text is returned.
 
 This function sets `last-coding-system-used' to the precise coding system
 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.)
-It returns the length of the encoded text.  */)
+not fully specified.)  */)
   (start, end, coding_system, destination)
      Lisp_Object start, end, coding_system, destination;
 {
@@ -8182,13 +8591,13 @@ DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
 Optional third arg NOCOPY non-nil means it is OK to return STRING itself
 if the decoding operation is trivial.
 
-Optional fourth arg BUFFER non-nil meant that the decoded text is
-inserted in BUFFER instead of returned as a string.  In this case,
-the return value is BUFFER.
+Optional fourth arg BUFFER non-nil means that the decoded text is
+inserted in that buffer after point (point does not move).  In this
+case, the return value is the length of the decoded text.
 
 This function sets `last-coding-system-used' to the precise coding system
 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
-not fully specified.  */)
+not fully specified.)  */)
   (string, coding_system, nocopy, buffer)
      Lisp_Object string, coding_system, nocopy, buffer;
 {
@@ -8203,9 +8612,9 @@ DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
 Optional third arg NOCOPY non-nil means it is OK to return STRING
 itself if the encoding operation is trivial.
 
-Optional fourth arg BUFFER non-nil meant that the encoded text is
-inserted in BUFFER instead of returned as a string.  In this case,
-the return value is BUFFER.
+Optional fourth arg BUFFER non-nil means that the encoded text is
+inserted in that buffer after point (point does not move).  In this
+case, the return value is the length of the encoded text.
 
 This function sets `last-coding-system-used' to the precise coding system
 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
@@ -8463,9 +8872,9 @@ whichever argument specifies the file name is TARGET.
 TARGET has a meaning which depends on OPERATION:
   For file I/O, TARGET is a file name (except for the special case below).
   For process I/O, TARGET is a process name.
-  For network I/O, TARGET is a service name or a port number
+  For network I/O, TARGET is a service name or a port number.
 
-This function looks up what specified for TARGET in,
+This function looks up what is specified for TARGET in
 `file-coding-system-alist', `process-coding-system-alist',
 or `network-coding-system-alist' depending on OPERATION.
 They may specify a coding system, a cons of coding systems,
@@ -8495,7 +8904,7 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
   operation = args[0];
   if (!SYMBOLP (operation)
       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
-    error ("Invalid first arguement");
+    error ("Invalid first argument");
   if (nargs < 1 + XINT (target_idx))
     error ("Too few arguments for operation: %s",
 	   SDATA (SYMBOL_NAME (operation)));
@@ -8557,10 +8966,10 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
 DEFUN ("set-coding-system-priority", Fset_coding_system_priority,
        Sset_coding_system_priority, 0, MANY, 0,
        doc: /* Assign higher priority to the coding systems given as arguments.
-If multiple coding systems belongs to the same category,
+If multiple coding systems belong to the same category,
 all but the first one are ignored.
 
-usage: (set-coding-system-priority ...)  */)
+usage: (set-coding-system-priority &rest coding-systems)  */)
      (nargs, args)
      int nargs;
      Lisp_Object *args;
@@ -8921,7 +9330,7 @@ usage: (define-coding-system-internal ...)  */)
 	  val = XCDR (bom);
 	  CHECK_CODING_SYSTEM (val);
 	}
-      ASET (attrs, coding_attr_utf_16_bom, bom);
+      ASET (attrs, coding_attr_utf_bom, bom);
 
       endian = args[coding_arg_utf16_endian];
       CHECK_SYMBOL (endian);
@@ -9100,8 +9509,27 @@ usage: (define-coding-system-internal ...)  */)
     }
   else if (EQ (coding_type, Qutf_8))
     {
-      category = coding_category_utf_8;
+      Lisp_Object bom;
+
       CODING_ATTR_ASCII_COMPAT (attrs) = Qt;
+
+      if (nargs < coding_arg_utf8_max)
+	goto short_args;
+
+      bom = args[coding_arg_utf8_bom];
+      if (! NILP (bom) && ! EQ (bom, Qt))
+	{
+	  CHECK_CONS (bom);
+	  val = XCAR (bom);
+	  CHECK_CODING_SYSTEM (val);
+	  val = XCDR (bom);
+	  CHECK_CODING_SYSTEM (val);
+	}
+      ASET (attrs, coding_attr_utf_bom, bom);
+
+      category = (CONSP (bom) ? coding_category_utf_8_auto
+		  : NILP (bom) ? coding_category_utf_8_nosig
+		  : coding_category_utf_8_sig);
     }
   else if (EQ (coding_type, Qundecided))
     category = coding_category_undecided;
@@ -9114,7 +9542,7 @@ usage: (define-coding-system-internal ...)  */)
     = Fcons (QCcategory, Fcons (AREF (Vcoding_category_table, category),
 				CODING_ATTR_PLIST (attrs)));
   CODING_ATTR_PLIST (attrs)
-    = Fcons (QCascii_compatible_p, 
+    = Fcons (QCascii_compatible_p,
 	     Fcons (CODING_ATTR_ASCII_COMPAT (attrs),
 		    CODING_ATTR_PLIST (attrs)));
 
@@ -9193,7 +9621,7 @@ DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put,
 	CHECK_CHARACTER (val);
       CODING_ATTR_MNEMONIC (attrs) = val;
     }
-  else if (EQ (prop, QCdefalut_char))
+  else if (EQ (prop, QCdefault_char))
     {
       if (NILP (val))
 	val = make_number (' ');
@@ -9245,7 +9673,7 @@ DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
   CHECK_SYMBOL (alias);
   CHECK_CODING_SYSTEM_GET_SPEC (coding_system, spec);
   aliases = AREF (spec, 1);
-  /* ALISES should be a list of length more than zero, and the first
+  /* ALIASES should be a list of length more than zero, and the first
      element is a base coding system.  Append ALIAS at the tail of the
      list.  */
   while (!NILP (XCDR (aliases)))
@@ -9323,7 +9751,7 @@ DEFUN ("coding-system-aliases", Fcoding_system_aliases, Scoding_system_aliases,
 DEFUN ("coding-system-eol-type", Fcoding_system_eol_type,
        Scoding_system_eol_type, 1, 1, 0,
        doc: /* Return eol-type of CODING-SYSTEM.
-An eol-type is integer 0, 1, 2, or a vector of coding systems.
+An eol-type is an integer 0, 1, 2, or a vector of coding systems.
 
 Integer values 0, 1, and 2 indicate a format of end-of-line; LF, CRLF,
 and CR respectively.
@@ -9499,7 +9927,7 @@ syms_of_coding ()
 
   DEFSYM (QCcategory, ":category");
   DEFSYM (QCmnemonic, ":mnemonic");
-  DEFSYM (QCdefalut_char, ":default-char");
+  DEFSYM (QCdefault_char, ":default-char");
   DEFSYM (QCdecode_translation_table, ":decode-translation-table");
   DEFSYM (QCencode_translation_table, ":encode-translation-table");
   DEFSYM (QCpost_read_conversion, ":post-read-conversion");
@@ -9522,8 +9950,12 @@ syms_of_coding ()
 	intern ("coding-category-iso-7-else"));
   ASET (Vcoding_category_table, coding_category_iso_8_else,
 	intern ("coding-category-iso-8-else"));
-  ASET (Vcoding_category_table, coding_category_utf_8,
+  ASET (Vcoding_category_table, coding_category_utf_8_auto,
+	intern ("coding-category-utf-8-auto"));
+  ASET (Vcoding_category_table, coding_category_utf_8_nosig,
 	intern ("coding-category-utf-8"));
+  ASET (Vcoding_category_table, coding_category_utf_8_sig,
+	intern ("coding-category-utf-8-sig"));
   ASET (Vcoding_category_table, coding_category_utf_16_be,
 	intern ("coding-category-utf-16-be"));
   ASET (Vcoding_category_table, coding_category_utf_16_auto,
@@ -9601,7 +10033,7 @@ updated by the functions `define-coding-system' and
   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
 	       doc: /* Alist of coding system names.
 Each element is one element list of coding system name.
-This variable is given to `completing-read' as TABLE argument.
+This variable is given to `completing-read' as COLLECTION argument.
 
 Do not alter the value of this variable manually.  This variable should be
 updated by the functions `make-coding-system' and
@@ -9631,8 +10063,8 @@ Don't modify this variable directly, but use `set-coding-priority'.  */);
 	       doc: /* Specify the coding system for read operations.
 It is useful to bind this variable with `let', but do not set it globally.
 If the value is a coding system, it is used for decoding on read operation.
-If not, an appropriate element is used from one of the coding system alists:
-There are three such tables, `file-coding-system-alist',
+If not, an appropriate element is used from one of the coding system alists.
+There are three such tables: `file-coding-system-alist',
 `process-coding-system-alist', and `network-coding-system-alist'.  */);
   Vcoding_system_for_read = Qnil;
 
@@ -9643,8 +10075,8 @@ If the value is a coding system, it is used for encoding of output,
 when writing it to a file and when sending it to a file or subprocess.
 
 If this does not specify a coding system, an appropriate element
-is used from one of the coding system alists:
-There are three such tables, `file-coding-system-alist',
+is used from one of the coding system alists.
+There are three such tables: `file-coding-system-alist',
 `process-coding-system-alist', and `network-coding-system-alist'.
 For output to files, if the above procedure does not specify a coding system,
 the value of `buffer-file-coding-system' is used.  */);
@@ -9804,7 +10236,7 @@ If Nth element is non-nil, the existence of code N in a file
 a coding system of ISO 2022 variant which has a flag
 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file
 or reading output of a subprocess.
-Only 128th through 159th elements has a meaning.  */);
+Only 128th through 159th elements have a meaning.  */);
   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
 
   DEFVAR_LISP ("select-safe-coding-system-function",
@@ -9860,8 +10292,8 @@ escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argumen
 
   DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input,
 	       doc: /* Char table for translating self-inserting characters.
-This is applied to the result of input methods, not their input.  See also
-`keyboard-translate-table'.  */);
+This is applied to the result of input methods, not their input.
+See also `keyboard-translate-table'.  */);
     Vtranslation_table_for_input = Qnil;
 
   {