Port the font backend from the Mac port.

[gnu-emacs] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index 5285a90682339341263e02aa6a7418d1b5944fdc..c10fb37567208f8afb3152c62212826c2f49e835 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -286,6 +286,10 @@ encode_coding_XXX (struct coding_system *coding)
  #include <config.h>
  #include <stdio.h>
  
  #include <config.h>
  #include <stdio.h>
  
+#ifdef HAVE_WCHAR_H
+#include <wchar.h>
+#endif /* HAVE_WCHAR_H */
+
  #include "lisp.h"
  #include "character.h"
  #include "buffer.h"
  #include "lisp.h"
  #include "character.h"
  #include "buffer.h"
@@ -322,8 +326,7 @@ Lisp_Object Qcall_process, Qcall_process_region;
  Lisp_Object Qstart_process, Qopen_network_stream;
  static Lisp_Object Qtarget_idx;
  
  Lisp_Object Qstart_process, Qopen_network_stream;
  static Lisp_Object Qtarget_idx;
  
-static Lisp_Object Qinsufficient_source, Qinconsistent_eol, Qinvalid_source;
-static Lisp_Object Qinterrupted, Qinsufficient_memory;
+static Lisp_Object Qinsufficient_source, Qinvalid_source, Qinterrupted;
  
  /* If a symbol has this property, evaluate the value to define the
     symbol as a coding system.  */
  
  /* If a symbol has this property, evaluate the value to define the
     symbol as a coding system.  */
@@ -490,6 +493,8 @@ enum iso_code_class_type
  
  #define CODING_ISO_FLAG_USE_OLDJIS     0x10000
  
  
  #define CODING_ISO_FLAG_USE_OLDJIS     0x10000
  
+#define CODING_ISO_FLAG_LEVEL_4                0x20000
+
  #define CODING_ISO_FLAG_FULL_SUPPORT   0x100000
  
  /* A character to be produced on output if encoding of the original
  #define CODING_ISO_FLAG_FULL_SUPPORT   0x100000
  
  /* A character to be produced on output if encoding of the original
@@ -646,12 +651,45 @@ static struct coding_system coding_categories[coding_category_max];
  #define max(a, b) ((a) > (b) ? (a) : (b))
  #endif
  
  #define max(a, b) ((a) > (b) ? (a) : (b))
  #endif
  
+/* Encode a flag that can be nil, something else, or t as -1, 0, 1.  */
+
+static int
+encode_inhibit_flag (Lisp_Object flag)
+{
+  return NILP (flag) ? -1 : EQ (flag, Qt);
+}
+
+/* True if the value of ENCODED_FLAG says a flag should be treated as set.
+   1 means yes, -1 means no, 0 means ask the user variable VAR.  */
+
+static bool
+inhibit_flag (int encoded_flag, bool var)
+{
+  return 0 < encoded_flag + var;
+}
+
  #define CODING_GET_INFO(coding, attrs, charset_list)   \
    do {                                                 \
      (attrs) = CODING_ID_ATTRS ((coding)->id);          \
      (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
    } while (0)
  
  #define CODING_GET_INFO(coding, attrs, charset_list)   \
    do {                                                 \
      (attrs) = CODING_ID_ATTRS ((coding)->id);          \
      (charset_list) = CODING_ATTR_CHARSET_LIST (attrs); \
    } while (0)
  
+static void
+CHECK_NATNUM_CAR (Lisp_Object x)
+{
+  Lisp_Object tmp = XCAR (x);
+  CHECK_NATNUM (tmp);
+  XSETCAR (x, tmp);
+}
+
+static void
+CHECK_NATNUM_CDR (Lisp_Object x)
+{
+  Lisp_Object tmp = XCDR (x);
+  CHECK_NATNUM (tmp);
+  XSETCDR (x, tmp);
+}
+
  
  /* Safely get one byte from the source text pointed by SRC which ends
     at SRC_END, and set C to that byte.  If there are not enough bytes
  
  /* Safely get one byte from the source text pointed by SRC which ends
     at SRC_END, and set C to that byte.  If there are not enough bytes
@@ -820,18 +858,12 @@ record_conversion_result (struct coding_system *coding,
      case CODING_RESULT_INSUFFICIENT_SRC:
        Vlast_code_conversion_error = Qinsufficient_source;
        break;
      case CODING_RESULT_INSUFFICIENT_SRC:
        Vlast_code_conversion_error = Qinsufficient_source;
        break;
-    case CODING_RESULT_INCONSISTENT_EOL:
-      Vlast_code_conversion_error = Qinconsistent_eol;
-      break;
      case CODING_RESULT_INVALID_SRC:
        Vlast_code_conversion_error = Qinvalid_source;
        break;
      case CODING_RESULT_INTERRUPT:
        Vlast_code_conversion_error = Qinterrupted;
        break;
      case CODING_RESULT_INVALID_SRC:
        Vlast_code_conversion_error = Qinvalid_source;
        break;
      case CODING_RESULT_INTERRUPT:
        Vlast_code_conversion_error = Qinterrupted;
        break;
-    case CODING_RESULT_INSUFFICIENT_MEM:
-      Vlast_code_conversion_error = Qinsufficient_memory;
-      break;
      case CODING_RESULT_INSUFFICIENT_DST:
        /* Don't record this error in Vlast_code_conversion_error
          because it happens just temporarily and is resolved when the
      case CODING_RESULT_INSUFFICIENT_DST:
        /* Don't record this error in Vlast_code_conversion_error
          because it happens just temporarily and is resolved when the
@@ -1049,14 +1081,7 @@ coding_alloc_by_making_gap (struct coding_system *coding,
        GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
      }
    else
        GPT -= gap_head_used, GPT_BYTE -= gap_head_used;
      }
    else
-    {
-      Lisp_Object this_buffer;
-
-      this_buffer = Fcurrent_buffer ();
-      set_buffer_internal (XBUFFER (coding->dst_object));
-      make_gap (bytes);
-      set_buffer_internal (XBUFFER (this_buffer));
-    }
+    make_gap_1 (XBUFFER (coding->dst_object), bytes);
  }
  
  
  }
  
  
@@ -1135,6 +1160,14 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
      *buf++ = id;                                                       \
    } while (0)
  
      *buf++ = id;                                                       \
    } while (0)
  
+
+/* Bitmasks for coding->eol_seen.  */
+
+#define EOL_SEEN_NONE  0
+#define EOL_SEEN_LF    1
+#define EOL_SEEN_CR    2
+#define EOL_SEEN_CRLF  4
+
  \f
  /*** 2. Emacs' internal format (emacs-utf-8) ***/
  
  \f
  /*** 2. Emacs' internal format (emacs-utf-8) ***/
  
@@ -1157,6 +1190,9 @@ alloc_destination (struct coding_system *coding, ptrdiff_t nbytes,
  #define UTF_8_BOM_2 0xBB
  #define UTF_8_BOM_3 0xBF
  
  #define UTF_8_BOM_2 0xBB
  #define UTF_8_BOM_3 0xBF
  
+/* Unlike the other detect_coding_XXX, this function counts number of
+   characters and check EOL format.  */
+
  static bool
  detect_coding_utf_8 (struct coding_system *coding,
                      struct coding_detection_info *detect_info)
  static bool
  detect_coding_utf_8 (struct coding_system *coding,
                      struct coding_detection_info *detect_info)
@@ -1166,11 +1202,23 @@ detect_coding_utf_8 (struct coding_system *coding,
    bool multibytep = coding->src_multibyte;
    ptrdiff_t consumed_chars = 0;
    bool bom_found = 0;
    bool multibytep = coding->src_multibyte;
    ptrdiff_t consumed_chars = 0;
    bool bom_found = 0;
-  bool found = 0;
+  int nchars = coding->head_ascii;
+  int eol_seen = coding->eol_seen;
  
    detect_info->checked |= CATEGORY_MASK_UTF_8;
    /* A coding system of this category is always ASCII compatible.  */
  
    detect_info->checked |= CATEGORY_MASK_UTF_8;
    /* A coding system of this category is always ASCII compatible.  */
-  src += coding->head_ascii;
+  src += nchars;
+
+  if (src == coding->source    /* BOM should be at the head.  */
+      && src + 3 < src_end     /* BOM is 3-byte long.  */
+      && src[0] == UTF_8_BOM_1
+      && src[1] == UTF_8_BOM_2
+      && src[2] == UTF_8_BOM_3)
+    {
+      bom_found = 1;
+      src += 3;
+      nchars++;
+    }
  
    while (1)
      {
  
    while (1)
      {
@@ -1179,13 +1227,29 @@ detect_coding_utf_8 (struct coding_system *coding,
        src_base = src;
        ONE_MORE_BYTE (c);
        if (c < 0 || UTF_8_1_OCTET_P (c))
        src_base = src;
        ONE_MORE_BYTE (c);
        if (c < 0 || UTF_8_1_OCTET_P (c))
-       continue;
+       {
+         nchars++;
+         if (c == '\r')
+           {
+             if (src < src_end && *src == '\n')
+               {
+                 eol_seen |= EOL_SEEN_CRLF;
+                 src++;
+                 nchars++;
+               }
+             else
+               eol_seen |= EOL_SEEN_CR;
+           }
+         else if (c == '\n')
+           eol_seen |= EOL_SEEN_LF;
+         continue;
+       }
        ONE_MORE_BYTE (c1);
        if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
         break;
        if (UTF_8_2_OCTET_LEADING_P (c))
         {
        ONE_MORE_BYTE (c1);
        if (c1 < 0 || ! UTF_8_EXTRA_OCTET_P (c1))
         break;
        if (UTF_8_2_OCTET_LEADING_P (c))
         {
-         found = 1;
+         nchars++;
           continue;
         }
        ONE_MORE_BYTE (c2);
           continue;
         }
        ONE_MORE_BYTE (c2);
@@ -1193,10 +1257,7 @@ detect_coding_utf_8 (struct coding_system *coding,
         break;
        if (UTF_8_3_OCTET_LEADING_P (c))
         {
         break;
        if (UTF_8_3_OCTET_LEADING_P (c))
         {
-         found = 1;
-         if (src_base == coding->source
-             && c == UTF_8_BOM_1 && c1 == UTF_8_BOM_2 && c2 == UTF_8_BOM_3)
-           bom_found = 1;
+         nchars++;
           continue;
         }
        ONE_MORE_BYTE (c3);
           continue;
         }
        ONE_MORE_BYTE (c3);
@@ -1204,7 +1265,7 @@ detect_coding_utf_8 (struct coding_system *coding,
         break;
        if (UTF_8_4_OCTET_LEADING_P (c))
         {
         break;
        if (UTF_8_4_OCTET_LEADING_P (c))
         {
-         found = 1;
+         nchars++;
           continue;
         }
        ONE_MORE_BYTE (c4);
           continue;
         }
        ONE_MORE_BYTE (c4);
@@ -1212,7 +1273,7 @@ detect_coding_utf_8 (struct coding_system *coding,
         break;
        if (UTF_8_5_OCTET_LEADING_P (c))
         {
         break;
        if (UTF_8_5_OCTET_LEADING_P (c))
         {
-         found = 1;
+         nchars++;
           continue;
         }
        break;
           continue;
         }
        break;
@@ -1229,14 +1290,17 @@ detect_coding_utf_8 (struct coding_system *coding,
    if (bom_found)
      {
        /* The first character 0xFFFE doesn't necessarily mean a BOM.  */
    if (bom_found)
      {
        /* The first character 0xFFFE doesn't necessarily mean a BOM.  */
-      detect_info->found |= CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
+      detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_SIG | CATEGORY_MASK_UTF_8_NOSIG;
      }
    else
      {
        detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
      }
    else
      {
        detect_info->rejected |= CATEGORY_MASK_UTF_8_SIG;
-      if (found)
-       detect_info->found |= CATEGORY_MASK_UTF_8_NOSIG;
+      if (nchars < src_end - coding->source)
+       /* The found characters are less than source bytes, which
+          means that we found a valid non-ASCII characters.  */
+       detect_info->found |= CATEGORY_MASK_UTF_8_AUTO | CATEGORY_MASK_UTF_8_NOSIG;
      }
      }
+  coding->detected_utf8_chars = nchars;
    return 1;
  }
  
    return 1;
  }
  
@@ -1301,6 +1365,45 @@ decode_coding_utf_8 (struct coding_system *coding)
           break;
         }
  
           break;
         }
  
+      /* In the simple case, rapidly handle ordinary characters */
+      if (multibytep && ! eol_dos
+         && charbuf < charbuf_end - 6 && src < src_end - 6)
+       {
+         while (charbuf < charbuf_end - 6 && src < src_end - 6)
+           {
+             c1 = *src;
+             if (c1 & 0x80)
+               break;
+             src++;
+             consumed_chars++;
+             *charbuf++ = c1;
+
+             c1 = *src;
+             if (c1 & 0x80)
+               break;
+             src++;
+             consumed_chars++;
+             *charbuf++ = c1;
+
+             c1 = *src;
+             if (c1 & 0x80)
+               break;
+             src++;
+             consumed_chars++;
+             *charbuf++ = c1;
+
+             c1 = *src;
+             if (c1 & 0x80)
+               break;
+             src++;
+             consumed_chars++;
+             *charbuf++ = c1;
+           }
+         /* If we handled at least one character, restart the main loop.  */
+         if (src != src_base)
+           continue;
+       }
+
        if (byte_after_cr >= 0)
         c1 = byte_after_cr, byte_after_cr = -1;
        else
        if (byte_after_cr >= 0)
         c1 = byte_after_cr, byte_after_cr = -1;
        else
@@ -3064,20 +3167,7 @@ detect_coding_iso_2022 (struct coding_system *coding,
             }
           if (single_shifting)
             break;
             }
           if (single_shifting)
             break;
-       check_extra_latin:
-         if (! VECTORP (Vlatin_extra_code_table)
-             || NILP (AREF (Vlatin_extra_code_table, c)))
-           {
-             rejected = CATEGORY_MASK_ISO;
-             break;
-           }
-         if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
-             & CODING_ISO_FLAG_LATIN_EXTRA)
-           found |= CATEGORY_MASK_ISO_8_1;
-         else
-           rejected |= CATEGORY_MASK_ISO_8_1;
-         rejected |= CATEGORY_MASK_ISO_8_2;
-         break;
+         goto check_extra_latin;
  
         default:
           if (c < 0)
  
         default:
           if (c < 0)
@@ -3128,6 +3218,20 @@ detect_coding_iso_2022 (struct coding_system *coding,
                 }
               break;
             }
                 }
               break;
             }
+       check_extra_latin:
+         if (! VECTORP (Vlatin_extra_code_table)
+             || NILP (AREF (Vlatin_extra_code_table, c)))
+           {
+             rejected = CATEGORY_MASK_ISO;
+             break;
+           }
+         if (CODING_ISO_FLAGS (&coding_categories[coding_category_iso_8_1])
+             & CODING_ISO_FLAG_LATIN_EXTRA)
+           found |= CATEGORY_MASK_ISO_8_1;
+         else
+           rejected |= CATEGORY_MASK_ISO_8_1;
+         rejected |= CATEGORY_MASK_ISO_8_2;
+         break;
         }
      }
    detect_info->rejected |= CATEGORY_MASK_ISO;
         }
      }
    detect_info->rejected |= CATEGORY_MASK_ISO;
@@ -3670,7 +3774,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
               else
                 charset = CHARSET_FROM_ID (charset_id_2);
               ONE_MORE_BYTE (c1);
               else
                 charset = CHARSET_FROM_ID (charset_id_2);
               ONE_MORE_BYTE (c1);
-             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
+                 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+                     && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
+                         ? c1 >= 0x80 : c1 < 0x80)))
                 goto invalid_code;
               break;
  
                 goto invalid_code;
               break;
  
@@ -3684,7 +3791,10 @@ decode_coding_iso_2022 (struct coding_system *coding)
               else
                 charset = CHARSET_FROM_ID (charset_id_3);
               ONE_MORE_BYTE (c1);
               else
                 charset = CHARSET_FROM_ID (charset_id_3);
               ONE_MORE_BYTE (c1);
-             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+             if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)
+                 || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS)
+                     && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4)
+                         ? c1 >= 0x80 : c1 < 0x80)))
                 goto invalid_code;
               break;
  
                 goto invalid_code;
               break;
  
@@ -3896,6 +4006,14 @@ decode_coding_iso_2022 (struct coding_system *coding)
        *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
        char_offset++;
        coding->errors++;
        *charbuf++ = c < 0 ? -c : ASCII_BYTE_P (c) ? c : BYTE8_TO_CHAR (c);
        char_offset++;
        coding->errors++;
+      /* Reset the invocation and designation status to the safest
+        one; i.e. designate ASCII to the graphic register 0, and
+        invoke that register to the graphic plane 0.  This typically
+        helps the case that an designation sequence for ASCII "ESC (
+        B" is somehow broken (e.g. broken by a newline).  */
+      CODING_ISO_INVOCATION (coding, 0) = 0;
+      CODING_ISO_DESIGNATION (coding, 0) = charset_ascii;
+      charset_id_0 = charset_ascii;
        continue;
  
      break_loop:
        continue;
  
      break_loop:
@@ -5623,7 +5741,6 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
    eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
  
    coding->mode = 0;
    eol_type = inhibit_eol_conversion ? Qunix : CODING_ID_EOL_TYPE (coding->id);
  
    coding->mode = 0;
-  coding->head_ascii = -1;
    if (VECTORP (eol_type))
      coding->common_flags = (CODING_REQUIRE_DECODING_MASK
                             | CODING_REQUIRE_DETECTION_MASK);
    if (VECTORP (eol_type))
      coding->common_flags = (CODING_REQUIRE_DECODING_MASK
                             | CODING_REQUIRE_DETECTION_MASK);
@@ -5652,6 +5769,14 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding)
        coding->decoder = decode_coding_raw_text;
        coding->encoder = encode_coding_raw_text;
        coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
        coding->decoder = decode_coding_raw_text;
        coding->encoder = encode_coding_raw_text;
        coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
+      coding->spec.undecided.inhibit_nbd
+       = (encode_inhibit_flag
+          (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection)));
+      coding->spec.undecided.inhibit_ied
+       = (encode_inhibit_flag
+          (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)));
+      coding->spec.undecided.prefer_utf_8
+       = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8));
      }
    else if (EQ (coding_type, Qiso_2022))
      {
      }
    else if (EQ (coding_type, Qiso_2022))
      {
@@ -6075,10 +6200,181 @@ complement_process_encoding_system (Lisp_Object coding_system)
  
  */
  
  
  */
  
-#define EOL_SEEN_NONE  0
-#define EOL_SEEN_LF    1
-#define EOL_SEEN_CR    2
-#define EOL_SEEN_CRLF  4
+static Lisp_Object adjust_coding_eol_type (struct coding_system *coding,
+                                          int eol_seen);
+
+
+/* Return the number of ASCII characters at the head of the source.
+   By side effects, set coding->head_ascii and update
+   coding->eol_seen.  The value of coding->eol_seen is "logical or" of
+   EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but the value is
+   reliable only when all the source bytes are ASCII.  */
+
+static int
+check_ascii (struct coding_system *coding)
+{
+  const unsigned char *src, *end;
+  Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
+  int eol_seen = coding->eol_seen;
+
+  coding_set_source (coding);
+  src = coding->source;
+  end = src + coding->src_bytes;
+
+  if (inhibit_eol_conversion
+      || SYMBOLP (eol_type))
+    {
+      /* We don't have to check EOL format.  */
+      while (src < end && !( *src & 0x80))
+       {
+         if (*src++ == '\n')
+           eol_seen |= EOL_SEEN_LF;
+       }
+    }
+  else
+    {
+      end--;               /* We look ahead one byte for "CR LF".  */
+      while (src < end)
+       {
+         int c = *src;
+
+         if (c & 0x80)
+           break;
+         src++;
+         if (c == '\r')
+           {
+             if (*src == '\n')
+               {
+                 eol_seen |= EOL_SEEN_CRLF;
+                 src++;
+               }
+             else
+               eol_seen |= EOL_SEEN_CR;
+           }
+         else if (c == '\n')
+           eol_seen |= EOL_SEEN_LF;
+       }
+      if (src == end)
+       {
+         int c = *src;
+
+         /* All bytes but the last one C are ASCII.  */
+         if (! (c & 0x80))
+           {
+             if (c == '\r')
+               eol_seen |= EOL_SEEN_CR;
+             else if (c  == '\n')
+               eol_seen |= EOL_SEEN_LF;
+             src++;
+           }
+       }
+    }
+  coding->head_ascii = src - coding->source;
+  coding->eol_seen = eol_seen;
+  return (coding->head_ascii);
+}
+
+
+/* Return the number of characters at the source if all the bytes are
+   valid UTF-8 (of Unicode range).  Otherwise, return -1.  By side
+   effects, update coding->eol_seen.  The value of coding->eol_seen is
+   "logical or" of EOL_SEEN_LF, EOL_SEEN_CR, and EOL_SEEN_CRLF, but
+   the value is reliable only when all the source bytes are valid
+   UTF-8.  */
+
+static int
+check_utf_8 (struct coding_system *coding)
+{
+  const unsigned char *src, *end;
+  int eol_seen;
+  int nchars = coding->head_ascii;
+
+  if (coding->head_ascii < 0)
+    check_ascii (coding);
+  else
+    coding_set_source (coding);
+  src = coding->source + coding->head_ascii;
+  /* We look ahead one byte for CR LF.  */
+  end = coding->source + coding->src_bytes - 1;
+  eol_seen = coding->eol_seen;
+  while (src < end)
+    {
+      int c = *src;
+
+      if (UTF_8_1_OCTET_P (*src))
+       {
+         src++;
+         if (c < 0x20)
+           {
+             if (c == '\r')
+               {
+                 if (*src == '\n')
+                   {
+                     eol_seen |= EOL_SEEN_CRLF;
+                     src++;
+                     nchars++;
+                   }
+                 else
+                   eol_seen |= EOL_SEEN_CR;
+               }
+             else if (c == '\n')
+               eol_seen |= EOL_SEEN_LF;
+           }
+       }
+      else if (UTF_8_2_OCTET_LEADING_P (c))
+       {
+         if (c < 0xC2          /* overlong sequence */
+             || src + 1 >= end
+             || ! UTF_8_EXTRA_OCTET_P (src[1]))
+           return -1;
+         src += 2;
+       }
+      else if (UTF_8_3_OCTET_LEADING_P (c))
+       {
+         if (src + 2 >= end
+             || ! (UTF_8_EXTRA_OCTET_P (src[1])
+                   && UTF_8_EXTRA_OCTET_P (src[2])))
+           return -1;
+         c = (((c & 0xF) << 12)
+              | ((src[1] & 0x3F) << 6) | (src[2] & 0x3F));
+         if (c < 0x800                       /* overlong sequence */
+             || (c >= 0xd800 && c < 0xe000)) /* surrogates (invalid) */
+           return -1;
+         src += 3;
+       }
+      else if (UTF_8_4_OCTET_LEADING_P (c))
+       {
+         if (src + 3 >= end
+             || ! (UTF_8_EXTRA_OCTET_P (src[1])
+                   && UTF_8_EXTRA_OCTET_P (src[2])
+                   && UTF_8_EXTRA_OCTET_P (src[3])))
+           return -1;
+         c = (((c & 0x7) << 18) | ((src[1] & 0x3F) << 12)
+              | ((src[2] & 0x3F) << 6) | (src[3] & 0x3F));
+         if (c < 0x10000       /* overlong sequence */
+             || c >= 0x110000) /* non-Unicode character  */
+           return -1;
+         src += 4;
+       }
+      else
+       return -1;
+      nchars++;
+    }
+
+  if (src == end)
+    {
+      if (! UTF_8_1_OCTET_P (*src))
+       return -1;
+      nchars++;
+      if (*src == '\r')
+       eol_seen |= EOL_SEEN_CR;
+      else if (*src  == '\n')
+       eol_seen |= EOL_SEEN_LF;
+    }
+  coding->eol_seen = eol_seen;
+  return nchars;
+}
+
  
  /* Detect how end-of-line of a text of length SRC_BYTES pointed by
     SOURCE is encoded.  If CATEGORY is one of
  
  /* Detect how end-of-line of a text of length SRC_BYTES pointed by
     SOURCE is encoded.  If CATEGORY is one of
@@ -6191,6 +6487,9 @@ adjust_coding_eol_type (struct coding_system *coding, int eol_seen)
    Lisp_Object eol_type;
  
    eol_type = CODING_ID_EOL_TYPE (coding->id);
    Lisp_Object eol_type;
  
    eol_type = CODING_ID_EOL_TYPE (coding->id);
+  if (! VECTORP (eol_type))
+    /* Already adjusted.  */
+    return eol_type;
    if (eol_seen & EOL_SEEN_LF)
      {
        coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
    if (eol_seen & EOL_SEEN_LF)
      {
        coding->id = CODING_SYSTEM_ID (AREF (eol_type, 0));
@@ -6218,14 +6517,16 @@ detect_coding (struct coding_system *coding)
  {
    const unsigned char *src, *src_end;
    unsigned int saved_mode = coding->mode;
  {
    const unsigned char *src, *src_end;
    unsigned int saved_mode = coding->mode;
+  Lisp_Object found = Qnil;
+  Lisp_Object eol_type = CODING_ID_EOL_TYPE (coding->id);
  
    coding->consumed = coding->consumed_char = 0;
    coding->produced = coding->produced_char = 0;
    coding_set_source (coding);
  
    src_end = coding->source + coding->src_bytes;
  
    coding->consumed = coding->consumed_char = 0;
    coding->produced = coding->produced_char = 0;
    coding_set_source (coding);
  
    src_end = coding->source + coding->src_bytes;
-  coding->head_ascii = 0;
  
  
+  coding->eol_seen = EOL_SEEN_NONE;
    /* If we have not yet decided the text encoding type, detect it
       now.  */
    if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
    /* If we have not yet decided the text encoding type, detect it
       now.  */
    if (EQ (CODING_ATTR_TYPE (CODING_ID_ATTRS (coding->id)), Qundecided))
@@ -6233,7 +6534,13 @@ detect_coding (struct coding_system *coding)
        int c, i;
        struct coding_detection_info detect_info;
        bool null_byte_found = 0, eight_bit_found = 0;
        int c, i;
        struct coding_detection_info detect_info;
        bool null_byte_found = 0, eight_bit_found = 0;
+      bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd,
+                                      inhibit_null_byte_detection);
+      bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied,
+                                      inhibit_iso_escape_detection);
+      bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8;
  
  
+      coding->head_ascii = 0;
        detect_info.checked = detect_info.found = detect_info.rejected = 0;
        for (src = coding->source; src < src_end; src++)
         {
        detect_info.checked = detect_info.found = detect_info.rejected = 0;
        for (src = coding->source; src < src_end; src++)
         {
@@ -6247,7 +6554,7 @@ detect_coding (struct coding_system *coding)
           else if (c < 0x20)
             {
               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
           else if (c < 0x20)
             {
               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-                 && ! inhibit_iso_escape_detection
+                 && ! inhibit_ied
                   && ! detect_info.checked)
                 {
                   if (detect_coding_iso_2022 (coding, &detect_info))
                   && ! detect_info.checked)
                 {
                   if (detect_coding_iso_2022 (coding, &detect_info))
@@ -6266,12 +6573,33 @@ detect_coding (struct coding_system *coding)
                       break;
                     }
                 }
                       break;
                     }
                 }
-             else if (! c && !inhibit_null_byte_detection)
+             else if (! c && !inhibit_nbd)
                 {
                   null_byte_found = 1;
                   if (eight_bit_found)
                     break;
                 }
                 {
                   null_byte_found = 1;
                   if (eight_bit_found)
                     break;
                 }
+             else if (! disable_ascii_optimization
+                      && ! inhibit_eol_conversion)
+               {
+                 if (c == '\r')
+                   {
+                     if (src < src_end && src[1] == '\n')
+                       {
+                         coding->eol_seen |= EOL_SEEN_CRLF;
+                         src++;
+                         if (! eight_bit_found)
+                           coding->head_ascii++;
+                       }
+                     else
+                       coding->eol_seen |= EOL_SEEN_CR;
+                   }
+                 else if (c == '\n')
+                   {
+                     coding->eol_seen |= EOL_SEEN_LF;
+                   }
+               }
+
               if (! eight_bit_found)
                 coding->head_ascii++;
             }
               if (! eight_bit_found)
                 coding->head_ascii++;
             }
@@ -6302,6 +6630,12 @@ detect_coding (struct coding_system *coding)
                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                 }
                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                 }
+             else if (prefer_utf_8
+                      && detect_coding_utf_8 (coding, &detect_info))
+               {
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+               }
               for (i = 0; i < coding_category_raw_text; i++)
                 {
                   category = coding_priorities[i];
               for (i = 0; i < coding_category_raw_text; i++)
                 {
                   category = coding_priorities[i];
@@ -6323,32 +6657,58 @@ detect_coding (struct coding_system *coding)
                     }
                   else if ((*(this->detector)) (coding, &detect_info)
                            && detect_info.found & (1 << category))
                     }
                   else if ((*(this->detector)) (coding, &detect_info)
                            && detect_info.found & (1 << category))
-                   {
-                     if (category == coding_category_utf_16_auto)
-                       {
-                         if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-                           category = coding_category_utf_16_le;
-                         else
-                           category = coding_category_utf_16_be;
-                       }
-                     break;
-                   }
+                   break;
                 }
             }
  
           if (i < coding_category_raw_text)
                 }
             }
  
           if (i < coding_category_raw_text)
-           setup_coding_system (CODING_ID_NAME (this->id), coding);
+           {
+             if (category == coding_category_utf_8_auto)
+               {
+                 Lisp_Object coding_systems;
+
+                 coding_systems = AREF (CODING_ID_ATTRS (this->id),
+                                        coding_attr_utf_bom);
+                 if (CONSP (coding_systems))
+                   {
+                     if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
+                       found = XCAR (coding_systems);
+                     else
+                       found = XCDR (coding_systems);
+                   }
+                 else
+                   found = CODING_ID_NAME (this->id);
+               }
+             else if (category == coding_category_utf_16_auto)
+               {
+                 Lisp_Object coding_systems;
+
+                 coding_systems = AREF (CODING_ID_ATTRS (this->id),
+                                        coding_attr_utf_bom);
+                 if (CONSP (coding_systems))
+                   {
+                     if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
+                       found = XCAR (coding_systems);
+                     else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
+                       found = XCDR (coding_systems);
+                   }
+                 else
+                   found = CODING_ID_NAME (this->id);
+               }
+             else
+               found = CODING_ID_NAME (this->id);
+           }
           else if (null_byte_found)
           else if (null_byte_found)
-           setup_coding_system (Qno_conversion, coding);
+           found = Qno_conversion;
           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
                    == CATEGORY_MASK_ANY)
           else if ((detect_info.rejected & CATEGORY_MASK_ANY)
                    == CATEGORY_MASK_ANY)
-           setup_coding_system (Qraw_text, coding);
+           found = Qraw_text;
           else if (detect_info.rejected)
             for (i = 0; i < coding_category_raw_text; i++)
               if (! (detect_info.rejected & (1 << coding_priorities[i])))
                 {
                   this = coding_categories + coding_priorities[i];
           else if (detect_info.rejected)
             for (i = 0; i < coding_category_raw_text; i++)
               if (! (detect_info.rejected & (1 << coding_priorities[i])))
                 {
                   this = coding_categories + coding_priorities[i];
-                 setup_coding_system (CODING_ID_NAME (this->id), coding);
+                 found = CODING_ID_NAME (this->id);
                   break;
                 }
         }
                   break;
                 }
         }
@@ -6362,14 +6722,21 @@ detect_coding (struct coding_system *coding)
        coding_systems
         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
        detect_info.found = detect_info.rejected = 0;
        coding_systems
         = AREF (CODING_ID_ATTRS (coding->id), coding_attr_utf_bom);
        detect_info.found = detect_info.rejected = 0;
-      coding->head_ascii = 0;
-      if (CONSP (coding_systems)
-         && detect_coding_utf_8 (coding, &detect_info))
+      if (check_ascii (coding) == coding->src_bytes)
         {
         {
-         if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
-           setup_coding_system (XCAR (coding_systems), coding);
-         else
-           setup_coding_system (XCDR (coding_systems), coding);
+         if (CONSP (coding_systems))
+           found = XCDR (coding_systems);
+       }
+      else
+       {
+         if (CONSP (coding_systems)
+             && detect_coding_utf_8 (coding, &detect_info))
+           {
+             if (detect_info.found & CATEGORY_MASK_UTF_8_SIG)
+               found = XCAR (coding_systems);
+             else
+               found = XCDR (coding_systems);
+           }
         }
      }
    else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
         }
      }
    else if (XINT (CODING_ATTR_CATEGORY (CODING_ID_ATTRS (coding->id)))
@@ -6386,11 +6753,24 @@ detect_coding (struct coding_system *coding)
           && detect_coding_utf_16 (coding, &detect_info))
         {
           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
           && detect_coding_utf_16 (coding, &detect_info))
         {
           if (detect_info.found & CATEGORY_MASK_UTF_16_LE)
-           setup_coding_system (XCAR (coding_systems), coding);
+           found = XCAR (coding_systems);
           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
           else if (detect_info.found & CATEGORY_MASK_UTF_16_BE)
-           setup_coding_system (XCDR (coding_systems), coding);
+           found = XCDR (coding_systems);
         }
      }
         }
      }
+
+  if (! NILP (found))
+    {
+      int specified_eol = (VECTORP (eol_type) ? EOL_SEEN_NONE
+                          : EQ (eol_type, Qdos) ? EOL_SEEN_CRLF
+                          : EQ (eol_type, Qmac) ? EOL_SEEN_CR
+                          : EOL_SEEN_LF);
+
+      setup_coding_system (found, coding);
+      if (specified_eol != EOL_SEEN_NONE)
+       adjust_coding_eol_type (coding, specified_eol);
+    }
+
    coding->mode = saved_mode;
  }
  
    coding->mode = saved_mode;
  }
  
@@ -6531,11 +6911,9 @@ get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup)
        if (CHAR_TABLE_P (standard))
         {
           if (CONSP (translation_table))
        if (CHAR_TABLE_P (standard))
         {
           if (CONSP (translation_table))
-           translation_table = nconc2 (translation_table,
-                                       Fcons (standard, Qnil));
+           translation_table = nconc2 (translation_table, list1 (standard));
           else
           else
-           translation_table = Fcons (translation_table,
-                                      Fcons (standard, Qnil));
+           translation_table = list2 (translation_table, standard);
         }
      }
  
         }
      }
  
@@ -6819,7 +7197,7 @@ produce_chars (struct coding_system *coding, Lisp_Object translation_table,
  
    produced = dst - (coding->destination + coding->produced);
    if (BUFFERP (coding->dst_object) && produced_chars > 0)
  
    produced = dst - (coding->destination + coding->produced);
    if (BUFFERP (coding->dst_object) && produced_chars > 0)
-    insert_from_gap (produced_chars, produced);
+    insert_from_gap (produced_chars, produced, 0);
    coding->produced += produced;
    coding->produced_char += produced_chars;
    return carryover;
    coding->produced += produced;
    coding->produced_char += produced_chars;
    return carryover;
@@ -6890,22 +7268,8 @@ produce_charset (struct coding_system *coding, int *charbuf, ptrdiff_t pos)
  
  #define ALLOC_CONVERSION_WORK_AREA(coding)                             \
    do {                                                                 \
  
  #define ALLOC_CONVERSION_WORK_AREA(coding)                             \
    do {                                                                 \
-    int size = CHARBUF_SIZE;                                           \
-                                                                       \
-    coding->charbuf = NULL;                                            \
-    while (size > 1024)                                                        \
-      {                                                                        \
-       coding->charbuf = alloca (sizeof (int) * size);                 \
-       if (coding->charbuf)                                            \
-         break;                                                        \
-       size >>= 1;                                                     \
-      }                                                                        \
-    if (! coding->charbuf)                                             \
-      {                                                                        \
-       record_conversion_result (coding, CODING_RESULT_INSUFFICIENT_MEM); \
-       return;                                                         \
-      }                                                                        \
-    coding->charbuf_size = size;                                       \
+    coding->charbuf = SAFE_ALLOCA (CHARBUF_SIZE * sizeof (int));       \
+    coding->charbuf_size = CHARBUF_SIZE;                               \
    } while (0)
  
  
    } while (0)
  
  
@@ -6974,6 +7338,8 @@ decode_coding (struct coding_system *coding)
    int carryover;
    int i;
  
    int carryover;
    int i;
  
+  USE_SAFE_ALLOCA;
+
    if (BUFFERP (coding->src_object)
        && coding->src_pos > 0
        && coding->src_pos < GPT
    if (BUFFERP (coding->src_object)
        && coding->src_pos > 0
        && coding->src_pos < GPT
@@ -7096,6 +7462,8 @@ decode_coding (struct coding_system *coding)
        bset_undo_list (current_buffer, undo_list);
        record_insert (coding->dst_pos, coding->produced_char);
      }
        bset_undo_list (current_buffer, undo_list);
        record_insert (coding->dst_pos, coding->produced_char);
      }
+
+  SAFE_FREE ();
  }
  
  
  }
  
  
@@ -7129,7 +7497,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit,
           /* We found a composition.  Store the corresponding
              annotation data in BUF.  */
           int *head = buf;
           /* We found a composition.  Store the corresponding
              annotation data in BUF.  */
           int *head = buf;
-         enum composition_method method = COMPOSITION_METHOD (prop);
+         enum composition_method method = composition_method (prop);
           int nchars = COMPOSITION_LENGTH (prop);
  
           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
           int nchars = COMPOSITION_LENGTH (prop);
  
           ADD_COMPOSITION_DATA (buf, nchars, 0, method);
@@ -7379,6 +7747,8 @@ encode_coding (struct coding_system *coding)
    int max_lookup;
    struct ccl_spec cclspec;
  
    int max_lookup;
    struct ccl_spec cclspec;
  
+  USE_SAFE_ALLOCA;
+
    attrs = CODING_ID_ATTRS (coding->id);
    if (coding->encoder == encode_coding_raw_text)
      translation_table = Qnil, max_lookup = 0;
    attrs = CODING_ID_ATTRS (coding->id);
    if (coding->encoder == encode_coding_raw_text)
      translation_table = Qnil, max_lookup = 0;
@@ -7412,7 +7782,9 @@ encode_coding (struct coding_system *coding)
    } while (coding->consumed_char < coding->src_chars);
  
    if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
    } while (coding->consumed_char < coding->src_chars);
  
    if (BUFFERP (coding->dst_object) && coding->produced_char > 0)
-    insert_from_gap (coding->produced_char, coding->produced);
+    insert_from_gap (coding->produced_char, coding->produced, 0);
+
+  SAFE_FREE ();
  }
  
  
  }
  
  
@@ -7466,7 +7838,7 @@ make_conversion_work_buffer (bool multibyte)
  }
  
  
  }
  
  
-static Lisp_Object
+static void
  code_conversion_restore (Lisp_Object arg)
  {
    Lisp_Object current, workbuf;
  code_conversion_restore (Lisp_Object arg)
  {
    Lisp_Object current, workbuf;
@@ -7484,7 +7856,6 @@ code_conversion_restore (Lisp_Object arg)
      }
    set_buffer_internal (XBUFFER (current));
    UNGCPRO;
      }
    set_buffer_internal (XBUFFER (current));
    UNGCPRO;
-  return Qnil;
  }
  
  Lisp_Object
  }
  
  Lisp_Object
@@ -7506,8 +7877,6 @@ decode_coding_gap (struct coding_system *coding,
    ptrdiff_t count = SPECPDL_INDEX ();
    Lisp_Object attrs;
  
    ptrdiff_t count = SPECPDL_INDEX ();
    Lisp_Object attrs;
  
-  code_conversion_save (0, 0);
-
    coding->src_object = Fcurrent_buffer ();
    coding->src_chars = chars;
    coding->src_bytes = bytes;
    coding->src_object = Fcurrent_buffer ();
    coding->src_chars = chars;
    coding->src_bytes = bytes;
@@ -7519,15 +7888,95 @@ decode_coding_gap (struct coding_system *coding,
    coding->dst_pos_byte = PT_BYTE;
    coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  
    coding->dst_pos_byte = PT_BYTE;
    coding->dst_multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
  
+  coding->head_ascii = -1;
+  coding->detected_utf8_chars = -1;
+  coding->eol_seen = EOL_SEEN_NONE;
    if (CODING_REQUIRE_DETECTION (coding))
      detect_coding (coding);
    if (CODING_REQUIRE_DETECTION (coding))
      detect_coding (coding);
+  attrs = CODING_ID_ATTRS (coding->id);
+  if (! disable_ascii_optimization
+      && ! coding->src_multibyte
+      && ! NILP (CODING_ATTR_ASCII_COMPAT (attrs))
+      && NILP (CODING_ATTR_POST_READ (attrs))
+      && NILP (get_translation_table (attrs, 0, NULL)))
+    {
+      chars = coding->head_ascii;
+      if (chars < 0)
+       chars = check_ascii (coding);
+      if (chars != bytes)
+       {
+         /* There exists a non-ASCII byte.  */
+         if (EQ (CODING_ATTR_TYPE (attrs), Qutf_8))
+           {
+             if (coding->detected_utf8_chars >= 0)
+               chars = coding->detected_utf8_chars;
+             else
+               chars = check_utf_8 (coding);
+             if (CODING_UTF_8_BOM (coding) != utf_without_bom
+                 && coding->head_ascii == 0
+                 && coding->source[0] == UTF_8_BOM_1
+                 && coding->source[1] == UTF_8_BOM_2
+                 && coding->source[2] == UTF_8_BOM_3)
+               {
+                 chars--;
+                 bytes -= 3;
+                 coding->src_bytes -= 3;
+               }
+           }
+         else
+           chars = -1;
+       }
+      if (chars >= 0)
+       {
+         Lisp_Object eol_type;
+
+         eol_type = CODING_ID_EOL_TYPE (coding->id);
+         if (VECTORP (eol_type))
+           {
+             if (coding->eol_seen != EOL_SEEN_NONE)
+               eol_type = adjust_coding_eol_type (coding, coding->eol_seen);
+           }
+         if (EQ (eol_type, Qmac))
+           {
+             unsigned char *src_end = GAP_END_ADDR;
+             unsigned char *src = src_end - coding->src_bytes;
+
+             while (src < src_end)
+               {
+                 if (*src++ == '\r')
+                   src[-1] = '\n';
+               }
+           }
+         else if (EQ (eol_type, Qdos))
+           {
+             unsigned char *src = GAP_END_ADDR;
+             unsigned char *src_beg = src - coding->src_bytes;
+             unsigned char *dst = src;
+             ptrdiff_t diff;
+
+             while (src_beg < src)
+               {
+                 *--dst = *--src;
+                 if (*src == '\n' && src > src_beg && src[-1] == '\r')
+                   src--;
+               }
+             diff = dst - src;
+             bytes -= diff;
+             chars -= diff;
+           }
+         coding->produced = bytes;
+         coding->produced_char = chars;
+         insert_from_gap (chars, bytes, 1);
+         return;
+       }
+    }
+  code_conversion_save (0, 0);
  
    coding->mode |= CODING_MODE_LAST_BLOCK;
    current_buffer->text->inhibit_shrinking = 1;
    decode_coding (coding);
    current_buffer->text->inhibit_shrinking = 0;
  
  
    coding->mode |= CODING_MODE_LAST_BLOCK;
    current_buffer->text->inhibit_shrinking = 1;
    decode_coding (coding);
    current_buffer->text->inhibit_shrinking = 0;
  
-  attrs = CODING_ID_ATTRS (coding->id);
    if (! NILP (CODING_ATTR_POST_READ (attrs)))
      {
        ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
    if (! NILP (CODING_ATTR_POST_READ (attrs)))
      {
        ptrdiff_t prev_Z = Z, prev_Z_BYTE = Z_BYTE;
@@ -7701,14 +8150,8 @@ decode_coding_object (struct coding_system *coding,
        set_buffer_internal (XBUFFER (coding->dst_object));
        if (dst_bytes < coding->produced)
         {
        set_buffer_internal (XBUFFER (coding->dst_object));
        if (dst_bytes < coding->produced)
         {
+         eassert (coding->produced > 0);
           destination = xrealloc (destination, coding->produced);
           destination = xrealloc (destination, coding->produced);
-         if (! destination)
-           {
-             record_conversion_result (coding,
-                                       CODING_RESULT_INSUFFICIENT_MEM);
-             unbind_to (count, Qnil);
-             return;
-           }
           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
             move_gap_both (BEGV, BEGV_BYTE);
           memcpy (destination, BEGV_ADDR, coding->produced);
           if (BEGV < GPT && GPT < BEGV + coding->produced_char)
             move_gap_both (BEGV, BEGV_BYTE);
           memcpy (destination, BEGV_ADDR, coding->produced);
@@ -7991,11 +8434,21 @@ from_unicode (Lisp_Object str)
    return code_convert_string_norecord (str, Qutf_16le, 0);
  }
  
    return code_convert_string_norecord (str, Qutf_16le, 0);
  }
  
+Lisp_Object
+from_unicode_buffer (const wchar_t* wstr)
+{
+    return from_unicode (
+        make_unibyte_string (
+            (char*) wstr,
+            /* we get one of the two final 0 bytes for free. */
+            1 + sizeof (wchar_t) * wcslen (wstr)));
+}
+
  wchar_t *
  to_unicode (Lisp_Object str, Lisp_Object *buf)
  {
    *buf = code_convert_string_norecord (str, Qutf_16le, 1);
  wchar_t *
  to_unicode (Lisp_Object str, Lisp_Object *buf)
  {
    *buf = code_convert_string_norecord (str, Qutf_16le, 1);
-  /* We need to make a another copy (in addition to the one made by
+  /* We need to make another copy (in addition to the one made by
       code_convert_string_norecord) to ensure that the final string is
       _doubly_ zero terminated --- that is, that the string is
       terminated by two zero bytes and one utf-16le null character.
       code_convert_string_norecord) to ensure that the final string is
       _doubly_ zero terminated --- that is, that the string is
       terminated by two zero bytes and one utf-16le null character.
@@ -8141,6 +8594,11 @@ detect_coding_system (const unsigned char *src,
        enum coding_category category IF_LINT (= 0);
        struct coding_system *this IF_LINT (= NULL);
        int c, i;
        enum coding_category category IF_LINT (= 0);
        struct coding_system *this IF_LINT (= NULL);
        int c, i;
+      bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd,
+                                      inhibit_null_byte_detection);
+      bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied,
+                                      inhibit_iso_escape_detection);
+      bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8;
  
        /* Skip all ASCII bytes except for a few ISO2022 controls.  */
        for (; src < src_end; src++)
  
        /* Skip all ASCII bytes except for a few ISO2022 controls.  */
        for (; src < src_end; src++)
@@ -8155,7 +8613,7 @@ detect_coding_system (const unsigned char *src,
           else if (c < 0x20)
             {
               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
           else if (c < 0x20)
             {
               if ((c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
-                 && ! inhibit_iso_escape_detection
+                 && ! inhibit_ied
                   && ! detect_info.checked)
                 {
                   if (detect_coding_iso_2022 (&coding, &detect_info))
                   && ! detect_info.checked)
                 {
                   if (detect_coding_iso_2022 (&coding, &detect_info))
@@ -8174,7 +8632,7 @@ detect_coding_system (const unsigned char *src,
                       break;
                     }
                 }
                       break;
                     }
                 }
-             else if (! c && !inhibit_null_byte_detection)
+             else if (! c && !inhibit_nbd)
                 {
                   null_byte_found = 1;
                   if (eight_bit_found)
                 {
                   null_byte_found = 1;
                   if (eight_bit_found)
@@ -8207,6 +8665,12 @@ detect_coding_system (const unsigned char *src,
                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                 }
                   detect_info.checked |= ~CATEGORY_MASK_UTF_16;
                   detect_info.rejected |= ~CATEGORY_MASK_UTF_16;
                 }
+             else if (prefer_utf_8
+                      && detect_coding_utf_8 (&coding, &detect_info))
+               {
+                 detect_info.checked |= ~CATEGORY_MASK_UTF_8;
+                 detect_info.rejected |= ~CATEGORY_MASK_UTF_8;
+               }
               for (i = 0; i < coding_category_raw_text; i++)
                 {
                   category = coding_priorities[i];
               for (i = 0; i < coding_category_raw_text; i++)
                 {
                   category = coding_priorities[i];
@@ -8247,20 +8711,20 @@ detect_coding_system (const unsigned char *src,
         {
           detect_info.found = CATEGORY_MASK_RAW_TEXT;
           id = CODING_SYSTEM_ID (Qno_conversion);
         {
           detect_info.found = CATEGORY_MASK_RAW_TEXT;
           id = CODING_SYSTEM_ID (Qno_conversion);
-         val = Fcons (make_number (id), Qnil);
+         val = list1 (make_number (id));
         }
        else if (! detect_info.rejected && ! detect_info.found)
         {
           detect_info.found = CATEGORY_MASK_ANY;
           id = coding_categories[coding_category_undecided].id;
         }
        else if (! detect_info.rejected && ! detect_info.found)
         {
           detect_info.found = CATEGORY_MASK_ANY;
           id = coding_categories[coding_category_undecided].id;
-         val = Fcons (make_number (id), Qnil);
+         val = list1 (make_number (id));
         }
        else if (highest)
         {
           if (detect_info.found)
             {
               detect_info.found = 1 << category;
         }
        else if (highest)
         {
           if (detect_info.found)
             {
               detect_info.found = 1 << category;
-             val = Fcons (make_number (this->id), Qnil);
+             val = list1 (make_number (this->id));
             }
           else
             for (i = 0; i < coding_category_raw_text; i++)
             }
           else
             for (i = 0; i < coding_category_raw_text; i++)
@@ -8268,7 +8732,7 @@ detect_coding_system (const unsigned char *src,
                 {
                   detect_info.found = 1 << coding_priorities[i];
                   id = coding_categories[coding_priorities[i]].id;
                 {
                   detect_info.found = 1 << coding_priorities[i];
                   id = coding_categories[coding_priorities[i]].id;
-                 val = Fcons (make_number (id), Qnil);
+                 val = list1 (make_number (id));
                   break;
                 }
         }
                   break;
                 }
         }
@@ -8285,7 +8749,7 @@ detect_coding_system (const unsigned char *src,
                   found |= 1 << category;
                   id = coding_categories[category].id;
                   if (id >= 0)
                   found |= 1 << category;
                   id = coding_categories[category].id;
                   if (id >= 0)
-                   val = Fcons (make_number (id), val);
+                   val = list1 (make_number (id));
                 }
             }
           for (i = coding_category_raw_text - 1; i >= 0; i--)
                 }
             }
           for (i = coding_category_raw_text - 1; i >= 0; i--)
@@ -8310,7 +8774,7 @@ detect_coding_system (const unsigned char *src,
             this = coding_categories + coding_category_utf_8_sig;
           else
             this = coding_categories + coding_category_utf_8_nosig;
             this = coding_categories + coding_category_utf_8_sig;
           else
             this = coding_categories + coding_category_utf_8_nosig;
-         val = Fcons (make_number (this->id), Qnil);
+         val = list1 (make_number (this->id));
         }
      }
    else if (base_category == coding_category_utf_16_auto)
         }
      }
    else if (base_category == coding_category_utf_16_auto)
@@ -8327,13 +8791,13 @@ detect_coding_system (const unsigned char *src,
             this = coding_categories + coding_category_utf_16_be_nosig;
           else
             this = coding_categories + coding_category_utf_16_le_nosig;
             this = coding_categories + coding_category_utf_16_be_nosig;
           else
             this = coding_categories + coding_category_utf_16_le_nosig;
-         val = Fcons (make_number (this->id), Qnil);
+         val = list1 (make_number (this->id));
         }
      }
    else
      {
        detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
         }
      }
    else
      {
        detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs));
-      val = Fcons (make_number (coding.id), Qnil);
+      val = list1 (make_number (coding.id));
      }
  
    /* Then, detect eol-format if necessary.  */
      }
  
    /* Then, detect eol-format if necessary.  */
@@ -8545,8 +9009,7 @@ DEFUN ("find-coding-systems-region-internal",
         Lisp_Object attrs;
  
         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
         Lisp_Object attrs;
  
         attrs = AREF (CODING_SYSTEM_SPEC (XCAR (tail)), 0);
-       if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs))
-           && ! EQ (CODING_ATTR_TYPE (attrs), Qundecided))
+       if (EQ (XCAR (tail), CODING_ATTR_BASE_NAME (attrs)))
           {
             ASET (attrs, coding_attr_trans_tbl,
                   get_translation_table (attrs, 1, NULL));
           {
             ASET (attrs, coding_attr_trans_tbl,
                   get_translation_table (attrs, 1, NULL));
@@ -8805,7 +9268,7 @@ is nil.  */)
        attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
        ASET (attrs, coding_attr_trans_tbl,
             get_translation_table (attrs, 1, NULL));
        attrs = AREF (CODING_SYSTEM_SPEC (elt), 0);
        ASET (attrs, coding_attr_trans_tbl,
             get_translation_table (attrs, 1, NULL));
-      list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list);
+      list = Fcons (list2 (elt, attrs), list);
      }
  
    if (STRINGP (start))
      }
  
    if (STRINGP (start))
@@ -9216,7 +9679,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern
    tset_charset_list
      (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
             ? coding_charset_list (terminal_coding)
    tset_charset_list
      (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK
             ? coding_charset_list (terminal_coding)
-           : Fcons (make_number (charset_ascii), Qnil)));
+           : list1 (make_number (charset_ascii))));
    return Qnil;
  }
  
    return Qnil;
  }
  
@@ -9489,7 +9952,7 @@ make_subsidiaries (Lisp_Object base)
    int i;
  
    memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
    int i;
  
    memcpy (buf, SDATA (SYMBOL_NAME (base)), base_name_len);
-  subsidiaries = Fmake_vector (make_number (3), Qnil);
+  subsidiaries = make_uninit_vector (3);
    for (i = 0; i < 3; i++)
      {
        strcpy (buf + base_name_len, suffixes[i]);
    for (i = 0; i < 3; i++)
      {
        strcpy (buf + base_name_len, suffixes[i]);
@@ -9661,9 +10124,9 @@ usage: (define-coding-system-internal ...)  */)
                 {
                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
                   if (dim < dim2)
                 {
                   dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp)));
                   if (dim < dim2)
-                   tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil));
+                   tmp = list2 (XCAR (tail), tmp);
                   else
                   else
-                   tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil));
+                   tmp = list2 (tmp, XCAR (tail));
                 }
               else
                 {
                 }
               else
                 {
@@ -9674,7 +10137,7 @@ usage: (define-coding-system-internal ...)  */)
                         break;
                     }
                   if (NILP (tmp2))
                         break;
                     }
                   if (NILP (tmp2))
-                   tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil));
+                   tmp = nconc2 (tmp, list1 (XCAR (tail)));
                   else
                     {
                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
                   else
                     {
                       XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2)));
@@ -9789,7 +10252,7 @@ usage: (define-coding-system-internal ...)  */)
        CHECK_VECTOR (initial);
        for (i = 0; i < 4; i++)
         {
        CHECK_VECTOR (initial);
        for (i = 0; i < 4; i++)
         {
-         val = Faref (initial, make_number (i));
+         val = AREF (initial, i);
           if (! NILP (val))
             {
               struct charset *charset;
           if (! NILP (val))
             {
               struct charset *charset;
@@ -9960,7 +10423,17 @@ usage: (define-coding-system-internal ...)  */)
                   : coding_category_utf_8_sig);
      }
    else if (EQ (coding_type, Qundecided))
                   : coding_category_utf_8_sig);
      }
    else if (EQ (coding_type, Qundecided))
-    category = coding_category_undecided;
+    {
+      if (nargs < coding_arg_undecided_max)
+       goto short_args;
+      ASET (attrs, coding_attr_undecided_inhibit_null_byte_detection,
+           args[coding_arg_undecided_inhibit_null_byte_detection]);
+      ASET (attrs, coding_attr_undecided_inhibit_iso_escape_detection,
+           args[coding_arg_undecided_inhibit_iso_escape_detection]);
+      ASET (attrs, coding_attr_undecided_prefer_utf_8,
+           args[coding_arg_undecided_prefer_utf_8]);
+      category = coding_category_undecided;
+    }
    else
      error ("Invalid coding system type: %s",
            SDATA (SYMBOL_NAME (coding_type)));
    else
      error ("Invalid coding system type: %s",
            SDATA (SYMBOL_NAME (coding_type)));
@@ -9982,7 +10455,7 @@ usage: (define-coding-system-internal ...)  */)
        && ! EQ (eol_type, Qmac))
      error ("Invalid eol-type");
  
        && ! EQ (eol_type, Qmac))
      error ("Invalid eol-type");
  
-  aliases = Fcons (name, Qnil);
+  aliases = list1 (name);
  
    if (NILP (eol_type))
      {
  
    if (NILP (eol_type))
      {
@@ -9992,9 +10465,10 @@ usage: (define-coding-system-internal ...)  */)
           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
  
           this_name = AREF (eol_type, i);
           Lisp_Object this_spec, this_name, this_aliases, this_eol_type;
  
           this_name = AREF (eol_type, i);
-         this_aliases = Fcons (this_name, Qnil);
+         this_aliases = list1 (this_name);
           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
           this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac);
-         this_spec = Fmake_vector (make_number (3), attrs);
+         this_spec = make_uninit_vector (3);
+         ASET (this_spec, 0, attrs);
           ASET (this_spec, 1, this_aliases);
           ASET (this_spec, 2, this_eol_type);
           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
           ASET (this_spec, 1, this_aliases);
           ASET (this_spec, 2, this_eol_type);
           Fputhash (this_name, this_spec, Vcoding_system_hash_table);
@@ -10007,7 +10481,8 @@ usage: (define-coding-system-internal ...)  */)
         }
      }
  
         }
      }
  
-  spec_vec = Fmake_vector (make_number (3), attrs);
+  spec_vec = make_uninit_vector (3);
+  ASET (spec_vec, 0, attrs);
    ASET (spec_vec, 1, aliases);
    ASET (spec_vec, 2, eol_type);
  
    ASET (spec_vec, 1, aliases);
    ASET (spec_vec, 2, eol_type);
  
@@ -10105,7 +10580,7 @@ DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias,
       list.  */
    while (!NILP (XCDR (aliases)))
      aliases = XCDR (aliases);
       list.  */
    while (!NILP (XCDR (aliases)))
      aliases = XCDR (aliases);
-  XSETCDR (aliases, Fcons (alias, Qnil));
+  XSETCDR (aliases, list1 (alias));
  
    eol_type = AREF (spec, 2);
    if (VECTORP (eol_type))
  
    eol_type = AREF (spec, 2);
    if (VECTORP (eol_type))
@@ -10339,11 +10814,6 @@ syms_of_coding (void)
    Fput (Qcoding_system_error, Qerror_message,
         build_pure_c_string ("Invalid coding system"));
  
    Fput (Qcoding_system_error, Qerror_message,
         build_pure_c_string ("Invalid coding system"));
  
-  /* Intern this now in case it isn't already done.
-     Setting this variable twice is harmless.
-     But don't staticpro it here--that is done in alloc.c.  */
-  Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
-
    DEFSYM (Qtranslation_table, "translation-table");
    Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
    DEFSYM (Qtranslation_table_id, "translation-table-id");
    DEFSYM (Qtranslation_table, "translation-table");
    Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2));
    DEFSYM (Qtranslation_table_id, "translation-table-id");
@@ -10412,10 +10882,8 @@ syms_of_coding (void)
         intern_c_string ("coding-category-undecided"));
  
    DEFSYM (Qinsufficient_source, "insufficient-source");
         intern_c_string ("coding-category-undecided"));
  
    DEFSYM (Qinsufficient_source, "insufficient-source");
-  DEFSYM (Qinconsistent_eol, "inconsistent-eol");
    DEFSYM (Qinvalid_source, "invalid-source");
    DEFSYM (Qinterrupted, "interrupted");
    DEFSYM (Qinvalid_source, "invalid-source");
    DEFSYM (Qinterrupted, "interrupted");
-  DEFSYM (Qinsufficient_memory, "insufficient-memory");
    DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
  
    defsubr (&Scoding_system_p);
    DEFSYM (Qcoding_system_define_form, "coding-system-define-form");
  
    defsubr (&Scoding_system_p);
@@ -10716,7 +11184,7 @@ reading if you suppress escape sequence detection.
  
  The other way to read escape sequences in a file without decoding is
  to explicitly specify some coding system that doesn't use ISO-2022
  
  The other way to read escape sequences in a file without decoding is
  to explicitly specify some coding system that doesn't use ISO-2022
-escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].  */);
+escape sequence (e.g., `latin-1') on reading by \\[universal-coding-system-argument].  */);
    inhibit_iso_escape_detection = 0;
  
    DEFVAR_BOOL ("inhibit-null-byte-detection",
    inhibit_iso_escape_detection = 0;
  
    DEFVAR_BOOL ("inhibit-null-byte-detection",
@@ -10732,6 +11200,11 @@ from GNU Find and GNU Grep.  Emacs will then ignore the null bytes and
  decode text as usual.  */);
    inhibit_null_byte_detection = 0;
  
  decode text as usual.  */);
    inhibit_null_byte_detection = 0;
  
+  DEFVAR_BOOL ("disable-ascii-optimization", disable_ascii_optimization,
+              doc: /* If non-nil, Emacs does not optimize code decoder for ASCII files.
+Internal use only.  Removed after the experimental optimizer gets stable. */);
+  disable_ascii_optimization = 0;
+
    DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
                doc: /* Char table for translating self-inserting characters.
  This is applied to the result of input methods, not their input.
    DEFVAR_LISP ("translation-table-for-input", Vtranslation_table_for_input,
                doc: /* Char table for translating self-inserting characters.
  This is applied to the result of input methods, not their input.
@@ -10743,11 +11216,11 @@ internal character representation.  */);
      Vtranslation_table_for_input = Qnil;
  
    {
      Vtranslation_table_for_input = Qnil;
  
    {
-    Lisp_Object args[coding_arg_max];
+    Lisp_Object args[coding_arg_undecided_max];
      Lisp_Object plist[16];
      int i;
  
      Lisp_Object plist[16];
      int i;
  
-    for (i = 0; i < coding_arg_max; i++)
+    for (i = 0; i < coding_arg_undecided_max; i++)
        args[i] = Qnil;
  
      plist[0] = intern_c_string (":name");
        args[i] = Qnil;
  
      plist[0] = intern_c_string (":name");
@@ -10784,7 +11257,9 @@ character.");
      plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
      plist[15] = args[coding_arg_eol_type] = Qnil;
      args[coding_arg_plist] = Flist (16, plist);
      plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding.");
      plist[15] = args[coding_arg_eol_type] = Qnil;
      args[coding_arg_plist] = Flist (16, plist);
-    Fdefine_coding_system_internal (coding_arg_max, args);
+    args[coding_arg_undecided_inhibit_null_byte_detection] = make_number (0);
+    args[coding_arg_undecided_inhibit_iso_escape_detection] = make_number (0);
+    Fdefine_coding_system_internal (coding_arg_undecided_max, args);
    }
  
    setup_coding_system (Qno_conversion, &safe_terminal_coding);
    }
  
    setup_coding_system (Qno_conversion, &safe_terminal_coding);