Merge from emacs--devo--0

[gnu-emacs] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index 659b52b93fb03704163c818c4268322beadb8890..e2ce0c9f8de7a2b8d87ddd726639a382f6b4af0e 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,7 +1,8 @@
  /* Coding system handler (conversion, detection, and etc).
     Copyright (C) 2001, 2002, 2003, 2004, 2005,
-                 2006 Free Software Foundation, Inc.
-   Copyright (C) 1995, 1997, 1998, 2002, 2003, 2004, 2005
+                 2006, 2007 Free Software Foundation, Inc.
+   Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
+     2005, 2006, 2007
       National Institute of Advanced Industrial Science and Technology (AIST)
       Registration Number H14PRO021
  
@@ -9,7 +10,7 @@ This file is part of GNU Emacs.
  
  GNU Emacs is free software; you can redistribute it and/or modify
  it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
  any later version.
  
  GNU Emacs is distributed in the hope that it will be useful,
@@ -219,14 +220,15 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
  
  
  /* Like ONE_MORE_BYTE, but 8-bit bytes of data at SRC are in multibyte
-   form if MULTIBYTEP is nonzero.  */
+   form if MULTIBYTEP is nonzero.  In addition, if SRC is not less
+   than SRC_END, return with RET.  */
  
-#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep)          \
+#define ONE_MORE_BYTE_CHECK_MULTIBYTE(c1, multibytep, ret)     \
    do {                                                         \
      if (src >= src_end)                                                \
        {                                                                \
         coding->result = CODING_FINISH_INSUFFICIENT_SRC;        \
-       goto label_end_of_loop;                                 \
+       return ret;                                             \
        }                                                                \
      c1 = *src++;                                               \
      if (multibytep && c1 == LEADING_CODE_8_BIT_CONTROL)                \
@@ -628,15 +630,15 @@ detect_coding_emacs_mule (src, src_end, multibytep)
  
    while (1)
      {
-      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
-
+      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
+                                    CODING_CATEGORY_MASK_EMACS_MULE);
        if (composing)
         {
           if (c < 0xA0)
             composing = 0;
           else if (c == 0xA0)
             {
-             ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+             ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
               c &= 0x7F;
             }
           else
@@ -665,8 +667,6 @@ detect_coding_emacs_mule (src, src_end, multibytep)
             }
         }
      }
- label_end_of_loop:
-  return CODING_CATEGORY_MASK_EMACS_MULE;
  }
  
  
@@ -1421,9 +1421,9 @@ detect_coding_iso2022 (src, src_end, multibytep)
    Lisp_Object safe_chars;
  
    reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
-  while (mask && src < src_end)
+  while (mask)
      {
-      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
      retry:
        switch (c)
         {
@@ -1431,11 +1431,11 @@ detect_coding_iso2022 (src, src_end, multibytep)
           if (inhibit_iso_escape_detection)
             break;
           single_shifting = 0;
-         ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+         ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
           if (c >= '(' && c <= '/')
             {
               /* Designation sequence for a charset of dimension 1.  */
-             ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+             ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, mask & mask_found);
               if (c1 < ' ' || c1 >= 0x80
                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
                 /* Invalid designation sequence.  Just ignore.  */
@@ -1445,13 +1445,14 @@ detect_coding_iso2022 (src, src_end, multibytep)
           else if (c == '$')
             {
               /* Designation sequence for a charset of dimension 2.  */
-             ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+             ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, mask & mask_found);
               if (c >= '@' && c <= 'B')
                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
                 reg[0] = charset = iso_charset_table[1][0][c];
               else if (c >= '(' && c <= '/')
                 {
-                 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
+                 ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep,
+                                                mask & mask_found);
                   if (c1 < ' ' || c1 >= 0x80
                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
                     /* Invalid designation sequence.  Just ignore.  */
@@ -1626,7 +1627,8 @@ detect_coding_iso2022 (src, src_end, multibytep)
                   c = -1;
                   while (src < src_end)
                     {
-                     ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+                     ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep,
+                                                    mask & mask_found);
                       if (c < 0xA0)
                         break;
                       i++;
@@ -1644,7 +1646,6 @@ detect_coding_iso2022 (src, src_end, multibytep)
           break;
         }
      }
- label_end_of_loop:
    return (mask & mask_found);
  }
  
@@ -2915,20 +2916,18 @@ detect_coding_sjis (src, src_end, multibytep)
  
    while (1)
      {
-      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_SJIS);
        if (c < 0x80)
         continue;
        if (c == 0x80 || c == 0xA0 || c > 0xEF)
         return 0;
        if (c <= 0x9F || c >= 0xE0)
         {
-         ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+         ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
           if (c < 0x40 || c == 0x7F || c > 0xFC)
             return 0;
         }
      }
- label_end_of_loop:
-  return CODING_CATEGORY_MASK_SJIS;
  }
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
@@ -2947,17 +2946,15 @@ detect_coding_big5 (src, src_end, multibytep)
  
    while (1)
      {
-      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_BIG5);
        if (c < 0x80)
         continue;
        if (c < 0xA1 || c > 0xFE)
         return 0;
-      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
        if (c < 0x40 || (c > 0x7F && c < 0xA1) || c > 0xFE)
         return 0;
      }
- label_end_of_loop:
-  return CODING_CATEGORY_MASK_BIG5;
  }
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
@@ -2985,7 +2982,7 @@ detect_coding_utf_8 (src, src_end, multibytep)
  
    while (1)
      {
-      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_UTF_8);
        if (UTF_8_1_OCTET_P (c))
         continue;
        else if (UTF_8_2_OCTET_LEADING_P (c))
@@ -3003,16 +3000,13 @@ detect_coding_utf_8 (src, src_end, multibytep)
  
        do
         {
-         ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+         ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, 0);
           if (!UTF_8_EXTRA_OCTET_P (c))
             return 0;
           seq_maybe_bytes--;
         }
        while (seq_maybe_bytes > 0);
      }
-
- label_end_of_loop:
-  return CODING_CATEGORY_MASK_UTF_8;
  }
  
  /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
@@ -3041,15 +3035,13 @@ detect_coding_utf_16 (src, src_end, multibytep)
    struct coding_system dummy_coding;
    struct coding_system *coding = &dummy_coding;
  
-  ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep);
-  ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep);
+  ONE_MORE_BYTE_CHECK_MULTIBYTE (c1, multibytep, 0);
+  ONE_MORE_BYTE_CHECK_MULTIBYTE (c2, multibytep, 0);
  
    if ((c1 == 0xFF) && (c2 == 0xFE))
      return CODING_CATEGORY_MASK_UTF_16_LE;
    else if ((c1 == 0xFE) && (c2 == 0xFF))
      return CODING_CATEGORY_MASK_UTF_16_BE;
-
- label_end_of_loop:
    return 0;
  }
  
@@ -3318,12 +3310,10 @@ detect_coding_ccl (src, src_end, multibytep)
    valid = coding_system_table[CODING_CATEGORY_IDX_CCL]->spec.ccl.valid_codes;
    while (1)
      {
-      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep);
+      ONE_MORE_BYTE_CHECK_MULTIBYTE (c, multibytep, CODING_CATEGORY_MASK_CCL);
        if (! valid[c])
         return 0;
      }
- label_end_of_loop:
-  return CODING_CATEGORY_MASK_CCL;
  }
  
  \f
@@ -5588,6 +5578,8 @@ code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
        inhibit_modification_hooks = saved_inhibit_modification_hooks;
      }
  
+  coding->heading_ascii = 0;
+
    if (! encodep && CODING_REQUIRE_DETECTION (coding))
      {
        /* We must detect encoding of text and eol format.  */
@@ -6068,7 +6060,7 @@ set_conversion_work_buffer (multibyte)
        /* As we are already in the work buffer, we must generate a new
          buffer for the work.  */
        Lisp_Object name;
-       
+
        name = Fgenerate_new_buffer_name (Vcode_conversion_workbuf_name, Qnil);
        buffer = buffer_to_kill = Fget_buffer_create (name);
        buf = XBUFFER (buffer);
@@ -6232,6 +6224,8 @@ decode_coding_string (str, coding, nocopy)
    saved_coding_symbol = coding->symbol;
    coding->src_multibyte = STRING_MULTIBYTE (str);
    coding->dst_multibyte = 1;
+  coding->heading_ascii = 0;
+
    if (CODING_REQUIRE_DETECTION (coding))
      {
        /* See the comments in code_convert_region.  */
@@ -6444,6 +6438,7 @@ encode_coding_string (str, coding, nocopy)
    /* Try to skip the heading and tailing ASCIIs.  We can't skip them
       if we must run CCL program or there are compositions to
       encode.  */
+  coding->heading_ascii = 0;
    if (coding->type != coding_type_ccl
        && (! coding->cmp_data || coding->cmp_data->used == 0))
      {
@@ -6591,8 +6586,7 @@ The value of this property should be a vector of length 5.  */)
      }
    if (!NILP (Fcoding_system_p (coding_system)))
      return coding_system;
-  while (1)
-    Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+  xsignal1 (Qcoding_system_error, coding_system);
  }
  \f
  Lisp_Object
@@ -6666,7 +6660,8 @@ sequence containing the bytes in the region between START and END when
  the coding system `undecided' is specified.  The list is ordered by
  priority decided in the current language environment.
  
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for such ISO-2022 control
+characters ISO-2022 as ESC), it returns a list of single element
  `undecided' or its subsidiary coding system according to a detected
  end-of-line format.
  
@@ -6713,7 +6708,8 @@ sequence containing the bytes in STRING when the coding system
  `undecided' is specified.  The list is ordered by priority decided in
  the current language environment.
  
-If only ASCII characters are found, it returns a list of single element
+If only ASCII characters are found (except for such ISO-2022 control
+characters ISO-2022 as ESC), it returns a list of single element
  `undecided' or its subsidiary coding system according to a detected
  end-of-line format.
  
@@ -7280,7 +7276,7 @@ Return the corresponding character.  */)
  }
  
  DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
-       doc: /* Encode a Japanese character CHAR to shift_jis encoding.
+       doc: /* Encode a Japanese character CH to shift_jis encoding.
  Return the corresponding code in SJIS.  */)
       (ch)
       Lisp_Object ch;
@@ -7340,7 +7336,7 @@ Return the corresponding character.  */)
  }
  
  DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
-       doc: /* Encode the Big5 character CHAR to BIG5 coding system.
+       doc: /* Encode the Big5 character CH to BIG5 coding system.
  Return the corresponding character code in Big5.  */)
       (ch)
       Lisp_Object ch;
@@ -7465,7 +7461,7 @@ is selected as the TARGET.  For example, if OPERATION does file I/O,
  whichever argument specifies the file name is TARGET.
  
  TARGET has a meaning which depends on OPERATION:
-  For file I/O, TARGET is a file name.
+  For file I/O, TARGET is a file name (except for the special case below).
    For process I/O, TARGET is a process name.
    For network I/O, TARGET is a service name or a port number
  
@@ -7476,8 +7472,17 @@ They may specify a coding system, a cons of coding systems,
  or a function symbol to call.
  In the last case, we call the function with one argument,
  which is a list of all the arguments given to this function.
+If the function can't decide a coding system, it can return
+`undecided' so that the normal code-detection is performed.
+
+If OPERATION is `insert-file-contents', the argument corresponding to
+TARGET may be a cons (FILENAME . BUFFER).  In that case, FILENAME is a
+file name to look up, and BUFFER is a buffer that contains the file's
+contents (not yet decoded).  If `file-coding-system-alist' specifies a
+function to call for FILENAME, that function should examine the
+contents of BUFFER instead of reading the file.
  
-usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
+usage: (find-operation-coding-system OPERATION ARGUMENTS...)  */)
       (nargs, args)
       int nargs;
       Lisp_Object *args;
@@ -7503,8 +7508,12 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
      target_idx = make_number (4);
    target = args[XINT (target_idx) + 1];
    if (!(STRINGP (target)
+       || (EQ (operation, Qinsert_file_contents) && CONSP (target)
+           && STRINGP (XCAR (target)) && BUFFERP (XCDR (target)))
         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
      error ("Invalid argument %d", XINT (target_idx) + 1);
+  if (CONSP (target))
+    target = XCAR (target);
  
    chain = ((EQ (operation, Qinsert_file_contents)
             || EQ (operation, Qwrite_region))
@@ -7537,6 +7546,9 @@ usage: (find-operation-coding-system OPERATION ARGUMENTS ...)  */)
             return Fcons (val, val);
           if (! NILP (Ffboundp (val)))
             {
+             /* We use call1 rather than safe_call1
+                so as to get bug reports about functions called here
+                which don't handle the current interface.  */
               val = call1 (val, Flist (nargs, args));
               if (CONSP (val))
                 return val;
@@ -7620,11 +7632,13 @@ This function is internal use only.  */)
    Lisp_Object safe_chars, slot;
  
    if (NILP (Fcheck_coding_system (coding_system)))
-    Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
+    xsignal1 (Qcoding_system_error, coding_system);
+
    safe_chars = coding_safe_chars (coding_system);
    if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars))
      error ("No valid safe-chars property for %s",
            SDATA (SYMBOL_NAME (coding_system)));
+
    if (EQ (safe_chars, Qt))
      {
        if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars))))
@@ -7959,8 +7973,11 @@ the file contents.
  If VAL is a cons of coding systems, the car part is used for decoding,
  and the cdr part is used for encoding.
  If VAL is a function symbol, the function must return a coding system
-or a cons of coding systems which are used as above.  The function gets
-the arguments with which `find-operation-coding-system' was called.
+or a cons of coding systems which are used as above.  The function is
+called with an argument that is a list of the arguments with which
+`find-operation-coding-system' was called.  If the function can't decide
+a coding system, it can return `undecided' so that the normal
+code-detection is performed.
  
  See also the function `find-operation-coding-system'
  and the variable `auto-coding-alist'.  */);