*** empty log message ***

[gnu-emacs] / src / coding.c
diff --git a/src/coding.c b/src/coding.c

index 486409414121c1807bcb3a1048d31b4d3c90220d..6d36cc397c6bf02a6a524e480e1bfdc79442c8f6 100644 (file)
--- a/src/coding.c
+++ b/src/coding.c
@@ -1,5 +1,5 @@
  /* Coding system handler (conversion, detection, and etc).
-   Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN.
+   Copyright (C) 1995,97,1998,2002,2003  Electrotechnical Laboratory, JAPAN.
     Licensed to the Free Software Foundation.
     Copyright (C) 2001,2002,2003  Free Software Foundation, Inc.
  
@@ -345,6 +345,7 @@ encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
  #include "ccl.h"
  #include "coding.h"
  #include "window.h"
+#include "intervals.h"
  
  #else  /* not emacs */
  
@@ -399,6 +400,8 @@ Lisp_Object Qcoding_system_p, Qcoding_system_error;
     end-of-line format.  */
  Lisp_Object Qemacs_mule, Qraw_text;
  
+Lisp_Object Qutf_8;
+
  /* Coding-systems are handed between Emacs Lisp programs and C internal
     routines by the following three variables.  */
  /* Coding-system for reading files and receiving data from process.  */
@@ -712,7 +715,7 @@ detect_coding_emacs_mule (src, src_end, multibytep)
  #define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p)               \
    do {                                                         \
      int bytes;                                                 \
-                                                               \
+                                                               \
      c = SAFE_ONE_MORE_BYTE ();                                 \
      if (c < 0)                                                 \
        break;                                                   \
@@ -743,7 +746,10 @@ detect_coding_emacs_mule (src, src_end, multibytep)
               break;                                            \
             *p++ = c;                                           \
           }                                                     \
-       if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes))     \
+       if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)      \
+           || (coding->flags /* We are recovering a file.  */  \
+               && p0[0] == LEADING_CODE_8_BIT_CONTROL          \
+               && ! CHAR_HEAD_P (p0[1])))                      \
           c = STRING_CHAR (p0, bytes);                          \
         else                                                    \
           c = -1;                                               \
@@ -847,7 +853,10 @@ decode_composition_emacs_mule (coding, src, src_end,
           else
             {
               int bytes;
-             if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+             if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+                 || (coding->flags /* We are recovering a file.  */
+                     && src[0] == LEADING_CODE_8_BIT_CONTROL
+                     && ! CHAR_HEAD_P (src[1])))
                 c = STRING_CHAR (src, bytes);
               else
                 c = *src, bytes = 1;
@@ -1001,7 +1010,10 @@ decode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
           p = tmp;
           src++;
         }
-      else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+      else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+              || (coding->flags /* We are recovering a file.  */
+                  && src[0] == LEADING_CODE_8_BIT_CONTROL
+                  && ! CHAR_HEAD_P (src[1])))
         {
           p = src;
           src += bytes;
@@ -1132,7 +1144,22 @@ encode_coding_emacs_mule (coding, source, destination, src_bytes, dst_bytes)
             EMIT_ONE_BYTE ('\r');
         }
        else if (SINGLE_BYTE_CHAR_P (c))
-       EMIT_ONE_BYTE (c);
+       {
+         if (coding->flags && ! ASCII_BYTE_P (c))
+           {
+             /* As we are auto saving, retain the multibyte form for
+                8-bit chars.  */
+             unsigned char buf[MAX_MULTIBYTE_LENGTH];
+             int bytes = CHAR_STRING (c, buf);
+
+             if (bytes == 1)
+               EMIT_ONE_BYTE (buf[0]);
+             else
+               EMIT_TWO_BYTES (buf[0], buf[1]);
+           }
+         else
+           EMIT_ONE_BYTE (c);
+       }
        else
         EMIT_BYTES (src_base, src);
        coding->consumed_char++;
@@ -1796,7 +1823,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
  
    while (1)
      {
-      int c1, c2;
+      int c1, c2 = 0;
  
        src_base = src;
        ONE_MORE_BYTE (c1);
@@ -2036,6 +2063,83 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
                 }
               continue;
  
+           case '%':
+             if (COMPOSING_P (coding))
+               DECODE_COMPOSITION_END ('1');
+             ONE_MORE_BYTE (c1);
+             if (c1 == '/')
+               {
+                 /* CTEXT extended segment:
+                    ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
+                    We keep these bytes as is for the moment.
+                    They may be decoded by post-read-conversion.  */
+                 int dim, M, L;
+                 int size, required;
+                 int produced_chars;
+                 
+                 ONE_MORE_BYTE (dim);
+                 ONE_MORE_BYTE (M);
+                 ONE_MORE_BYTE (L);
+                 size = ((M - 128) * 128) + (L - 128);
+                 required = 8 + size * 2;
+                 if (dst + required > (dst_bytes ? dst_end : src))
+                   goto label_end_of_loop;
+                 *dst++ = ISO_CODE_ESC;
+                 *dst++ = '%';
+                 *dst++ = '/';
+                 *dst++ = dim;
+                 produced_chars = 4;
+                 dst += CHAR_STRING (M, dst), produced_chars++;
+                 dst += CHAR_STRING (L, dst), produced_chars++;
+                 while (size-- > 0)
+                   {
+                     ONE_MORE_BYTE (c1);
+                     dst += CHAR_STRING (c1, dst), produced_chars++;
+                   }
+                 coding->produced_char += produced_chars;
+               }
+             else if (c1 == 'G')
+               {
+                 unsigned char *d = dst;
+                 int produced_chars;
+
+                 /* XFree86 extension for embedding UTF-8 in CTEXT:
+                    ESC % G --UTF-8-BYTES-- ESC % @
+                    We keep these bytes as is for the moment.
+                    They may be decoded by post-read-conversion.  */
+                 if (d + 6 > (dst_bytes ? dst_end : src))
+                   goto label_end_of_loop;
+                 *d++ = ISO_CODE_ESC;
+                 *d++ = '%';
+                 *d++ = 'G';
+                 produced_chars = 3;
+                 while (d + 1 < (dst_bytes ? dst_end : src))
+                   {
+                     ONE_MORE_BYTE (c1);
+                     if (c1 == ISO_CODE_ESC
+                         && src + 1 < src_end
+                         && src[0] == '%'
+                         && src[1] == '@')
+                       {
+                         /* Consume the "% @" of the terminator too, so
+                            that it is not decoded again as plain text.  */
+                         src += 2;
+                         break;
+                       }
+                     d += CHAR_STRING (c1, d), produced_chars++;
+                   }
+                 if (d + 3 > (dst_bytes ? dst_end : src))
+                   goto label_end_of_loop;
+                 *d++ = ISO_CODE_ESC;
+                 *d++ = '%';
+                 *d++ = '@';
+                 dst = d;
+                 coding->produced_char += produced_chars + 3;
+               }
+             else
+               goto label_invalid_code;
+             continue;
+
             default:
               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
                 goto label_invalid_code;
@@ -2951,7 +3050,7 @@ decode_coding_sjis_big5 (coding, source, destination,
    coding->produced_char = 0;
    while (1)
      {
-      int c, charset, c1, c2;
+      int c, charset, c1, c2 = 0;
  
        src_base = src;
        ONE_MORE_BYTE (c1);
@@ -4407,7 +4506,11 @@ encoding_buffer_size (coding, src_bytes)
    int magnification;
  
    if (coding->type == coding_type_ccl)
-    magnification = coding->spec.ccl.encoder.buf_magnification;
+    {
+      magnification = coding->spec.ccl.encoder.buf_magnification;
+      if (coding->eol_type == CODING_EOL_CRLF)
+       magnification *= 2;
+    }
    else if (CODING_REQUIRE_ENCODING (coding))
      magnification = 3;
    else
@@ -4492,7 +4595,10 @@ ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
        if (ccl->eol_type ==CODING_EOL_UNDECIDED)
         ccl->eol_type = CODING_EOL_LF;
        ccl->cr_consumed = coding->spec.ccl.cr_carryover;
+      ccl->eight_bit_control = coding->dst_multibyte;
      }
+  else
+    ccl->eight_bit_control = 1;
    ccl->multibyte = coding->src_multibyte;
    if (coding->spec.ccl.eight_bit_carryover[0] != 0)
      {
@@ -5892,12 +5998,13 @@ run_pre_post_conversion_on_str (str, coding, encodep)
    buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
    buf = XBUFFER (buffer);
  
+  delete_all_overlays (buf);
    buf->directory = current_buffer->directory;
    buf->read_only = Qnil;
    buf->filename = Qnil;
    buf->undo_list = Qt;
-  buf->overlays_before = Qnil;
-  buf->overlays_after = Qnil;
+  eassert (buf->overlays_before == NULL);
+  eassert (buf->overlays_after == NULL);
  
    set_buffer_internal (buf);
    /* We must insert the contents of STR as is without
@@ -6433,7 +6540,7 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
       Lisp_Object safe_codings, work_table;
       int *single_byte_char_found;
  {
-  int c, len, i;
+  int c, len;
    Lisp_Object val, ch;
    Lisp_Object prev, tail;
  
@@ -7485,6 +7592,9 @@ syms_of_coding ()
    Qraw_text = intern ("raw-text");
    staticpro (&Qraw_text);
  
+  Qutf_8 = intern ("utf-8");
+  staticpro (&Qutf_8);
+
    defsubr (&Scoding_system_p);
    defsubr (&Sread_coding_system);
    defsubr (&Sread_non_nil_coding_system);
@@ -7771,3 +7881,5 @@ emacs_strerror (error_number)
  
  #endif /* emacs */
  
+/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
+   (do not change this comment) */