/* Coding system handler (conversion, detection, and etc).
- Copyright (C) 1995, 1997, 1998, 2002 Electrotechnical Laboratory, JAPAN.
+ Copyright (C) 1995,97,1998,2002,2003 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
Copyright (C) 2001,2002,2003 Free Software Foundation, Inc.
#include "ccl.h"
#include "coding.h"
#include "window.h"
+#include "intervals.h"
#else /* not emacs */
end-of-line format. */
Lisp_Object Qemacs_mule, Qraw_text;
+Lisp_Object Qutf_8;
+
/* Coding-systems are handed between Emacs Lisp programs and C internal
routines by the following three variables. */
/* Coding-system for reading files and receiving data from process. */
#define DECODE_EMACS_MULE_COMPOSITION_CHAR(c, p) \
do { \
int bytes; \
- \
+ \
c = SAFE_ONE_MORE_BYTE (); \
if (c < 0) \
break; \
break; \
*p++ = c; \
} \
- if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes)) \
+ if (UNIBYTE_STR_AS_MULTIBYTE_P (p0, p - p0, bytes) \
+ || (coding->flags /* We are recovering a file. */ \
+ && p0[0] == LEADING_CODE_8_BIT_CONTROL \
+ && ! CHAR_HEAD_P (p0[1]))) \
c = STRING_CHAR (p0, bytes); \
else \
c = -1; \
else
{
int bytes;
- if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+ if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+ || (coding->flags /* We are recovering a file. */
+ && src[0] == LEADING_CODE_8_BIT_CONTROL
+ && ! CHAR_HEAD_P (src[1])))
c = STRING_CHAR (src, bytes);
else
c = *src, bytes = 1;
p = tmp;
src++;
}
- else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+ else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes)
+ || (coding->flags /* We are recovering a file. */
+ && src[0] == LEADING_CODE_8_BIT_CONTROL
+ && ! CHAR_HEAD_P (src[1])))
{
p = src;
src += bytes;
EMIT_ONE_BYTE ('\r');
}
else if (SINGLE_BYTE_CHAR_P (c))
- EMIT_ONE_BYTE (c);
+ {
+ if (coding->flags && ! ASCII_BYTE_P (c))
+ {
+ /* As we are auto saving, retain the multibyte form for
+ 8-bit chars. */
+ unsigned char buf[MAX_MULTIBYTE_LENGTH];
+ int bytes = CHAR_STRING (c, buf);
+
+ if (bytes == 1)
+ EMIT_ONE_BYTE (buf[0]);
+ else
+ EMIT_TWO_BYTES (buf[0], buf[1]);
+ }
+ else
+ EMIT_ONE_BYTE (c);
+ }
else
EMIT_BYTES (src_base, src);
coding->consumed_char++;
while (1)
{
- int c1, c2;
+ int c1, c2 = 0;
src_base = src;
ONE_MORE_BYTE (c1);
}
continue;
+ case '%': /* ESC % escape: the next byte selects the form
+ ('/' or 'G'); any other byte is treated as invalid. */
+ if (COMPOSING_P (coding))
+ DECODE_COMPOSITION_END ('1');
+ ONE_MORE_BYTE (c1);
+ if (c1 == '/')
+ {
+ /* CTEXT extended segment:
+ ESC % / [0-4] M L --ENCODING-NAME-- \002 --BYTES--
+ We keep these bytes as is for the moment.
+ They may be decoded by post-read-conversion.
+ M and L together give the count of bytes that follow
+ (SIZE below); those bytes are passed through CHAR_STRING
+ one at a time, and PRODUCED_CHARS counts one character
+ per byte emitted that way plus the four literal bytes.
+ NOTE(review): M and L are used without a range check, so
+ SIZE can be negative when either is below 128; REQUIRED
+ then drops below 8 and the copy loop is skipped --
+ confirm such input cannot reach here. */
+ int dim, M, L;
+ int size, required;
+ int produced_chars;
+
+ ONE_MORE_BYTE (dim);
+ ONE_MORE_BYTE (M);
+ ONE_MORE_BYTE (L);
+ size = ((M - 128) * 128) + (L - 128);
+ required = 8 + size * 2; /* presumably two bytes at most per
+ CHAR_STRING result -- TODO confirm */
+ if (dst + required > (dst_bytes ? dst_end : src))
+ goto label_end_of_loop;
+ *dst++ = ISO_CODE_ESC;
+ *dst++ = '%';
+ *dst++ = '/';
+ *dst++ = dim;
+ produced_chars = 4;
+ dst += CHAR_STRING (M, dst), produced_chars++;
+ dst += CHAR_STRING (L, dst), produced_chars++;
+ while (size-- > 0)
+ {
+ ONE_MORE_BYTE (c1);
+ dst += CHAR_STRING (c1, dst), produced_chars++;
+ }
+ coding->produced_char += produced_chars;
+ }
+ else if (c1 == 'G')
+ {
+ unsigned char *d = dst;
+ int produced_chars;
+
+ /* XFree86 extension for embedding UTF-8 in CTEXT:
+ ESC % G --UTF-8-BYTES-- ESC % @
+ We keep these bytes as is for the moment.
+ They may be decoded by post-read-conversion.
+ Output is staged through D and committed to DST only
+ after the closing ESC % @ has been written; every early
+ exit leaves DST untouched.
+ NOTE(review): when the loop below breaks on the closing
+ ESC, the `% @' matched at src[0] and src[1] is not
+ skipped here; confirm the outer loop is meant to read
+ those two bytes again. */
+ if (d + 6 > (dst_bytes ? dst_end : src))
+ goto label_end_of_loop;
+ *d++ = ISO_CODE_ESC;
+ *d++ = '%';
+ *d++ = 'G';
+ produced_chars = 3;
+ while (d + 1 < (dst_bytes ? dst_end : src))
+ {
+ ONE_MORE_BYTE (c1);
+ if (c1 == ISO_CODE_ESC
+ && src + 1 < src_end
+ && src[0] == '%'
+ && src[1] == '@')
+ break;
+ d += CHAR_STRING (c1, d), produced_chars++;
+ }
+ if (d + 3 > (dst_bytes ? dst_end : src))
+ goto label_end_of_loop;
+ *d++ = ISO_CODE_ESC;
+ *d++ = '%';
+ *d++ = '@';
+ dst = d;
+ coding->produced_char += produced_chars + 3;
+ }
+ else
+ goto label_invalid_code;
+ continue;
+
default:
if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
goto label_invalid_code;
coding->produced_char = 0;
while (1)
{
- int c, charset, c1, c2;
+ int c, charset, c1, c2 = 0;
src_base = src;
ONE_MORE_BYTE (c1);
int magnification;
if (coding->type == coding_type_ccl)
- magnification = coding->spec.ccl.encoder.buf_magnification;
+ {
+ magnification = coding->spec.ccl.encoder.buf_magnification;
+ if (coding->eol_type == CODING_EOL_CRLF)
+ magnification *= 2;
+ }
else if (CODING_REQUIRE_ENCODING (coding))
magnification = 3;
else
if (ccl->eol_type ==CODING_EOL_UNDECIDED)
ccl->eol_type = CODING_EOL_LF;
ccl->cr_consumed = coding->spec.ccl.cr_carryover;
+ ccl->eight_bit_control = coding->dst_multibyte;
}
+ else
+ ccl->eight_bit_control = 1;
ccl->multibyte = coding->src_multibyte;
if (coding->spec.ccl.eight_bit_carryover[0] != 0)
{
buffer = Fget_buffer_create (build_string (" *code-converting-work*"));
buf = XBUFFER (buffer);
+ delete_all_overlays (buf);
buf->directory = current_buffer->directory;
buf->read_only = Qnil;
buf->filename = Qnil;
buf->undo_list = Qt;
- buf->overlays_before = Qnil;
- buf->overlays_after = Qnil;
+ eassert (buf->overlays_before == NULL);
+ eassert (buf->overlays_after == NULL);
set_buffer_internal (buf);
/* We must insert the contents of STR as is without
Lisp_Object safe_codings, work_table;
int *single_byte_char_found;
{
- int c, len, i;
+ int c, len;
Lisp_Object val, ch;
Lisp_Object prev, tail;
Qraw_text = intern ("raw-text");
staticpro (&Qraw_text);
+ Qutf_8 = intern ("utf-8");
+ staticpro (&Qutf_8);
+
defsubr (&Scoding_system_p);
defsubr (&Sread_coding_system);
defsubr (&Sread_non_nil_coding_system);
#endif /* emacs */
+/* arch-tag: 3a3a2b01-5ff6-4071-9afe-f5b808d9229d
+ (do not change this comment) */