else \
c = *src, bytes = 1; \
if (!NILP (translation_table)) \
- c = translate_char (translation_table, c, 0, 0, 0); \
+ c = translate_char (translation_table, c, -1, 0, 0); \
src += bytes; \
} while (0)
Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
Lisp_Object Qno_conversion, Qundecided;
Lisp_Object Qcoding_system_history;
-Lisp_Object Qsafe_charsets;
+Lisp_Object Qsafe_chars;
Lisp_Object Qvalid_codes;
extern Lisp_Object Qinsert_file_contents, Qwrite_region;
/* Flag to inhibit code conversion of end-of-line format. */
int inhibit_eol_conversion;
+/* Flag to inhibit ISO2022 escape sequence detection. */
+int inhibit_iso_escape_detection;
+
/* Flag to make buffer-file-coding-system inherit from process-coding. */
int inherit_process_coding_system;
to avoid infinite recursive call. */
static int inhibit_pre_post_conversion;
+/* Char-table containing safe coding systems of each character. */
+Lisp_Object Vchar_coding_system_table;
+Lisp_Object Qchar_coding_system;
+
+/* Return the `safe-chars' property of coding system CODING, i.e. the
+   value stored under Qsafe_chars in the property list kept in slot 3
+   of CODING's coding-spec vector.  If that value is not a char-table,
+   return Qt instead, which CODING_SAFE_CHAR_P accepts for every
+   character.  Don't check validity of CODING.  */
+
+Lisp_Object
+coding_safe_chars (coding)
+     struct coding_system *coding;
+{
+  Lisp_Object coding_spec, plist, safe_chars;
+
+  coding_spec = Fget (coding->symbol, Qcoding_system);
+  plist = XVECTOR (coding_spec)->contents[3];
+  /* Use the plist fetched above instead of indexing the spec vector
+     a second time.  */
+  safe_chars = Fplist_get (plist, Qsafe_chars);
+  return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt);
+}
+
+/* Non-zero if character C is safe with respect to SAFE_CHARS, which
+   is either Qt (every character is accepted) or a char-table whose
+   entry for C is non-nil.  SAFE_CHARS appears twice in the
+   expansion, so it should be an expression without side effects.  */
+#define CODING_SAFE_CHAR_P(safe_chars, c) \
+ (EQ (safe_chars, Qt) || !NILP (CHAR_TABLE_REF (safe_chars, c)))
+
\f
/*** 2. Emacs internal format (emacs-mule) handlers ***/
unsigned char *src_base;
coding->produced_char = 0;
- while (src < src_end)
+ while ((src_base = src) < src_end)
{
unsigned char tmp[MAX_MULTIBYTE_LENGTH], *p;
int bytes;
- src_base = src;
- if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
+      if (*src == '\r')
+        {
+          /* Start with the CR just read.  When the EOL type is neither
+             CODING_EOL_CR nor CODING_EOL_CRLF, none of the branches
+             below assigns C, so without this initialization an
+             indeterminate value would be stored in the destination
+             instead of the CR itself.  */
+          int c = *src++;
+
+          if (coding->eol_type == CODING_EOL_CR)
+            c = '\n';
+          else if (coding->eol_type == CODING_EOL_CRLF)
+            {
+              ONE_MORE_BYTE (c);
+              if (c != '\n')
+                {
+                  if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+                    {
+                      coding->result = CODING_FINISH_INCONSISTENT_EOL;
+                      goto label_end_of_loop;
+                    }
+                  /* A lone CR: give back the byte read after it and
+                     produce the CR unchanged.  */
+                  src--;
+                  c = '\r';
+                }
+            }
+          *dst++ = c;
+          coding->produced_char++;
+          continue;
+        }
+ else if (*src == '\n')
+ {
+ if ((coding->eol_type == CODING_EOL_CR
+ || coding->eol_type == CODING_EOL_CRLF)
+ && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+ {
+ coding->result = CODING_FINISH_INCONSISTENT_EOL;
+ goto label_end_of_loop;
+ }
+ *dst++ = *src++;
+ coding->produced_char++;
+ continue;
+ }
+ else if (UNIBYTE_STR_AS_MULTIBYTE_P (src, src_end - src, bytes))
{
p = src;
src += bytes;
while (bytes--) *dst++ = *p++;
coding->produced_char++;
}
+ label_end_of_loop:
coding->consumed = coding->consumed_char = src_base - source;
coding->produced = dst - destination;
}
enum iso_code_class_type iso_code_class[256];
-#define CHARSET_OK(idx, charset) \
- (coding_system_table[idx] \
- && (coding_system_table[idx]->safe_charsets[charset] \
- || (CODING_SPEC_ISO_REQUESTED_DESIGNATION \
- (coding_system_table[idx], charset) \
- != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)))
+/* Non-zero if the coding system registered at index IDX of
+   coding_system_table exists, CHARSET is either CHARSET_ASCII or
+   character C is safe for that coding system, and the coding system
+   requests a designation for CHARSET.
+
+   As a side effect, unless CHARSET is CHARSET_ASCII, the result of
+   coding_safe_chars is assigned to a variable named `safe_chars',
+   which therefore must be declared where the macro is expanded.  */
+#define CHARSET_OK(idx, charset, c) \
+ (coding_system_table[idx] \
+ && (charset == CHARSET_ASCII \
+ || (safe_chars = coding_safe_chars (coding_system_table[idx]), \
+ CODING_SAFE_CHAR_P (safe_chars, c))) \
+ && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \
+ charset) \
+ != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
#define SHIFT_OUT_OK(idx) \
(CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
/* Dummy for ONE_MORE_BYTE. */
struct coding_system dummy_coding;
struct coding_system *coding = &dummy_coding;
+ Lisp_Object safe_chars;
reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
while (mask && src < src_end)
switch (c)
{
case ISO_CODE_ESC:
+ if (inhibit_iso_escape_detection)
+ break;
single_shifting = 0;
ONE_MORE_BYTE (c);
if (c >= '(' && c <= '/')
/* We found a valid designation sequence for CHARSET. */
mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
+ c = MAKE_CHAR (charset, 0, 0);
+ if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset, c))
mask_found |= CODING_CATEGORY_MASK_ISO_7;
else
mask &= ~CODING_CATEGORY_MASK_ISO_7;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
+ if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset, c))
mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
else
mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
+ if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset, c))
mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE;
else
mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
- if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
+ if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset, c))
mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE;
else
mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
break;
case ISO_CODE_SO:
+ if (inhibit_iso_escape_detection)
+ break;
single_shifting = 0;
if (shift_out == 0
&& (reg[1] >= 0
break;
case ISO_CODE_SI:
+ if (inhibit_iso_escape_detection)
+ break;
single_shifting = 0;
if (shift_out == 1)
{
{
int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
+ if (inhibit_iso_escape_detection)
+ break;
if (c != ISO_CODE_CSI)
{
if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
}
else
{
- unsigned char *src_begin = src;
-
mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
| CODING_CATEGORY_MASK_ISO_7_ELSE);
mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
if (!single_shifting
&& mask & CODING_CATEGORY_MASK_ISO_8_2)
{
- int i = 0;
+ int i = 1;
while (src < src_end)
{
ONE_MORE_BYTE (c);
/* Set designation state into CODING. */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char) \
do { \
- int charset; \
+ int charset, c; \
\
if (final_char < '0' || final_char >= 128) \
goto label_invalid_code; \
charset = ISO_CHARSET_TABLE (make_number (dimension), \
make_number (chars), \
make_number (final_char)); \
+ c = MAKE_CHAR (charset, 0, 0); \
if (charset >= 0 \
&& (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
- || coding->safe_charsets[charset])) \
+ || CODING_SAFE_CHAR_P (safe_chars, c))) \
{ \
if (coding->spec.iso2022.last_invalid_designation_register == 0 \
&& reg == 0 \
/* Allocate a memory block for storing information about compositions.
The block is chained to the already allocated blocks. */
-static void
+void
coding_allocate_composition_data (coding, char_offset)
struct coding_system *coding;
int char_offset;
/* Handle compositoin start sequence ESC 0, ESC 2, ESC 3, or ESC 4. */
-#define DECODE_COMPOSITION_START(c1) \
- do { \
- if (coding->composing == COMPOSITION_DISABLED) \
- { \
- *dst++ = ISO_CODE_ESC; \
- *dst++ = c1 & 0x7f; \
- coding->produced_char += 2; \
- } \
- else if (!COMPOSING_P (coding)) \
- { \
- /* This is surely the start of a composition. We must be sure \
- that coding->cmp_data has enough space to store the \
- information about the composition. If not, terminate the \
- current decoding loop, allocate one more memory block for \
- coding->cmp_data in the calller, then start the decoding \
- loop again. We can't allocate memory here directly because \
- it may cause buffer/string relocation. */ \
- if (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
- >= COMPOSITION_DATA_SIZE) \
- { \
- coding->result = CODING_FINISH_INSUFFICIENT_CMP; \
- goto label_end_of_loop; \
- } \
- coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
- : c1 == '2' ? COMPOSITION_WITH_RULE \
- : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
- : COMPOSITION_WITH_RULE_ALTCHARS); \
- CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \
- coding->composing); \
- coding->composition_rule_follows = 0; \
- } \
- else \
- { \
- /* We are already handling a composition. If the method is \
- the following two, the codes following the current escape \
- sequence are actual characters stored in a buffer. */ \
- if (coding->composing == COMPOSITION_WITH_ALTCHARS \
- || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \
- { \
- coding->composing = COMPOSITION_RELATIVE; \
- coding->composition_rule_follows = 0; \
- } \
- } \
+/* C1 is the byte following ESC: '0', '2' or '3'; any other value is
+   handled as '4'.  The macro uses `coding' and `dst' of the place
+   where it is expanded and may jump to label_end_of_loop there.  */
+#define DECODE_COMPOSITION_START(c1) \
+ do { \
+ if (coding->composing == COMPOSITION_DISABLED) \
+ { \
+ *dst++ = ISO_CODE_ESC; \
+ *dst++ = c1 & 0x7f; \
+ coding->produced_char += 2; \
+ } \
+ else if (!COMPOSING_P (coding)) \
+ { \
+ /* This is surely the start of a composition. We must be sure \
+ that coding->cmp_data exists and has enough space to store \
+ the information about the composition. If not, terminate \
+ the current decoding loop, allocate one more memory block \
+ for coding->cmp_data in the caller, then start the decoding \
+ loop again. We can't allocate memory here directly because \
+ it may cause buffer/string relocation. */ \
+ if (!coding->cmp_data \
+ || (coding->cmp_data->used + COMPOSITION_DATA_MAX_BUNCH_LENGTH \
+ >= COMPOSITION_DATA_SIZE)) \
+ { \
+ coding->result = CODING_FINISH_INSUFFICIENT_CMP; \
+ goto label_end_of_loop; \
+ } \
+ coding->composing = (c1 == '0' ? COMPOSITION_RELATIVE \
+ : c1 == '2' ? COMPOSITION_WITH_RULE \
+ : c1 == '3' ? COMPOSITION_WITH_ALTCHARS \
+ : COMPOSITION_WITH_RULE_ALTCHARS); \
+ CODING_ADD_COMPOSITION_START (coding, coding->produced_char, \
+ coding->composing); \
+ coding->composition_rule_follows = 0; \
+ } \
+ else \
+ { \
+ /* We are already handling a composition. If the method is \
+ one of the following two, the codes following the current \
+ escape sequence are actual characters stored in a buffer. */ \
+ if (coding->composing == COMPOSITION_WITH_ALTCHARS \
+ || coding->composing == COMPOSITION_WITH_RULE_ALTCHARS) \
+ { \
+ coding->composing = COMPOSITION_RELATIVE; \
+ coding->composition_rule_follows = 0; \
+ } \
+ } \
} while (0)
/* Handle compositoin end sequence ESC 1. */
unsigned char *src_base;
int c, charset;
Lisp_Object translation_table;
+ Lisp_Object safe_chars;
+
+ safe_chars = coding_safe_chars (coding);
if (NILP (Venable_character_translation))
translation_table = Qnil;
goto label_invalid_code;
charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
ONE_MORE_BYTE (c1);
+ if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+ goto label_invalid_code;
break;
case 'O': /* invocation of single-shift-3 */
goto label_invalid_code;
charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
ONE_MORE_BYTE (c1);
+ if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0))
+ goto label_invalid_code;
break;
case '0': case '2': case '3': case '4': /* start composition */
*dst++ = c1 | 0x80; \
break; \
} \
- else if (coding->flags & CODING_FLAG_ISO_SAFE \
- && !coding->safe_charsets[charset]) \
- { \
- /* We should not encode this character, instead produce one or \
- two `?'s. */ \
- *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
- if (CHARSET_WIDTH (charset) == 2) \
- *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
- break; \
- } \
else \
/* Since CHARSET is not yet invoked to any graphic planes, we \
must invoke it, or, at first, designate it to some graphic \
*dst++ = c1 | 0x80, *dst++= c2 | 0x80; \
break; \
} \
- else if (coding->flags & CODING_FLAG_ISO_SAFE \
- && !coding->safe_charsets[charset]) \
- { \
- /* We should not encode this character, instead produce one or \
- two `?'s. */ \
- *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
- if (CHARSET_WIDTH (charset) == 2) \
- *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION; \
- break; \
- } \
else \
/* Since CHARSET is not yet invoked to any graphic planes, we \
must invoke it, or, at first, designate it to some graphic \
dst = encode_invocation_designation (charset, coding, dst); \
} while (1)
-#define ENCODE_ISO_CHARACTER(charset, c1, c2) \
+/* Encode character C at the place pointed by DST.  C is split into
+   a charset and position codes with SPLIT_CHAR.  For a defined
+   charset the dimension-1 or dimension-2 encoder is used, after
+   replacing ASCII by charset_latin_jisx0201 when
+   CODING_FLAG_ISO_USE_ROMAN is set, or charset_jisx0208 by
+   charset_jisx0208_1978 when CODING_FLAG_ISO_USE_OLDJIS is set.  For
+   an undefined charset the position codes are stored as they are,
+   the second one only if it is not negative.  The macro uses
+   `coding' and `dst' of the place where it is expanded.  */
+#define ENCODE_ISO_CHARACTER(c) \
+ do { \
+ int charset, c1, c2; \
+ \
+ SPLIT_CHAR (c, charset, c1, c2); \
+ if (CHARSET_DEFINED_P (charset)) \
+ { \
+ if (CHARSET_DIMENSION (charset) == 1) \
+ { \
+ if (charset == CHARSET_ASCII \
+ && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
+ charset = charset_latin_jisx0201; \
+ ENCODE_ISO_CHARACTER_DIMENSION1 (charset, c1); \
+ } \
+ else \
+ { \
+ if (charset == charset_jisx0208 \
+ && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
+ charset = charset_jisx0208_1978; \
+ ENCODE_ISO_CHARACTER_DIMENSION2 (charset, c1, c2); \
+ } \
+ } \
+ else \
+ { \
+ *dst++ = c1; \
+ if (c2 >= 0) \
+ *dst++ = c2; \
+ } \
+ } while (0)
+
+
+/* Instead of encoding character C, produce one or two `?'s. */
+
+#define ENCODE_UNSAFE_CHARACTER(c) \
do { \
- int alt_charset = charset; \
- \
- if (CHARSET_DEFINED_P (charset)) \
- { \
- if (CHARSET_DIMENSION (charset) == 1) \
- { \
- if (charset == CHARSET_ASCII \
- && coding->flags & CODING_FLAG_ISO_USE_ROMAN) \
- alt_charset = charset_latin_jisx0201; \
- ENCODE_ISO_CHARACTER_DIMENSION1 (alt_charset, c1); \
- } \
- else \
- { \
- if (charset == charset_jisx0208 \
- && coding->flags & CODING_FLAG_ISO_USE_OLDJIS) \
- alt_charset = charset_jisx0208_1978; \
- ENCODE_ISO_CHARACTER_DIMENSION2 (alt_charset, c1, c2); \
- } \
- } \
- else \
- { \
- *dst++ = c1; \
- if (c2 >= 0) \
- *dst++ = c2; \
- } \
+ ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
+ if (CHARSET_WIDTH (CHAR_CHARSET (c)) > 1) \
+ ENCODE_ISO_CHARACTER (CODING_INHIBIT_CHARACTER_SUBSTITUTION); \
} while (0)
+
/* Produce designation and invocation codes at a place pointed by DST
to use CHARSET. The element `spec.iso2022' of *CODING is updated.
Return new DST. */
unsigned char *src_base;
int c;
Lisp_Object translation_table;
+ Lisp_Object safe_chars;
+
+ safe_chars = coding_safe_chars (coding);
if (NILP (Venable_character_translation))
translation_table = Qnil;
coding->errors = 0;
while (1)
{
- int charset, c1, c2;
-
src_base = src;
if (dst >= (dst_bytes ? adjusted_dst_end : (src - 19)))
}
else
{
- SPLIT_CHAR (c, charset, c1, c2);
- ENCODE_ISO_CHARACTER (charset, c1, c2);
+ if (coding->flags & CODING_FLAG_ISO_SAFE
+ && ! CODING_SAFE_CHAR_P (safe_chars, c))
+ ENCODE_UNSAFE_CHARACTER (c);
+ else
+ ENCODE_ISO_CHARACTER (c);
if (coding->composing == COMPOSITION_WITH_RULE_ALTCHARS)
coding->composition_rule_follows = 1;
}
}
}
else if (ASCII_BYTE_P (c))
- ENCODE_ISO_CHARACTER (CHARSET_ASCII, c, /* dummy */ c1);
+ ENCODE_ISO_CHARACTER (c);
else if (SINGLE_BYTE_CHAR_P (c))
{
*dst++ = c;
coding->errors++;
}
+ else if (coding->flags & CODING_FLAG_ISO_SAFE
+ && ! CODING_SAFE_CHAR_P (safe_chars, c))
+ ENCODE_UNSAFE_CHARACTER (c);
else
- {
- SPLIT_CHAR (c, charset, c1, c2);
- ENCODE_ISO_CHARACTER (charset, c1, c2);
- }
+ ENCODE_ISO_CHARACTER (c);
coding->consumed_char++;
}
while (1)
{
ONE_MORE_BYTE (c);
- if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
+ if (c >= 0x81)
{
- ONE_MORE_BYTE (c);
- if (c < 0x40)
+ if (c <= 0x9F || (c >= 0xE0 && c <= 0xEF))
+ {
+ ONE_MORE_BYTE (c);
+ if (c < 0x40 || c == 0x7F || c > 0xFC)
+ return 0;
+ }
+ else if (c > 0xDF)
return 0;
}
}
translation_table = Qnil;
else
{
- translation_table = coding->translation_table_for_decode;
+ translation_table = coding->translation_table_for_encode;
if (NILP (translation_table))
- translation_table = Vstandard_translation_table_for_decode;
+ translation_table = Vstandard_translation_table_for_encode;
}
while (1)
ENCODE_SJIS (c1, c2, c1, c2);
EMIT_TWO_BYTES (c1, c2);
}
+ else if (charset == charset_katakana_jisx0201)
+ EMIT_ONE_BYTE (c1 | 0x80);
else if (charset == charset_latin_jisx0201)
EMIT_ONE_BYTE (c1);
else
}
src_base = src;
label_end_of_loop:
+ ;
}
else
{
- if (src_bytes <= dst_bytes)
+ if (!dst_bytes || src_bytes <= dst_bytes)
{
safe_bcopy (src, dst, src_bytes);
src_base = src_end;
coding->consumed = src_base - source;
coding->produced = dst - destination;
+ coding->produced_char = coding->produced;
}
\f
else
goto label_invalid_coding_system;
- val = Fplist_get (plist, Qsafe_charsets);
- if (EQ (val, Qt))
- {
- for (i = 0; i <= MAX_CHARSET; i++)
- coding->safe_charsets[i] = 1;
- }
- else
- {
- bzero (coding->safe_charsets, MAX_CHARSET + 1);
- while (CONSP (val))
- {
- if ((i = get_charset_id (XCAR (val))) >= 0)
- coding->safe_charsets[i] = 1;
- val = XCDR (val);
- }
- }
-
/* If the coding system has non-nil `composition' property, enable
composition handling. */
val = Fplist_get (plist, Qcomposition);
if (reg_bits)
for (charset = 0; charset <= MAX_CHARSET; charset++)
{
- if (CHARSET_VALID_P (charset))
+ if (CHARSET_VALID_P (charset)
+ && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
+ == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
{
/* There exist some default graphic registers to be
- used CHARSET. */
+ used by CHARSET. */
/* We had better avoid designating a charset of
CHARS96 to REG 0 as far as possible. */
}
}
coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
+ coding->spec.ccl.cr_carryover = 0;
break;
case 5:
return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
}
-#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
-#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
-#endif
+/* Working buffer for code conversion.  The memory comes either from
+   alloca or from xmalloc; ON_STACK tells which, so that only heap
+   memory is passed to xfree.  */
+struct conversion_buffer
+{
+ int size; /* Number of bytes allocated for DATA. */
+ int on_stack; /* 1 if allocated by alloca. */
+ unsigned char *data; /* The working area itself. */
+};
-char *conversion_buffer;
-int conversion_buffer_size;
+/* Don't use alloca for allocating memory space larger than this, lest
+   we overflow the stack.  The value is parenthesized so that it keeps
+   its meaning when the macro is used inside a larger expression.  */
+#define MAX_ALLOCA (16 * 1024)
-/* Return a pointer to a SIZE bytes of buffer to be used for encoding
- or decoding. Sufficient memory is allocated automatically. If we
- run out of memory, return NULL. */
+/* Allocate LEN bytes of memory for BUF (struct conversion_buffer).
+   A request smaller than MAX_ALLOCA is served by alloca and BUF is
+   marked as being on the stack; any other request is served by
+   xmalloc.  BUF and LEN are parenthesized in the expansion, but they
+   are expanded more than once, so they should be free of side
+   effects.  */
+#define allocate_conversion_buffer(buf, len)                  \
+  do {                                                        \
+    if ((len) < MAX_ALLOCA)                                   \
+      {                                                       \
+        (buf).data = (unsigned char *) alloca (len);          \
+        (buf).on_stack = 1;                                   \
+      }                                                       \
+    else                                                      \
+      {                                                       \
+        (buf).data = (unsigned char *) xmalloc (len);         \
+        (buf).on_stack = 0;                                   \
+      }                                                       \
+    (buf).size = (len);                                       \
+  } while (0)
-char *
-get_conversion_buffer (size)
- int size;
+/* Double the allocated memory for *BUF. */
+static void
+extend_conversion_buffer (buf)
+ struct conversion_buffer *buf;
{
- if (size > conversion_buffer_size)
+ if (buf->on_stack)
{
- char *buf;
- int real_size = conversion_buffer_size * 2;
-
- while (real_size < size) real_size *= 2;
- buf = (char *) xmalloc (real_size);
- xfree (conversion_buffer);
- conversion_buffer = buf;
- conversion_buffer_size = real_size;
+ unsigned char *save = buf->data;
+ buf->data = (unsigned char *) xmalloc (buf->size * 2);
+ bcopy (save, buf->data, buf->size);
+ buf->on_stack = 0;
}
- return conversion_buffer;
+ else
+ {
+ buf->data = (unsigned char *) xrealloc (buf->data, buf->size * 2);
+ }
+ buf->size *= 2;
+}
+
+/* Release the memory held by BUF.  Memory living on the stack is
+   left alone; only memory marked as not on the stack goes to
+   xfree.  */
+static void
+free_conversion_buffer (buf)
+     struct conversion_buffer *buf;
+{
+  if (buf->on_stack)
+    return;
+  xfree (buf->data);
}
int
int result;
ccl->last_block = coding->mode & CODING_MODE_LAST_BLOCK;
-
+ if (encodep)
+ ccl->eol_type = coding->eol_type;
+ ccl->multibyte = coding->src_multibyte;
coding->produced = ccl_driver (ccl, source, destination,
src_bytes, dst_bytes, &(coding->consumed));
if (encodep)
switch (ccl->status)
{
case CCL_STAT_SUSPEND_BY_SRC:
- result = CODING_FINISH_INSUFFICIENT_SRC;
+ coding->result = CODING_FINISH_INSUFFICIENT_SRC;
break;
case CCL_STAT_SUSPEND_BY_DST:
- result = CODING_FINISH_INSUFFICIENT_DST;
+ coding->result = CODING_FINISH_INSUFFICIENT_DST;
break;
case CCL_STAT_QUIT:
case CCL_STAT_INVALID_CMD:
- result = CODING_FINISH_INTERRUPT;
+ coding->result = CODING_FINISH_INTERRUPT;
break;
default:
- result = CODING_FINISH_NORMAL;
+ coding->result = CODING_FINISH_NORMAL;
break;
}
- return result;
+ return coding->result;
+}
+
+/* Decode EOL format of the text at PTR of BYTES length destructively
+   according to CODING->eol_type.  This is called after the CCL
+   program produced a decoded text at PTR.  If we do CRLF->LF
+   conversion, update CODING->produced and CODING->produced_char.
+
+   If CODING->eol_type is CODING_EOL_UNDECIDED, it is first detected
+   from the text by detect_eol_type.  While decoding CRLF, a CR at the
+   very end of the text is excluded from the produced counts and
+   recorded in CODING->spec.ccl.cr_carryover unless this is the last
+   block.  If an inconsistent EOL is found while
+   CODING_MODE_INHIBIT_INCONSISTENT_EOL is set, the conversion done so
+   far is undone, CODING->eol_type becomes CODING_EOL_LF and
+   CODING->symbol is restored.  */
+
+static void
+decode_eol_post_ccl (coding, ptr, bytes)
+     struct coding_system *coding;
+     unsigned char *ptr;
+     int bytes;
+{
+  Lisp_Object val, saved_coding_symbol;
+  unsigned char *pend = ptr + bytes;
+  int dummy;
+
+  /* Remember the current coding system symbol.  We set it back when
+     an inconsistent EOL is found so that `last-coding-system-used' is
+     set to the coding system that doesn't specify EOL conversion.  */
+  saved_coding_symbol = coding->symbol;
+
+  coding->spec.ccl.cr_carryover = 0;
+  if (coding->eol_type == CODING_EOL_UNDECIDED)
+    {
+      /* Here, to avoid the call of setup_coding_system, we directly
+         call detect_eol_type.  */
+      coding->eol_type = detect_eol_type (ptr, bytes, &dummy);
+      if (coding->eol_type == CODING_EOL_INCONSISTENT)
+        coding->eol_type = CODING_EOL_LF;
+      if (coding->eol_type != CODING_EOL_UNDECIDED)
+        {
+          val = Fget (coding->symbol, Qeol_type);
+          if (VECTORP (val) && XVECTOR (val)->size == 3)
+            coding->symbol = XVECTOR (val)->contents[coding->eol_type];
+        }
+      coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
+    }
+
+  if (coding->eol_type == CODING_EOL_LF
+      || coding->eol_type == CODING_EOL_UNDECIDED)
+    {
+      /* We have nothing to do.  */
+      ptr = pend;
+    }
+  else if (coding->eol_type == CODING_EOL_CRLF)
+    {
+      unsigned char *pstart = ptr, *p = ptr;
+
+      /* Look at the last byte only if the text is not empty;
+         otherwise PEND - 1 would point before the text and the
+         produced counts could become negative.  */
+      if (! (coding->mode & CODING_MODE_LAST_BLOCK)
+          && ptr < pend
+          && *(pend - 1) == '\r')
+        {
+          /* If the last character is CR, we can't handle it here
+             because LF will be in the not-yet-decoded source text.
+             Record that the CR is not yet processed.  */
+          coding->spec.ccl.cr_carryover = 1;
+          coding->produced--;
+          coding->produced_char--;
+          pend--;
+        }
+      while (ptr < pend)
+        {
+          if (*ptr == '\r')
+            {
+              if (ptr + 1 < pend && *(ptr + 1) == '\n')
+                {
+                  *p++ = '\n';
+                  ptr += 2;
+                }
+              else
+                {
+                  if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+                    goto undo_eol_conversion;
+                  *p++ = *ptr++;
+                }
+            }
+          else if (*ptr == '\n'
+                   && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+            goto undo_eol_conversion;
+          else
+            *p++ = *ptr++;
+          continue;
+
+        undo_eol_conversion:
+          /* We have encountered an inconsistent EOL format at PTR.
+             Convert all LFs before PTR back to CRLFs.  */
+          for (p--, ptr--; p >= pstart; p--)
+            {
+              if (*p == '\n')
+                *ptr-- = '\n', *ptr-- = '\r';
+              else
+                *ptr-- = *p;
+            }
+          /* If carryover is recorded, cancel it because we don't
+             convert CRLF anymore.  */
+          if (coding->spec.ccl.cr_carryover)
+            {
+              coding->spec.ccl.cr_carryover = 0;
+              coding->produced++;
+              coding->produced_char++;
+              pend++;
+            }
+          p = ptr = pend;
+          coding->eol_type = CODING_EOL_LF;
+          coding->symbol = saved_coding_symbol;
+        }
+      if (p < pend)
+        {
+          /* As each two-byte sequence CRLF was converted to LF, (PEND
+             - P) is the number of deleted characters.  */
+          coding->produced -= pend - p;
+          coding->produced_char -= pend - p;
+        }
+    }
+  else                  /* i.e. coding->eol_type == CODING_EOL_CR */
+    {
+      unsigned char *p = ptr;
+
+      for (; ptr < pend; ptr++)
+        {
+          if (*ptr == '\r')
+            *ptr = '\n';
+          else if (*ptr == '\n'
+                   && coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
+            {
+              /* Inconsistent EOL: turn the LFs produced so far back
+                 into CRs and stop converting.  */
+              for (; p < ptr; p++)
+                {
+                  if (*p == '\n')
+                    *p = '\r';
+                }
+              ptr = pend;
+              coding->eol_type = CODING_EOL_LF;
+              coding->symbol = saved_coding_symbol;
+            }
+        }
+    }
+}
/* See "GENERAL NOTES about `decode_coding_XXX ()' functions". Before
if (coding->type == coding_type_undecided)
detect_coding (coding, source, src_bytes);
- if (coding->eol_type == CODING_EOL_UNDECIDED)
+ if (coding->eol_type == CODING_EOL_UNDECIDED
+ && coding->type != coding_type_ccl)
detect_eol (coding, source, src_bytes);
coding->produced = coding->produced_char = 0;
break;
case coding_type_ccl:
- ccl_coding_driver (coding, source, destination,
+ if (coding->spec.ccl.cr_carryover)
+ {
+ /* Set the CR which is not processed by the previous call of
+ decode_eol_post_ccl in DESTINATION. */
+ *destination = '\r';
+ coding->produced++;
+ coding->produced_char++;
+ dst_bytes--;
+ }
+ ccl_coding_driver (coding, source,
+ destination + coding->spec.ccl.cr_carryover,
src_bytes, dst_bytes, 0);
+ if (coding->eol_type != CODING_EOL_LF)
+ decode_eol_post_ccl (coding, destination, coding->produced);
break;
default:
}
if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
+ && coding->mode & CODING_MODE_LAST_BLOCK
&& coding->consumed == src_bytes)
coding->result = CODING_FINISH_NORMAL;
unsigned char *dst = destination + coding->produced;
src_bytes -= coding->consumed;
- coding->errors++;
+ coding->errors++;
if (COMPOSING_P (coding))
DECODE_COMPOSITION_END ('1');
while (src_bytes--)
}
coding->consumed = coding->consumed_char = src - source;
coding->produced = dst - destination;
+ coding->result = CODING_FINISH_NORMAL;
}
if (!coding->dst_multibyte)
encode_eol (coding, source, destination, src_bytes, dst_bytes);
}
- if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
- && coding->consumed == src_bytes)
- coding->result = CODING_FINISH_NORMAL;
-
- if (coding->mode & CODING_MODE_LAST_BLOCK)
+ if (coding->mode & CODING_MODE_LAST_BLOCK
+ && coding->result == CODING_FINISH_INSUFFICIENT_SRC)
{
unsigned char *src = source + coding->consumed;
unsigned char *src_end = src + src_bytes;
coding->consumed = src_bytes;
}
coding->produced = coding->produced_char = dst - destination;
+ coding->result = CODING_FINISH_NORMAL;
}
+ if (coding->result == CODING_FINISH_INSUFFICIENT_SRC
+ && coding->consumed == src_bytes)
+ coding->result = CODING_FINISH_NORMAL;
+
return coding->result;
}
if (!NILP (CHAR_TABLE_REF (translation_table, i)))
break;
if (i < 128)
- /* Some ASCII character should be tranlsated. We give up
+ /* Some ASCII character should be translated. We give up
shrinking. */
return;
}
endp_orig = endp = begp + *end - *beg;
}
+ eol_conversion = (coding->eol_type == CODING_EOL_CR
+ || coding->eol_type == CODING_EOL_CRLF);
+
switch (coding->type)
{
case coding_type_sjis:
CODING->cmp_data points to a memory block for the informaiton. OBJ
is a buffer or a string, defaults to the current buffer. */
-static void
+void
coding_restore_composition (coding, obj)
struct coding_system *coding;
Lisp_Object obj;
{
int i;
- for (i = 0; i < cmp_data->used; i += cmp_data->data[i])
+ for (i = 0; i < cmp_data->used && cmp_data->data[i] > 0;
+ i += cmp_data->data[i])
{
int *data = cmp_data->data + i;
enum composition_method method = (enum composition_method) data[3];
if (replace)
{
int saved_from = from;
+ int saved_inhibit_modification_hooks;
prepare_to_modify_buffer (from, to, &from);
if (saved_from != from)
from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
len_byte = to_byte - from_byte;
}
+
+ /* The code conversion routine can not preserve text properties
+ for now. So, we must remove all text properties in the
+ region. Here, we must suppress all modification hooks. */
+ saved_inhibit_modification_hooks = inhibit_modification_hooks;
+ inhibit_modification_hooks = 1;
+ Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
+ inhibit_modification_hooks = saved_inhibit_modification_hooks;
}
if (! encodep && CODING_REQUIRE_DETECTION (coding))
{
detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
if (coding->type == coding_type_undecided)
- /* It seems that the text contains only ASCII, but we
- should not left it undecided because the deeper
- decoding routine (decode_coding) tries to detect the
- encodings again in vain. */
- coding->type = coding_type_emacs_mule;
+ {
+ /* It seems that the text contains only ASCII, but we
+ should not leave it undecided because the deeper
+ decoding routine (decode_coding) tries to detect the
+ encodings again in vain. */
+ coding->type = coding_type_emacs_mule;
+ coding->category_idx = CODING_CATEGORY_IDX_EMACS_MULE;
+ }
}
- if (coding->eol_type == CODING_EOL_UNDECIDED)
+ if (coding->eol_type == CODING_EOL_UNDECIDED
+ && coding->type != coding_type_ccl)
{
saved_coding_symbol = coding->symbol;
detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
}
/* Try to skip the heading and tailing ASCIIs. */
- {
- int from_byte_orig = from_byte, to_byte_orig = to_byte;
-
- if (from < GPT && GPT < to)
- move_gap_both (from, from_byte);
- SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
- if (from_byte == to_byte
- && (encodep || NILP (coding->post_read_conversion))
- && ! CODING_REQUIRE_FLUSHING (coding))
- {
- coding->produced = len_byte;
- coding->produced_char = len;
- if (!replace)
- /* We must record and adjust for this new text now. */
- adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
- return 0;
- }
+ if (coding->type != coding_type_ccl)
+ {
+ int from_byte_orig = from_byte, to_byte_orig = to_byte;
- head_skip = from_byte - from_byte_orig;
- tail_skip = to_byte_orig - to_byte;
- total_skip = head_skip + tail_skip;
- from += head_skip;
- to -= tail_skip;
- len -= total_skip; len_byte -= total_skip;
- }
+ if (from < GPT && GPT < to)
+ move_gap_both (from, from_byte);
+ SHRINK_CONVERSION_REGION (&from_byte, &to_byte, coding, NULL, encodep);
+ if (from_byte == to_byte
+ && (encodep || NILP (coding->post_read_conversion))
+ && ! CODING_REQUIRE_FLUSHING (coding))
+ {
+ coding->produced = len_byte;
+ coding->produced_char = len;
+ if (!replace)
+ /* We must record and adjust for this new text now. */
+ adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
+ return 0;
+ }
- /* The code conversion routine can not preserve text properties for
- now. So, we must remove all text properties in the region.
- Here, we must suppress all modification hooks. */
- if (replace)
- {
- int saved_inhibit_modification_hooks = inhibit_modification_hooks;
- inhibit_modification_hooks = 1;
- Fset_text_properties (make_number (from), make_number (to), Qnil, Qnil);
- inhibit_modification_hooks = saved_inhibit_modification_hooks;
+ head_skip = from_byte - from_byte_orig;
+ tail_skip = to_byte_orig - to_byte;
+ total_skip = head_skip + tail_skip;
+ from += head_skip;
+ to -= tail_skip;
+ len -= total_skip; len_byte -= total_skip;
}
/* For converion, we must put the gap before the text in addition to
if (encodep)
call2 (coding->pre_write_conversion, make_number (BEG), make_number (Z));
else
- call1 (coding->post_read_conversion, make_number (Z - BEG));
+ {
+ TEMP_SET_PT_BOTH (BEG, BEG_BYTE);
+ call1 (coding->post_read_conversion, make_number (Z - BEG));
+ }
inhibit_pre_post_conversion = 0;
- str = make_buffer_string (BEG, Z, 0);
+ str = make_buffer_string (BEG, Z, 1);
return unbind_to (count, str);
}
int nocopy;
{
int len;
- char *buf;
+ struct conversion_buffer buf;
int from, to, to_byte;
struct gcpro gcpro1;
Lisp_Object saved_coding_symbol;
int result;
+ int require_decoding;
+ int shrinked_bytes = 0;
+ Lisp_Object newstr;
+ int consumed, consumed_char, produced, produced_char;
from = 0;
to = XSTRING (str)->size;
if (coding->type == coding_type_undecided)
coding->type = coding_type_emacs_mule;
}
- if (coding->eol_type == CODING_EOL_UNDECIDED)
+ if (coding->eol_type == CODING_EOL_UNDECIDED
+ && coding->type != coding_type_ccl)
{
saved_coding_symbol = coding->symbol;
detect_eol (coding, XSTRING (str)->data, to_byte);
}
}
- if (! CODING_REQUIRE_DECODING (coding))
- {
- if (!STRING_MULTIBYTE (str))
- {
- str = Fstring_as_multibyte (str);
- nocopy = 1;
- }
- return (nocopy ? str : Fcopy_sequence (str));
- }
+ coding->src_multibyte = 0;
+ coding->dst_multibyte = (coding->type != coding_type_no_conversion
+ && coding->type != coding_type_raw_text);
+ require_decoding = CODING_REQUIRE_DECODING (coding);
if (STRING_MULTIBYTE (str))
{
/* Decoding routines expect the source text to be unibyte. */
str = Fstring_as_unibyte (str);
+ to_byte = STRING_BYTES (XSTRING (str));
nocopy = 1;
- coding->src_multibyte = 0;
}
- coding->dst_multibyte = 1;
-
- if (coding->composing != COMPOSITION_DISABLED)
- coding_allocate_composition_data (coding, from);
/* Try to skip the heading and tailing ASCIIs. */
- {
- int from_orig = from;
+ if (require_decoding && coding->type != coding_type_ccl)
+ {
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ 0);
+ if (from == to_byte)
+ require_decoding = 0;
+ shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+ }
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
- 0);
- if (from == to_byte)
+ if (!require_decoding)
+ {
+ coding->consumed = STRING_BYTES (XSTRING (str));
+ coding->consumed_char = XSTRING (str)->size;
+ if (coding->dst_multibyte)
+ {
+ str = Fstring_as_multibyte (str);
+ nocopy = 1;
+ }
+ coding->produced = STRING_BYTES (XSTRING (str));
+ coding->produced_char = XSTRING (str)->size;
return (nocopy ? str : Fcopy_sequence (str));
- }
+ }
+ if (coding->composing != COMPOSITION_DISABLED)
+ coding_allocate_composition_data (coding, from);
len = decoding_buffer_size (coding, to_byte - from);
- len += from + STRING_BYTES (XSTRING (str)) - to_byte;
- GCPRO1 (str);
- buf = get_conversion_buffer (len);
- UNGCPRO;
+ allocate_conversion_buffer (buf, len);
- if (from > 0)
- bcopy (XSTRING (str)->data, buf, from);
- result = decode_coding (coding, XSTRING (str)->data + from,
- buf + from, to_byte - from, len);
- if (result == CODING_FINISH_INCONSISTENT_EOL)
+ consumed = consumed_char = produced = produced_char = 0;
+ while (1)
{
- /* We simply try to decode the whole string again but without
- eol-conversion this time. */
- coding->eol_type = CODING_EOL_LF;
- coding->symbol = saved_coding_symbol;
- coding_free_composition_data (coding);
- return decode_coding_string (str, coding, nocopy);
+ result = decode_coding (coding, XSTRING (str)->data + from + consumed,
+ buf.data + produced, to_byte - from - consumed,
+ buf.size - produced);
+ consumed += coding->consumed;
+ consumed_char += coding->consumed_char;
+ produced += coding->produced;
+ produced_char += coding->produced_char;
+ if (result == CODING_FINISH_NORMAL
+ || (result == CODING_FINISH_INSUFFICIENT_SRC
+ && coding->consumed == 0))
+ break;
+ if (result == CODING_FINISH_INSUFFICIENT_CMP)
+ coding_allocate_composition_data (coding, from + produced_char);
+ else if (result == CODING_FINISH_INSUFFICIENT_DST)
+ extend_conversion_buffer (&buf);
+ else if (result == CODING_FINISH_INCONSISTENT_EOL)
+ {
+ /* Recover the original EOL format. */
+ if (coding->eol_type == CODING_EOL_CR)
+ {
+ unsigned char *p;
+ for (p = buf.data; p < buf.data + produced; p++)
+ if (*p == '\n') *p = '\r';
+ }
+ else if (coding->eol_type == CODING_EOL_CRLF)
+ {
+ int num_eol = 0;
+ unsigned char *p0, *p1;
+ for (p0 = buf.data, p1 = p0 + produced; p0 < p1; p0++)
+ if (*p0 == '\n') num_eol++;
+ if (produced + num_eol >= buf.size)
+ extend_conversion_buffer (&buf);
+ for (p0 = buf.data + produced, p1 = p0 + num_eol; p0 > buf.data;)
+ {
+ *--p1 = *--p0;
+ if (*p0 == '\n') *--p1 = '\r';
+ }
+ produced += num_eol;
+ produced_char += num_eol;
+ }
+ coding->eol_type = CODING_EOL_LF;
+ coding->symbol = saved_coding_symbol;
+ }
}
- bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
- STRING_BYTES (XSTRING (str)) - to_byte);
+ coding->consumed = consumed;
+ coding->consumed_char = consumed_char;
+ coding->produced = produced;
+ coding->produced_char = produced_char;
- len = from + STRING_BYTES (XSTRING (str)) - to_byte;
- str = make_multibyte_string (buf, len + coding->produced_char,
- len + coding->produced);
+ if (coding->dst_multibyte)
+ newstr = make_uninit_multibyte_string (produced_char + shrinked_bytes,
+ produced + shrinked_bytes);
+ else
+ newstr = make_uninit_string (produced + shrinked_bytes);
+ if (from > 0)
+ bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
+ bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+ if (shrinked_bytes > from)
+ bcopy (XSTRING (str)->data + to_byte,
+ XSTRING (newstr)->data + from + produced,
+ shrinked_bytes - from);
+ free_conversion_buffer (&buf);
if (coding->cmp_data && coding->cmp_data->used)
- coding_restore_composition (coding, str);
+ coding_restore_composition (coding, newstr);
coding_free_composition_data (coding);
if (SYMBOLP (coding->post_read_conversion)
&& !NILP (Ffboundp (coding->post_read_conversion)))
- str = run_pre_post_conversion_on_str (str, 0);
+ newstr = run_pre_post_conversion_on_str (newstr, coding, 0);
- return str;
+ return newstr;
}
Lisp_Object
int nocopy;
{
int len;
- char *buf;
+ struct conversion_buffer buf;
int from, to, to_byte;
struct gcpro gcpro1;
Lisp_Object saved_coding_symbol;
int result;
+ int shrinked_bytes = 0;
+ Lisp_Object newstr;
+ int consumed, consumed_char, produced, produced_char;
if (SYMBOLP (coding->pre_write_conversion)
&& !NILP (Ffboundp (coding->pre_write_conversion)))
- str = run_pre_post_conversion_on_str (str, 1);
+ str = run_pre_post_conversion_on_str (str, coding, 1);
from = 0;
to = XSTRING (str)->size;
to_byte = STRING_BYTES (XSTRING (str));
saved_coding_symbol = Qnil;
+
+ /* Encoding routines determine the multibyteness of the source text
+ by coding->src_multibyte. */
+ coding->src_multibyte = STRING_MULTIBYTE (str);
+ coding->dst_multibyte = 0;
if (! CODING_REQUIRE_ENCODING (coding))
{
+ coding->consumed = STRING_BYTES (XSTRING (str));
+ coding->consumed_char = XSTRING (str)->size;
if (STRING_MULTIBYTE (str))
{
str = Fstring_as_unibyte (str);
nocopy = 1;
}
+ coding->produced = STRING_BYTES (XSTRING (str));
+ coding->produced_char = XSTRING (str)->size;
return (nocopy ? str : Fcopy_sequence (str));
}
- /* Encoding routines determine the multibyteness of the source text
- by coding->src_multibyte. */
- coding->src_multibyte = STRING_MULTIBYTE (str);
- coding->dst_multibyte = 0;
-
if (coding->composing != COMPOSITION_DISABLED)
coding_save_composition (coding, from, to, str);
/* Try to skip the heading and tailing ASCIIs. */
- {
- int from_orig = from;
-
- SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
- 1);
- if (from == to_byte)
- return (nocopy ? str : Fcopy_sequence (str));
- }
+ if (coding->type != coding_type_ccl)
+ {
+ SHRINK_CONVERSION_REGION (&from, &to_byte, coding, XSTRING (str)->data,
+ 1);
+ if (from == to_byte)
+ return (nocopy ? str : Fcopy_sequence (str));
+ shrinked_bytes = from + (STRING_BYTES (XSTRING (str)) - to_byte);
+ }
len = encoding_buffer_size (coding, to_byte - from);
- len += from + STRING_BYTES (XSTRING (str)) - to_byte;
- GCPRO1 (str);
- buf = get_conversion_buffer (len);
- UNGCPRO;
+ allocate_conversion_buffer (buf, len);
+
+ consumed = consumed_char = produced = produced_char = 0;
+ while (1)
+ {
+ result = encode_coding (coding, XSTRING (str)->data + from + consumed,
+ buf.data + produced, to_byte - from - consumed,
+ buf.size - produced);
+ consumed += coding->consumed;
+ consumed_char += coding->consumed_char;
+ produced += coding->produced;
+ produced_char += coding->produced_char;
+ if (result == CODING_FINISH_NORMAL
+ || (result == CODING_FINISH_INSUFFICIENT_SRC
+ && coding->consumed == 0))
+ break;
+ /* Now result should be CODING_FINISH_INSUFFICIENT_DST. */
+ extend_conversion_buffer (&buf);
+ }
+ coding->consumed = consumed;
+ coding->consumed_char = consumed_char;
+ coding->produced = produced;
+ coding->produced_char = produced_char;
+
+ newstr = make_uninit_string (produced + shrinked_bytes);
if (from > 0)
- bcopy (XSTRING (str)->data, buf, from);
- result = encode_coding (coding, XSTRING (str)->data + from,
- buf + from, to_byte - from, len);
- bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
- STRING_BYTES (XSTRING (str)) - to_byte);
-
- len = from + STRING_BYTES (XSTRING (str)) - to_byte;
- str = make_unibyte_string (buf, len + coding->produced);
+ bcopy (XSTRING (str)->data, XSTRING (newstr)->data, from);
+ bcopy (buf.data, XSTRING (newstr)->data + from, produced);
+ if (shrinked_bytes > from)
+ bcopy (XSTRING (str)->data + to_byte,
+ XSTRING (newstr)->data + from + produced,
+ shrinked_bytes - from);
+
+ free_conversion_buffer (&buf);
coding_free_composition_data (coding);
- return str;
+ return newstr;
}
\f
!NILP (highest));
}
+/* Return an intersection of lists L1 and L2.
+
+ The result is a newly consed list holding each element of L1 for
+ which Fmemq finds a match in L2. Elements are pushed onto the
+ front of the result, so they appear in the reverse of their order
+ in L1. Return nil if the lists have no element in common.
+ Scanning of L1 stops at its first non-cons tail. */
+
+static Lisp_Object
+intersection (l1, l2)
+ Lisp_Object l1, l2;
+{
+ Lisp_Object val;
+
+ /* VAL accumulates the common elements; it starts out empty. */
+ for (val = Qnil; CONSP (l1); l1 = XCDR (l1))
+ {
+ if (!NILP (Fmemq (XCAR (l1), l2)))
+ val = Fcons (XCAR (l1), val);
+ }
+ return val;
+}
+
+
+/* Subroutine for Ffind_coding_systems_region_internal.
+
+ Return a list of coding systems that safely encode the multibyte
+ text between P and PEND.
+
+ SAFE_CODINGS carries the result accumulated so far. It is t when
+ no non-ASCII character has been looked up yet (the caller passes t
+ for the first range). Otherwise it is the list of coding systems
+ found safe for every character looked up so far; nil means that no
+ such coding system remains, in which case the rest of the text is
+ scanned only to detect single byte characters.
+
+ WORK_TABLE is a copy of the char-table Vchar_coding_system_table. An
+ element of WORK_TABLE is set to t once the element is looked up, so
+ that each element contributes to the intersection at most once.
+
+ If a non-ASCII single byte char is found, set
+ *single_byte_char_found to 1. It is never reset to 0 here. */
+
+static Lisp_Object
+find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found)
+ unsigned char *p, *pend;
+ Lisp_Object safe_codings, work_table;
+ int *single_byte_char_found;
+{
+ int c, len, idx;
+ Lisp_Object val;
+
+ while (p < pend)
+ {
+ /* Fetch the next character and step over its LEN bytes. */
+ c = STRING_CHAR_AND_LENGTH (p, pend - p, len);
+ p += len;
+ if (ASCII_BYTE_P (c))
+ /* We can ignore ASCII characters here. */
+ continue;
+ if (SINGLE_BYTE_CHAR_P (c))
+ *single_byte_char_found = 1;
+ /* Nothing is safe any more; keep looping only for the flag above. */
+ if (NILP (safe_codings))
+ continue;
+ /* Check the safe coding systems for C. */
+ val = char_table_ref_and_index (work_table, c, &idx);
+ if (EQ (val, Qt))
+ /* This element was already checked. Ignore it. */
+ continue;
+ /* Remember that we checked this element. */
+ CHAR_TABLE_SET (work_table, make_number (idx), Qt);
+
+ /* If there are some safe coding systems for C and we have
+ already found the other set of coding systems for the
+ different characters, get the intersection of them.
+ Otherwise VAL is taken as is: it replaces the initial t, or,
+ when it is nil, it makes the accumulated result nil. */
+ if (!EQ (safe_codings, Qt) && !NILP (val))
+ val = intersection (safe_codings, val);
+ safe_codings = val;
+ }
+ return safe_codings;
+}
+
+
+/* Return a list of coding systems that safely encode the text between
+ START and END. If the text contains only ASCII or is unibyte,
+ return t.
+
+ START may also be a string; in that case the whole string is
+ examined and END is not used. Otherwise START and END are buffer
+ positions (numbers or markers) in the current buffer, and an
+ args-out-of-range error is signaled unless
+ BEG <= START <= END <= Z. */
+
+DEFUN ("find-coding-systems-region-internal",
+ Ffind_coding_systems_region_internal,
+ Sfind_coding_systems_region_internal, 2, 2, 0,
+ "Internal use only.")
+ (start, end)
+ Lisp_Object start, end;
+{
+ Lisp_Object work_table, safe_codings;
+ int non_ascii_p = 0;
+ int single_byte_char_found = 0;
+ /* The text is scanned as up to two contiguous byte ranges,
+ [p1, p1end) and [p2, p2end). The second range is empty unless
+ the buffer region straddles the gap. */
+ unsigned char *p1, *p1end, *p2, *p2end, *p;
+ /* NOTE(review): this ARGS is never used; the block that calls
+ Fappend below declares its own. */
+ Lisp_Object args[2];
+
+ if (STRINGP (start))
+ {
+ if (!STRING_MULTIBYTE (start))
+ return Qt;
+ p1 = XSTRING (start)->data, p1end = p1 + STRING_BYTES (XSTRING (start));
+ p2 = p2end = p1end;
+ /* More bytes than characters means that some character occupies
+ more than one byte. */
+ if (XSTRING (start)->size != STRING_BYTES (XSTRING (start)))
+ non_ascii_p = 1;
+ }
+ else
+ {
+ int from, to, stop;
+
+ CHECK_NUMBER_COERCE_MARKER (start, 0);
+ CHECK_NUMBER_COERCE_MARKER (end, 1);
+ if (XINT (start) < BEG || XINT (end) > Z || XINT (start) > XINT (end))
+ args_out_of_range (start, end);
+ if (NILP (current_buffer->enable_multibyte_characters))
+ return Qt;
+ from = CHAR_TO_BYTE (XINT (start));
+ to = CHAR_TO_BYTE (XINT (end));
+ /* STOP is where the first range ends: at the gap if the gap lies
+ strictly inside the region, else at the end of the region. */
+ stop = from < GPT_BYTE && GPT_BYTE < to ? GPT_BYTE : to;
+ p1 = BYTE_POS_ADDR (from), p1end = p1 + (stop - from);
+ if (stop == to)
+ p2 = p2end = p1end;
+ else
+ p2 = BYTE_POS_ADDR (stop), p2end = p2 + (to - stop);
+ /* As in the string case, compare the character count with the
+ byte count. */
+ if (XINT (end) - XINT (start) != to - from)
+ non_ascii_p = 1;
+ }
+
+ if (!non_ascii_p)
+ {
+ /* We are sure that the text contains no multibyte character.
+ Check if it contains eight-bit-graphic. */
+ /* NOTE(review): this assignment is repeated by the initializer of
+ the loop that follows. */
+ p = p1;
+ for (p = p1; p < p1end && ASCII_BYTE_P (*p); p++);
+ if (p == p1end)
+ {
+ for (p = p2; p < p2end && ASCII_BYTE_P (*p); p++);
+ if (p == p2end)
+ return Qt;
+ }
+ }
+
+ /* The text contains non-ASCII characters. */
+ /* Work on a copy, because find_safe_codings marks the elements it
+ has looked up by overwriting them with t. */
+ work_table = Fcopy_sequence (Vchar_coding_system_table);
+ safe_codings = find_safe_codings (p1, p1end, Qt, work_table,
+ &single_byte_char_found);
+ if (p2 < p2end)
+ safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table,
+ &single_byte_char_found);
+
+ if (!single_byte_char_found)
+ {
+ /* Append generic coding systems. */
+ /* They are taken from extra slot 0 of
+ Vchar_coding_system_table. */
+ Lisp_Object args[2];
+ args[0] = safe_codings;
+ args[1] = Fchar_table_extra_slot (Vchar_coding_system_table,
+ make_number (0));
+ safe_codings = Fappend (2, args);
+ }
+ else
+ /* A non-ASCII single byte character was found: put raw-text and
+ emacs-mule in front of the result and do not add the generic
+ coding systems. */
+ safe_codings = Fcons (Qraw_text, Fcons (Qemacs_mule, safe_codings));
+ return safe_codings;
+}
+
+
Lisp_Object
code_convert_region1 (start, end, coding_system, encodep)
Lisp_Object start, end, coding_system;
\f
/*** 9. Post-amble ***/
-void
-init_coding ()
-{
- conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
-}
-
void
init_coding_once ()
{
iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
- conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
-
setup_coding_system (Qnil, &keyboard_coding);
setup_coding_system (Qnil, &terminal_coding);
setup_coding_system (Qnil, &safe_terminal_coding);
Qtranslation_table_for_encode = intern ("translation-table-for-encode");
staticpro (&Qtranslation_table_for_encode);
- Qsafe_charsets = intern ("safe-charsets");
- staticpro (&Qsafe_charsets);
+ Qsafe_chars = intern ("safe-chars");
+ staticpro (&Qsafe_chars);
+
+ Qchar_coding_system = intern ("char-coding-system");
+ staticpro (&Qchar_coding_system);
+
+ /* Intern this now in case it isn't already done.
+ Setting this variable twice is harmless.
+ But don't staticpro it here--that is done in alloc.c. */
+ Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0));
+ Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (1));
Qvalid_codes = intern ("valid-codes");
staticpro (&Qvalid_codes);
defsubr (&Scheck_coding_system);
defsubr (&Sdetect_coding_region);
defsubr (&Sdetect_coding_string);
+ defsubr (&Sfind_coding_systems_region_internal);
defsubr (&Sdecode_coding_region);
defsubr (&Sencode_coding_region);
defsubr (&Sdecode_coding_string);
"Coding system to use with system messages.");
Vlocale_coding_system = Qnil;
+ /* The eol mnemonics are reset in startup.el system-dependently. */
DEFVAR_LISP ("eol-mnemonic-unix", &eol_mnemonic_unix,
"*String displayed in mode line for UNIX-like (LF) end-of-line format.");
eol_mnemonic_unix = build_string (":");
The default value is `select-safe-coding-system' (which see).");
Vselect_safe_coding_system_function = Qnil;
+ DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table,
+ "Char-table containing safe coding systems of each characters.\n\
+Each element doesn't include such generic coding systems that can\n\
+encode any characters. They are in the first extra slot.");
+ Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil);
+
+ DEFVAR_BOOL ("inhibit-iso-escape-detection",
+ &inhibit_iso_escape_detection,
+ "If non-nil, Emacs ignores ISO2022's escape sequence on code detection.\n\
+\n\
+By default, on reading a file, Emacs tries to detect how the text is\n\
+encoded. This code detection is sensitive to escape sequences. If\n\
+the sequence is valid as ISO2022, the code is determined as one of\n\
+the ISO2022 encodings, and the file is decoded by the corresponding\n\
+coding system (e.g. `iso-2022-7bit').\n\
+\n\
+However, there may be a case that you want to read escape sequences in\n\
+a file as is. In such a case, you can set this variable to non-nil.\n\
+Then, as the code detection ignores any escape sequences, no file is\n\
+detected as encoded in some ISO2022 encoding. The result is that all\n\
+escape sequences become visible in a buffer.\n\
+\n\
+The default value is nil, and it is strongly recommended not to change\n\
+it. That is because many Emacs Lisp source files that contain\n\
+non-ASCII characters are encoded by the coding system `iso-2022-7bit'\n\
+in Emacs's distribution, and they won't be decoded correctly on\n\
+reading if you suppress escape sequence detection.\n\
+\n\
+The other way to read escape sequences in a file without decoding is\n\
+to explicitly specify some coding system that doesn't use ISO2022's\n\
+escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument].");
+ inhibit_iso_escape_detection = 0;
}
char *
}
#endif /* emacs */
+