Lisp_Object Qbig, Qlittle;
Lisp_Object Qcoding_system_history;
Lisp_Object Qvalid_codes;
-Lisp_Object QCcategory, QCmnemonic, QCdefalut_char;
+Lisp_Object QCcategory, QCmnemonic, QCdefault_char;
Lisp_Object QCdecode_translation_table, QCencode_translation_table;
Lisp_Object QCpost_read_conversion, QCpre_write_conversion;
Lisp_Object QCascii_compatible_p;
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
int *charbuf_end = coding->charbuf + coding->charbuf_size;
- int consumed_chars = 0, consumed_chars_base;
+ int consumed_chars = 0, consumed_chars_base = 0;
int multibytep = coding->src_multibyte;
enum utf_bom_type bom = CODING_UTF_8_BOM (coding);
Lisp_Object attr, charset_list;
src = src_base;
else
{
- ONE_MORE_BYTE (c2);
+ ONE_MORE_BYTE (c2);
if (! UTF_8_EXTRA_OCTET_P (c2))
src = src_base;
else
{
- ONE_MORE_BYTE (c3);
+ ONE_MORE_BYTE (c3);
if (! UTF_8_EXTRA_OCTET_P (c3))
src = src_base;
else
const unsigned char *src_base;
int *charbuf = coding->charbuf + coding->charbuf_used;
int *charbuf_end = coding->charbuf + coding->charbuf_size;
- int consumed_chars = 0, consumed_chars_base;
+ int consumed_chars = 0, consumed_chars_base = 0;
int multibytep = coding->src_multibyte;
enum utf_bom_type bom = CODING_UTF_16_BOM (coding);
enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding);
if (preferred_charset_id >= 0)
{
charset = CHARSET_FROM_ID (preferred_charset_id);
- if (! CHAR_CHARSET_P (c, charset))
- charset = char_charset (c, charset_list, NULL);
+ if (CHAR_CHARSET_P (c, charset))
+ code = ENCODE_CHAR (charset, c);
+ else
+ charset = char_charset (c, charset_list, &code);
}
else
charset = char_charset (c, charset_list, &code);
int i;
int rejected = 0;
int found = 0;
+ int composition_count = -1;
detect_info->checked |= CATEGORY_MASK_ISO;
rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT;
break;
}
+ else if (c == '1')
+ {
+ /* End of composition. */
+ if (composition_count < 0
+ || composition_count > MAX_COMPOSITION_COMPONENTS)
+ /* Invalid */
+ break;
+ composition_count = -1;
+ found |= CATEGORY_MASK_ISO;
+ }
else if (c >= '0' && c <= '4')
{
/* ESC <Fp> for start/end composition. */
- found |= CATEGORY_MASK_ISO;
+ composition_count = 0;
break;
}
else
continue;
if (c < 0x80)
{
+ if (composition_count >= 0)
+ composition_count++;
single_shifting = 0;
break;
}
}
if (i & 1 && src < src_end)
- rejected |= CATEGORY_MASK_ISO_8_2;
+ {
+ rejected |= CATEGORY_MASK_ISO_8_2;
+ if (composition_count >= 0)
+ composition_count += i;
+ }
else
- found |= CATEGORY_MASK_ISO_8_2;
+ {
+ found |= CATEGORY_MASK_ISO_8_2;
+ if (composition_count >= 0)
+ composition_count += i / 2;
+ }
}
break;
}
break; \
if (p == src_end - 1) \
{ \
+ if (coding->mode & CODING_MODE_LAST_BLOCK) \
+ goto invalid_code; \
/* The current composition doesn't end in the current \
source. */ \
record_conversion_result \
if (composition_state == COMPOSING_RULE
|| composition_state == COMPOSING_COMPONENT_RULE)
{
- DECODE_COMPOSITION_RULE (c1);
- components[component_idx++] = c1;
- composition_state--;
- continue;
+ if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
+ {
+ DECODE_COMPOSITION_RULE (c1);
+ components[component_idx++] = c1;
+ composition_state--;
+ continue;
+ }
+ /* Too long composition. */
+ MAYBE_FINISH_COMPOSITION ();
}
}
if (charset_id_0 < 0
if (composition_state == COMPOSING_RULE
|| composition_state == COMPOSING_COMPONENT_RULE)
{
- DECODE_COMPOSITION_RULE (c1);
- components[component_idx++] = c1;
- composition_state--;
- continue;
+ if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
+ {
+ DECODE_COMPOSITION_RULE (c1);
+ components[component_idx++] = c1;
+ composition_state--;
+ continue;
+ }
+ MAYBE_FINISH_COMPOSITION (); /* Too long composition.  */
}
}
if (charset_id_0 < 0)
}
else
{
- components[component_idx++] = c;
- if (method == COMPOSITION_WITH_RULE
- || (method == COMPOSITION_WITH_RULE_ALTCHARS
- && composition_state == COMPOSING_COMPONENT_CHAR))
- composition_state++;
+ if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1)
+ {
+ components[component_idx++] = c;
+ if (method == COMPOSITION_WITH_RULE
+ || (method == COMPOSITION_WITH_RULE_ALTCHARS
+ && composition_state == COMPOSING_COMPONENT_CHAR))
+ composition_state++;
+ }
+ else
+ {
+ MAYBE_FINISH_COMPOSITION ();
+ *charbuf++ = c;
+ char_offset++;
+ }
}
continue;
const unsigned char *src_end = coding->source + coding->src_bytes;
int multibytep = coding->src_multibyte;
int consumed_chars = 0;
- Lisp_Object attrs, valids;
+ Lisp_Object attrs, valids, name;
int found = 0;
int head_ascii = coding->head_ascii;
+ int check_latin_extra = 0;
detect_info->checked |= CATEGORY_MASK_CHARSET;
coding = &coding_categories[coding_category_charset];
attrs = CODING_ID_ATTRS (coding->id);
valids = AREF (attrs, coding_attr_charset_valids);
-
+ name = CODING_ID_NAME (coding->id);
+ /* Bytes in 0x80..0x9F are checked against Vlatin_extra_code_table only
+    when the coding system's name begins with "iso-8859-".  This must be
+    a prefix comparison: a bare strcmp used as a truth value is non-zero
+    for every name other than exactly "iso-8859-", which would enable
+    the check for unrelated coding systems as well.  */
+ if (VECTORP (Vlatin_extra_code_table)
+ && strncmp ((char *) SDATA (SYMBOL_NAME (name)), "iso-8859-",
+ sizeof ("iso-8859-") - 1) == 0)
+ check_latin_extra = 1;
if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs)))
src += head_ascii;
if (NILP (val))
break;
if (c >= 0x80)
- found = CATEGORY_MASK_CHARSET;
+ {
+ if (c < 0xA0
+ && check_latin_extra
+ && NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
+ break;
+ found = CATEGORY_MASK_CHARSET;
+ }
if (INTEGERP (val))
{
charset = CHARSET_FROM_ID (XFASTINT (val));
code = c;
val = AREF (valids, c);
- if (NILP (val))
+ if (! INTEGERP (val) && ! CONSP (val))
goto invalid_code;
if (INTEGERP (val))
{
if (coding->src_multibyte)
{
int multibytep = 1;
- EMACS_INT consumed_chars;
+ EMACS_INT consumed_chars = 0;
while (1)
{
}
else
{
- name = Vcode_conversion_workbuf_name;
- workbuf = Fget_buffer_create (name);
- if (NILP (Vcode_conversion_reused_workbuf))
- Vcode_conversion_reused_workbuf = workbuf;
+ if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf)))
+ Vcode_conversion_reused_workbuf
+ = Fget_buffer_create (Vcode_conversion_workbuf_name);
+ workbuf = Vcode_conversion_reused_workbuf;
}
current = current_buffer;
set_buffer_internal (XBUFFER (workbuf));
+ /* We can't allow modification hooks to run in the work buffer. For
+ instance, directory_files_internal assumes that file decoding
+ doesn't compile new regexps. */
+ Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt);
Ferase_buffer ();
current_buffer->undo_list = Qt;
current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil;
{
const unsigned char *src_end = src + src_bytes;
Lisp_Object attrs, eol_type;
- Lisp_Object val;
+ Lisp_Object val = Qnil;
struct coding_system coding;
int id;
struct coding_detection_info detect_info;
{
int mask = detect_info.rejected | detect_info.found;
int found = 0;
- val = Qnil;
for (i = coding_category_raw_text - 1; i >= 0; i--)
{
/* Then, detect eol-format if necessary. */
{
- int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol;
+ int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1;
Lisp_Object tail;
if (VECTORP (eol_type))
}
}
- return (highest ? XCAR (val) : val);
+ return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val);
}
Optional 4th argument DESTINATION specifies where the decoded text goes.
If nil, the region between START and END is replaced by the decoded text.
-If buffer, the decoded text is inserted in the buffer.
+If buffer, the decoded text is inserted in that buffer after point (point
+does not move).
In those cases, the length of the decoded text is returned.
If DESTINATION is t, the decoded text is returned.
Optional 4th argument DESTINATION specifies where the encoded text goes.
If nil, the region between START and END is replaced by the encoded text.
-If buffer, the encoded text is inserted in the buffer.
+If buffer, the encoded text is inserted in that buffer after point (point
+does not move).
In those cases, the length of the encoded text is returned.
If DESTINATION is t, the encoded text is returned.
if the decoding operation is trivial.
Optional fourth arg BUFFER non-nil means that the decoded text is
-inserted in BUFFER instead of returned as a string. In this case,
-the return value is the length of the decoded text.
+inserted in that buffer after point (point does not move). In this
+case, the return value is the length of the decoded text.
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
itself if the encoding operation is trivial.
Optional fourth arg BUFFER non-nil means that the encoded text is
-inserted in BUFFER instead of returned as a string. In this case,
-the return value is the length of the encoded text.
+inserted in that buffer after point (point does not move). In this
+case, the return value is the length of the encoded text.
This function sets `last-coding-system-used' to the precise coding system
used (which may be different from CODING-SYSTEM if CODING-SYSTEM is
CHECK_CHARACTER (val);
CODING_ATTR_MNEMONIC (attrs) = val;
}
- else if (EQ (prop, QCdefalut_char))
+ else if (EQ (prop, QCdefault_char))
{
if (NILP (val))
val = make_number (' ');
DEFSYM (QCcategory, ":category");
DEFSYM (QCmnemonic, ":mnemonic");
- DEFSYM (QCdefalut_char, ":default-char");
+ DEFSYM (QCdefault_char, ":default-char");
DEFSYM (QCdecode_translation_table, ":decode-translation-table");
DEFSYM (QCencode_translation_table, ":encode-translation-table");
DEFSYM (QCpost_read_conversion, ":post-read-conversion");