X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/08b3caa982199bd7939d9d6877203ada5d0083b5..9f2554de935574cb1168b8de6fb3b38079bc4b80:/src/coding.c diff --git a/src/coding.c b/src/coding.c index 65754b4b1b..e292f80859 100644 --- a/src/coding.c +++ b/src/coding.c @@ -314,7 +314,7 @@ Lisp_Object Qcharset, Qiso_2022, Qutf_8, Qutf_16, Qshift_jis, Qbig5; Lisp_Object Qbig, Qlittle; Lisp_Object Qcoding_system_history; Lisp_Object Qvalid_codes; -Lisp_Object QCcategory, QCmnemonic, QCdefalut_char; +Lisp_Object QCcategory, QCmnemonic, QCdefault_char; Lisp_Object QCdecode_translation_table, QCencode_translation_table; Lisp_Object QCpost_read_conversion, QCpre_write_conversion; Lisp_Object QCascii_compatible_p; @@ -1326,7 +1326,7 @@ decode_coding_utf_8 (coding) const unsigned char *src_base; int *charbuf = coding->charbuf + coding->charbuf_used; int *charbuf_end = coding->charbuf + coding->charbuf_size; - int consumed_chars = 0, consumed_chars_base; + int consumed_chars = 0, consumed_chars_base = 0; int multibytep = coding->src_multibyte; enum utf_bom_type bom = CODING_UTF_8_BOM (coding); Lisp_Object attr, charset_list; @@ -1345,12 +1345,12 @@ decode_coding_utf_8 (coding) src = src_base; else { - ONE_MORE_BYTE (c2); + ONE_MORE_BYTE (c2); if (! UTF_8_EXTRA_OCTET_P (c2)) src = src_base; else { - ONE_MORE_BYTE (c3); + ONE_MORE_BYTE (c3); if (! UTF_8_EXTRA_OCTET_P (c3)) src = src_base; else @@ -1637,7 +1637,7 @@ decode_coding_utf_16 (coding) const unsigned char *src_base; int *charbuf = coding->charbuf + coding->charbuf_used; int *charbuf_end = coding->charbuf + coding->charbuf_size; - int consumed_chars = 0, consumed_chars_base; + int consumed_chars = 0, consumed_chars_base = 0; int multibytep = coding->src_multibyte; enum utf_bom_type bom = CODING_UTF_16_BOM (coding); enum utf_16_endian_type endian = CODING_UTF_16_ENDIAN (coding); @@ -2449,8 +2449,10 @@ encode_coding_emacs_mule (coding) if (preferred_charset_id >= 0) { charset = CHARSET_FROM_ID (preferred_charset_id); - if (! CHAR_CHARSET_P (c, charset)) - charset = char_charset (c, charset_list, NULL); + if (CHAR_CHARSET_P (c, charset)) + code = ENCODE_CHAR (charset, c); + else + charset = char_charset (c, charset_list, &code); } else charset = char_charset (c, charset_list, &code); @@ -2758,6 +2760,7 @@ detect_coding_iso_2022 (coding, detect_info) int i; int rejected = 0; int found = 0; + int composition_count = -1; detect_info->checked |= CATEGORY_MASK_ISO; @@ -2826,10 +2829,20 @@ detect_coding_iso_2022 (coding, detect_info) rejected |= CATEGORY_MASK_ISO_7BIT | CATEGORY_MASK_ISO_8BIT; break; } + else if (c == '1') + { + /* End of composition. */ + if (composition_count < 0 + || composition_count > MAX_COMPOSITION_COMPONENTS) + /* Invalid */ + break; + composition_count = -1; + found |= CATEGORY_MASK_ISO; + } else if (c >= '0' && c <= '4') { /* ESC for start/end composition. */ - found |= CATEGORY_MASK_ISO; + composition_count = 0; break; } else @@ -2900,6 +2913,8 @@ detect_coding_iso_2022 (coding, detect_info) continue; if (c < 0x80) { + if (composition_count >= 0) + composition_count++; single_shifting = 0; break; } @@ -2924,9 +2939,17 @@ detect_coding_iso_2022 (coding, detect_info) } if (i & 1 && src < src_end) - rejected |= CATEGORY_MASK_ISO_8_2; + { + rejected |= CATEGORY_MASK_ISO_8_2; + if (composition_count >= 0) + composition_count += i; + } else - found |= CATEGORY_MASK_ISO_8_2; + { + found |= CATEGORY_MASK_ISO_8_2; + if (composition_count >= 0) + composition_count += i / 2; + } } break; } @@ -3043,6 +3066,8 @@ detect_coding_iso_2022 (coding, detect_info) break; \ if (p == src_end - 1) \ { \ + if (coding->mode & CODING_MODE_LAST_BLOCK) \ + goto invalid_code; \ /* The current composition doesn't end in the current \ source. */ \ record_conversion_result \ @@ -3190,10 +3215,15 @@ decode_coding_iso_2022 (coding) if (composition_state == COMPOSING_RULE || composition_state == COMPOSING_COMPONENT_RULE) { - DECODE_COMPOSITION_RULE (c1); - components[component_idx++] = c1; - composition_state--; - continue; + if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1) + { + DECODE_COMPOSITION_RULE (c1); + components[component_idx++] = c1; + composition_state--; + continue; + } + /* Too long composition. */ + MAYBE_FINISH_COMPOSITION (); } } if (charset_id_0 < 0 @@ -3210,10 +3240,14 @@ decode_coding_iso_2022 (coding) if (composition_state == COMPOSING_RULE || composition_state == COMPOSING_COMPONENT_RULE) { - DECODE_COMPOSITION_RULE (c1); - components[component_idx++] = c1; - composition_state--; - continue; + if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1) + { + DECODE_COMPOSITION_RULE (c1); + components[component_idx++] = c1; + composition_state--; + continue; + } + MAYBE_FINISH_COMPOSITION (); } } if (charset_id_0 < 0) @@ -3571,11 +3605,20 @@ decode_coding_iso_2022 (coding) } else { - components[component_idx++] = c; - if (method == COMPOSITION_WITH_RULE - || (method == COMPOSITION_WITH_RULE_ALTCHARS - && composition_state == COMPOSING_COMPONENT_CHAR)) - composition_state++; + if (component_idx < MAX_COMPOSITION_COMPONENTS * 2 + 1) + { + components[component_idx++] = c; + if (method == COMPOSITION_WITH_RULE + || (method == COMPOSITION_WITH_RULE_ALTCHARS + && composition_state == COMPOSING_COMPONENT_CHAR)) + composition_state++; + } + else + { + MAYBE_FINISH_COMPOSITION (); + *charbuf++ = c; + char_offset++; + } } continue; @@ -4975,16 +5018,20 @@ detect_coding_charset (coding, detect_info) const unsigned char *src_end = coding->source + coding->src_bytes; int multibytep = coding->src_multibyte; int consumed_chars = 0; - Lisp_Object attrs, valids; + Lisp_Object attrs, valids, name; int found = 0; int head_ascii = coding->head_ascii; + int check_latin_extra = 0; detect_info->checked |= CATEGORY_MASK_CHARSET; coding = &coding_categories[coding_category_charset]; attrs = CODING_ID_ATTRS (coding->id); valids = AREF (attrs, coding_attr_charset_valids); - + name = CODING_ID_NAME (coding->id); + if (VECTORP (Vlatin_extra_code_table) + && strcmp ((char *) SDATA (SYMBOL_NAME (name)), "iso-8859-")) + check_latin_extra = 1; if (! NILP (CODING_ATTR_ASCII_COMPAT (attrs))) src += head_ascii; @@ -5003,7 +5050,13 @@ detect_coding_charset (coding, detect_info) if (NILP (val)) break; if (c >= 0x80) - found = CATEGORY_MASK_CHARSET; + { + if (c < 0xA0 + && check_latin_extra + && NILP (XVECTOR (Vlatin_extra_code_table)->contents[c])) + break; + found = CATEGORY_MASK_CHARSET; + } if (INTEGERP (val)) { charset = CHARSET_FROM_ID (XFASTINT (val)); @@ -5109,7 +5162,7 @@ decode_coding_charset (coding) code = c; val = AREF (valids, c); - if (NILP (val)) + if (! INTEGERP (val) && ! CONSP (val)) goto invalid_code; if (INTEGERP (val)) { @@ -6279,7 +6332,7 @@ produce_chars (coding, translation_table, last_block) if (coding->src_multibyte) { int multibytep = 1; - EMACS_INT consumed_chars; + EMACS_INT consumed_chars = 0; while (1) { @@ -6972,13 +7025,17 @@ make_conversion_work_buffer (multibyte) } else { - name = Vcode_conversion_workbuf_name; - workbuf = Fget_buffer_create (name); - if (NILP (Vcode_conversion_reused_workbuf)) - Vcode_conversion_reused_workbuf = workbuf; + if (NILP (Fbuffer_live_p (Vcode_conversion_reused_workbuf))) + Vcode_conversion_reused_workbuf + = Fget_buffer_create (Vcode_conversion_workbuf_name); + workbuf = Vcode_conversion_reused_workbuf; } current = current_buffer; set_buffer_internal (XBUFFER (workbuf)); + /* We can't allow modification hooks to run in the work buffer. For + instance, directory_files_internal assumes that file decoding + doesn't compile new regexps. */ + Fset (Fmake_local_variable (Qinhibit_modification_hooks), Qt); Ferase_buffer (); current_buffer->undo_list = Qt; current_buffer->enable_multibyte_characters = multibyte ? Qt : Qnil; @@ -7639,7 +7696,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, { const unsigned char *src_end = src + src_bytes; Lisp_Object attrs, eol_type; - Lisp_Object val; + Lisp_Object val = Qnil; struct coding_system coding; int id; struct coding_detection_info detect_info; @@ -7804,7 +7861,6 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, { int mask = detect_info.rejected | detect_info.found; int found = 0; - val = Qnil; for (i = coding_category_raw_text - 1; i >= 0; i--) { @@ -7867,7 +7923,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, /* Then, detect eol-format if necessary. */ { - int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol; + int normal_eol = -1, utf_16_be_eol = -1, utf_16_le_eol = -1; Lisp_Object tail; if (VECTORP (eol_type)) @@ -7933,7 +7989,7 @@ detect_coding_system (src, src_chars, src_bytes, highest, multibytep, } } - return (highest ? XCAR (val) : val); + return (highest ? (CONSP (val) ? XCAR (val) : Qnil) : val); } @@ -8432,7 +8488,8 @@ START and END are buffer positions. Optional 4th arguments DESTINATION specifies where the decoded text goes. If nil, the region between START and END is replaced by the decoded text. -If buffer, the decoded text is inserted in the buffer. +If buffer, the decoded text is inserted in that buffer after point (point +does not move). In those cases, the length of the decoded text is returned. If DESTINATION is t, the decoded text is returned. @@ -8454,7 +8511,8 @@ START and END are buffer positions. Optional 4th arguments DESTINATION specifies where the encoded text goes. If nil, the region between START and END is replace by the encoded text. -If buffer, the encoded text is inserted in the buffer. +If buffer, the encoded text is inserted in that buffer after point (point +does not move). In those cases, the length of the encoded text is returned. If DESTINATION is t, the encoded text is returned. @@ -8534,8 +8592,8 @@ Optional third arg NOCOPY non-nil means it is OK to return STRING itself if the decoding operation is trivial. Optional fourth arg BUFFER non-nil means that the decoded text is -inserted in BUFFER instead of returned as a string. In this case, -the return value is the length of the decoded text. +inserted in that buffer after point (point does not move). In this +case, the return value is the length of the decoded text. This function sets `last-coding-system-used' to the precise coding system used (which may be different from CODING-SYSTEM if CODING-SYSTEM is @@ -8555,8 +8613,8 @@ Optional third arg NOCOPY non-nil means it is OK to return STRING itself if the encoding operation is trivial. Optional fourth arg BUFFER non-nil means that the encoded text is -inserted in BUFFER instead of returned as a string. In this case, -the return value is the length of the encoded text. +inserted in that buffer after point (point does not move). In this +case, the return value is the length of the encoded text. This function sets `last-coding-system-used' to the precise coding system used (which may be different from CODING-SYSTEM if CODING-SYSTEM is @@ -9563,7 +9621,7 @@ DEFUN ("coding-system-put", Fcoding_system_put, Scoding_system_put, CHECK_CHARACTER (val); CODING_ATTR_MNEMONIC (attrs) = val; } - else if (EQ (prop, QCdefalut_char)) + else if (EQ (prop, QCdefault_char)) { if (NILP (val)) val = make_number (' '); @@ -9869,7 +9927,7 @@ syms_of_coding () DEFSYM (QCcategory, ":category"); DEFSYM (QCmnemonic, ":mnemonic"); - DEFSYM (QCdefalut_char, ":default-char"); + DEFSYM (QCdefault_char, ":default-char"); DEFSYM (QCdecode_translation_table, ":decode-translation-table"); DEFSYM (QCencode_translation_table, ":encode-translation-table"); DEFSYM (QCpost_read_conversion, ":post-read-conversion");