X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/2ed909207ee7afb93284d113526071e83fc1a5c1..3fa2054efdfa3c22456072254e6c67682a595233:/src/coding.c diff --git a/src/coding.c b/src/coding.c index c4aaefa818..c10fb37567 100644 --- a/src/coding.c +++ b/src/coding.c @@ -493,6 +493,8 @@ enum iso_code_class_type #define CODING_ISO_FLAG_USE_OLDJIS 0x10000 +#define CODING_ISO_FLAG_LEVEL_4 0x20000 + #define CODING_ISO_FLAG_FULL_SUPPORT 0x100000 /* A character to be produced on output if encoding of the original @@ -649,6 +651,23 @@ static struct coding_system coding_categories[coding_category_max]; #define max(a, b) ((a) > (b) ? (a) : (b)) #endif +/* Encode a flag that can be nil, something else, or t as -1, 0, 1. */ + +static int +encode_inhibit_flag (Lisp_Object flag) +{ + return NILP (flag) ? -1 : EQ (flag, Qt); +} + +/* True if the value of ENCODED_FLAG says a flag should be treated as set. + 1 means yes, -1 means no, 0 means ask the user variable VAR. */ + +static bool +inhibit_flag (int encoded_flag, bool var) +{ + return 0 < encoded_flag + var; +} + #define CODING_GET_INFO(coding, attrs, charset_list) \ do { \ (attrs) = CODING_ID_ATTRS ((coding)->id); \ @@ -1346,6 +1365,45 @@ decode_coding_utf_8 (struct coding_system *coding) break; } + /* In the simple case, rapidly handle ordinary characters */ + if (multibytep && ! eol_dos + && charbuf < charbuf_end - 6 && src < src_end - 6) + { + while (charbuf < charbuf_end - 6 && src < src_end - 6) + { + c1 = *src; + if (c1 & 0x80) + break; + src++; + consumed_chars++; + *charbuf++ = c1; + + c1 = *src; + if (c1 & 0x80) + break; + src++; + consumed_chars++; + *charbuf++ = c1; + + c1 = *src; + if (c1 & 0x80) + break; + src++; + consumed_chars++; + *charbuf++ = c1; + + c1 = *src; + if (c1 & 0x80) + break; + src++; + consumed_chars++; + *charbuf++ = c1; + } + /* If we handled at least one character, restart the main loop. */ + if (src != src_base) + continue; + } + if (byte_after_cr >= 0) c1 = byte_after_cr, byte_after_cr = -1; else @@ -3716,7 +3774,10 @@ decode_coding_iso_2022 (struct coding_system *coding) else charset = CHARSET_FROM_ID (charset_id_2); ONE_MORE_BYTE (c1); - if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) + if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0) + || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) + && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4) + ? c1 >= 0x80 : c1 < 0x80))) goto invalid_code; break; @@ -3730,7 +3791,10 @@ decode_coding_iso_2022 (struct coding_system *coding) else charset = CHARSET_FROM_ID (charset_id_3); ONE_MORE_BYTE (c1); - if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0)) + if (c1 < 0x20 || (c1 >= 0x80 && c1 < 0xA0) + || (! (CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_SEVEN_BITS) + && ((CODING_ISO_FLAGS (coding) & CODING_ISO_FLAG_LEVEL_4) + ? c1 >= 0x80 : c1 < 0x80))) goto invalid_code; break; @@ -5706,17 +5770,11 @@ setup_coding_system (Lisp_Object coding_system, struct coding_system *coding) coding->encoder = encode_coding_raw_text; coding->common_flags |= CODING_REQUIRE_DETECTION_MASK; coding->spec.undecided.inhibit_nbd - = (NILP (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection)) - ? -1 - : EQ (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection), Qt) - ? 1 - : 0); + = (encode_inhibit_flag + (AREF (attrs, coding_attr_undecided_inhibit_null_byte_detection))); coding->spec.undecided.inhibit_ied - = (NILP (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection)) - ? -1 - : EQ (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection), Qt) - ? 1 - : 0); + = (encode_inhibit_flag + (AREF (attrs, coding_attr_undecided_inhibit_iso_escape_detection))); coding->spec.undecided.prefer_utf_8 = ! NILP (AREF (attrs, coding_attr_undecided_prefer_utf_8)); } @@ -6476,16 +6534,11 @@ detect_coding (struct coding_system *coding) int c, i; struct coding_detection_info detect_info; bool null_byte_found = 0, eight_bit_found = 0; - int inhibit_nbd /* null byte detection */ - = (coding->spec.undecided.inhibit_nbd > 0 - | (coding->spec.undecided.inhibit_nbd == 0 - & inhibit_null_byte_detection)); - int inhibit_ied /* iso escape detection */ - = (coding->spec.undecided.inhibit_ied > 0 - | (coding->spec.undecided.inhibit_ied == 0 - & inhibit_iso_escape_detection)); - int prefer_utf_8 - = coding->spec.undecided.prefer_utf_8; + bool inhibit_nbd = inhibit_flag (coding->spec.undecided.inhibit_nbd, + inhibit_null_byte_detection); + bool inhibit_ied = inhibit_flag (coding->spec.undecided.inhibit_ied, + inhibit_iso_escape_detection); + bool prefer_utf_8 = coding->spec.undecided.prefer_utf_8; coding->head_ascii = 0; detect_info.checked = detect_info.found = detect_info.rejected = 0; @@ -6858,11 +6911,9 @@ get_translation_table (Lisp_Object attrs, bool encodep, int *max_lookup) if (CHAR_TABLE_P (standard)) { if (CONSP (translation_table)) - translation_table = nconc2 (translation_table, - Fcons (standard, Qnil)); + translation_table = nconc2 (translation_table, list1 (standard)); else - translation_table = Fcons (translation_table, - Fcons (standard, Qnil)); + translation_table = list2 (translation_table, standard); } } @@ -7446,7 +7497,7 @@ handle_composition_annotation (ptrdiff_t pos, ptrdiff_t limit, /* We found a composition. Store the corresponding annotation data in BUF. */ int *head = buf; - enum composition_method method = COMPOSITION_METHOD (prop); + enum composition_method method = composition_method (prop); int nchars = COMPOSITION_LENGTH (prop); ADD_COMPOSITION_DATA (buf, nchars, 0, method); @@ -7787,7 +7838,7 @@ make_conversion_work_buffer (bool multibyte) } -static Lisp_Object +static void code_conversion_restore (Lisp_Object arg) { Lisp_Object current, workbuf; @@ -7805,7 +7856,6 @@ code_conversion_restore (Lisp_Object arg) } set_buffer_internal (XBUFFER (current)); UNGCPRO; - return Qnil; } Lisp_Object @@ -8544,17 +8594,11 @@ detect_coding_system (const unsigned char *src, enum coding_category category IF_LINT (= 0); struct coding_system *this IF_LINT (= NULL); int c, i; - int inhibit_nbd /* null byte detection */ - = (coding.spec.undecided.inhibit_nbd > 0 - | (coding.spec.undecided.inhibit_nbd == 0 - & inhibit_null_byte_detection)); - int inhibit_ied /* iso escape detection */ - = (coding.spec.undecided.inhibit_ied > 0 - | (coding.spec.undecided.inhibit_ied == 0 - & inhibit_iso_escape_detection)); - int prefer_utf_8 - = coding.spec.undecided.prefer_utf_8; - + bool inhibit_nbd = inhibit_flag (coding.spec.undecided.inhibit_nbd, + inhibit_null_byte_detection); + bool inhibit_ied = inhibit_flag (coding.spec.undecided.inhibit_ied, + inhibit_iso_escape_detection); + bool prefer_utf_8 = coding.spec.undecided.prefer_utf_8; /* Skip all ASCII bytes except for a few ISO2022 controls. */ for (; src < src_end; src++) @@ -8667,20 +8711,20 @@ detect_coding_system (const unsigned char *src, { detect_info.found = CATEGORY_MASK_RAW_TEXT; id = CODING_SYSTEM_ID (Qno_conversion); - val = Fcons (make_number (id), Qnil); + val = list1 (make_number (id)); } else if (! detect_info.rejected && ! detect_info.found) { detect_info.found = CATEGORY_MASK_ANY; id = coding_categories[coding_category_undecided].id; - val = Fcons (make_number (id), Qnil); + val = list1 (make_number (id)); } else if (highest) { if (detect_info.found) { detect_info.found = 1 << category; - val = Fcons (make_number (this->id), Qnil); + val = list1 (make_number (this->id)); } else for (i = 0; i < coding_category_raw_text; i++) @@ -8688,7 +8732,7 @@ detect_coding_system (const unsigned char *src, { detect_info.found = 1 << coding_priorities[i]; id = coding_categories[coding_priorities[i]].id; - val = Fcons (make_number (id), Qnil); + val = list1 (make_number (id)); break; } } @@ -8705,7 +8749,7 @@ detect_coding_system (const unsigned char *src, found |= 1 << category; id = coding_categories[category].id; if (id >= 0) - val = Fcons (make_number (id), val); + val = list1 (make_number (id)); } } for (i = coding_category_raw_text - 1; i >= 0; i--) @@ -8730,7 +8774,7 @@ detect_coding_system (const unsigned char *src, this = coding_categories + coding_category_utf_8_sig; else this = coding_categories + coding_category_utf_8_nosig; - val = Fcons (make_number (this->id), Qnil); + val = list1 (make_number (this->id)); } } else if (base_category == coding_category_utf_16_auto) @@ -8747,13 +8791,13 @@ detect_coding_system (const unsigned char *src, this = coding_categories + coding_category_utf_16_be_nosig; else this = coding_categories + coding_category_utf_16_le_nosig; - val = Fcons (make_number (this->id), Qnil); + val = list1 (make_number (this->id)); } } else { detect_info.found = 1 << XINT (CODING_ATTR_CATEGORY (attrs)); - val = Fcons (make_number (coding.id), Qnil); + val = list1 (make_number (coding.id)); } /* Then, detect eol-format if necessary. */ @@ -9224,7 +9268,7 @@ is nil. */) attrs = AREF (CODING_SYSTEM_SPEC (elt), 0); ASET (attrs, coding_attr_trans_tbl, get_translation_table (attrs, 1, NULL)); - list = Fcons (Fcons (elt, Fcons (attrs, Qnil)), list); + list = Fcons (list2 (elt, attrs), list); } if (STRINGP (start)) @@ -9635,7 +9679,7 @@ DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_intern tset_charset_list (term, (terminal_coding->common_flags & CODING_REQUIRE_ENCODING_MASK ? coding_charset_list (terminal_coding) - : Fcons (make_number (charset_ascii), Qnil))); + : list1 (make_number (charset_ascii)))); return Qnil; } @@ -10080,9 +10124,9 @@ usage: (define-coding-system-internal ...) */) { dim2 = CHARSET_DIMENSION (CHARSET_FROM_ID (XFASTINT (tmp))); if (dim < dim2) - tmp = Fcons (XCAR (tail), Fcons (tmp, Qnil)); + tmp = list2 (XCAR (tail), tmp); else - tmp = Fcons (tmp, Fcons (XCAR (tail), Qnil)); + tmp = list2 (tmp, XCAR (tail)); } else { @@ -10093,7 +10137,7 @@ usage: (define-coding-system-internal ...) */) break; } if (NILP (tmp2)) - tmp = nconc2 (tmp, Fcons (XCAR (tail), Qnil)); + tmp = nconc2 (tmp, list1 (XCAR (tail))); else { XSETCDR (tmp2, Fcons (XCAR (tmp2), XCDR (tmp2))); @@ -10411,7 +10455,7 @@ usage: (define-coding-system-internal ...) */) && ! EQ (eol_type, Qmac)) error ("Invalid eol-type"); - aliases = Fcons (name, Qnil); + aliases = list1 (name); if (NILP (eol_type)) { @@ -10421,7 +10465,7 @@ usage: (define-coding-system-internal ...) */) Lisp_Object this_spec, this_name, this_aliases, this_eol_type; this_name = AREF (eol_type, i); - this_aliases = Fcons (this_name, Qnil); + this_aliases = list1 (this_name); this_eol_type = (i == 0 ? Qunix : i == 1 ? Qdos : Qmac); this_spec = make_uninit_vector (3); ASET (this_spec, 0, attrs); @@ -10536,7 +10580,7 @@ DEFUN ("define-coding-system-alias", Fdefine_coding_system_alias, list. */ while (!NILP (XCDR (aliases))) aliases = XCDR (aliases); - XSETCDR (aliases, Fcons (alias, Qnil)); + XSETCDR (aliases, list1 (alias)); eol_type = AREF (spec, 2); if (VECTORP (eol_type)) @@ -10770,11 +10814,6 @@ syms_of_coding (void) Fput (Qcoding_system_error, Qerror_message, build_pure_c_string ("Invalid coding system")); - /* Intern this now in case it isn't already done. - Setting this variable twice is harmless. - But don't staticpro it here--that is done in alloc.c. */ - Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots"); - DEFSYM (Qtranslation_table, "translation-table"); Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (2)); DEFSYM (Qtranslation_table_id, "translation-table-id"); @@ -11218,6 +11257,8 @@ character."); plist[13] = build_pure_c_string ("No conversion on encoding, automatic conversion on decoding."); plist[15] = args[coding_arg_eol_type] = Qnil; args[coding_arg_plist] = Flist (16, plist); + args[coding_arg_undecided_inhibit_null_byte_detection] = make_number (0); + args[coding_arg_undecided_inhibit_iso_escape_detection] = make_number (0); Fdefine_coding_system_internal (coding_arg_undecided_max, args); }