X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/ea9d458bec46144ae3a4443e9b0aecbd00a1460b..f9e65eb300c487a85de743edc0bafd6434d6db5e:/src/coding.c diff --git a/src/coding.c b/src/coding.c index 7239c9f9c7..f5b4f80868 100644 --- a/src/coding.c +++ b/src/coding.c @@ -367,6 +367,8 @@ Lisp_Object Qtarget_idx; Lisp_Object Vselect_safe_coding_system_function; +int coding_system_require_warning; + /* Mnemonic string for each format of end-of-line. */ Lisp_Object eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac; /* Mnemonic string to indicate format of end-of-line is not yet @@ -379,6 +381,16 @@ int system_eol_type; #ifdef emacs +/* Information about which coding system is safe for which chars. + The value has the form (GENERIC-LIST . NON-GENERIC-ALIST). + + GENERIC-LIST is a list of generic coding systems which can encode + any characters. + + NON-GENERIC-ALIST is an alist of non generic coding systems vs the + corresponding char table that contains safe chars. */ +Lisp_Object Vcoding_system_safe_chars; + Lisp_Object Vcoding_system_list, Vcoding_system_alist; Lisp_Object Qcoding_system_p, Qcoding_system_error; @@ -485,26 +497,27 @@ Lisp_Object Vcharset_revision_alist; /* Default coding systems used for process I/O. */ Lisp_Object Vdefault_process_coding_system; +/* Char table for translating Quail and self-inserting input. */ +Lisp_Object Vtranslation_table_for_input; + /* Global flag to tell that we can't call post-read-conversion and pre-write-conversion functions. Usually the value is zero, but it is set to 1 temporarily while such functions are running. This is to avoid infinite recursive call. */ static int inhibit_pre_post_conversion; -/* Char-table containing safe coding systems of each character. */ -Lisp_Object Vchar_coding_system_table; Lisp_Object Qchar_coding_system; -/* Return `safe-chars' property of coding system CODING. Don't check - validity of CODING. */ +/* Return `safe-chars' property of CODING_SYSTEM (symbol). Don't check + its validity. */ Lisp_Object -coding_safe_chars (coding) - struct coding_system *coding; +coding_safe_chars (coding_system) + Lisp_Object coding_system; { Lisp_Object coding_spec, plist, safe_chars; - coding_spec = Fget (coding->symbol, Qcoding_system); + coding_spec = Fget (coding_system, Qcoding_system); plist = XVECTOR (coding_spec)->contents[3]; safe_chars = Fplist_get (XVECTOR (coding_spec)->contents[3], Qsafe_chars); return (CHAR_TABLE_P (safe_chars) ? safe_chars : Qt); @@ -673,8 +686,16 @@ detect_coding_emacs_mule (src, src_end, multibytep) /* Record one COMPONENT (alternate character or composition rule). */ -#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ - (coding->cmp_data->data[coding->cmp_data->used++] = component) +#define CODING_ADD_COMPOSITION_COMPONENT(coding, component) \ + do { \ + coding->cmp_data->data[coding->cmp_data->used++] = component; \ + if (coding->cmp_data->used - coding->cmp_data_start \ + == COMPOSITION_DATA_MAX_BUNCH_LENGTH) \ + { \ + CODING_ADD_COMPOSITION_END (coding, coding->produced_char); \ + coding->composing = COMPOSITION_NO; \ + } \ + } while (0) /* Get one byte from a data pointed by SRC and increment SRC. If SRC @@ -1305,7 +1326,7 @@ enum iso_code_class_type iso_code_class[256]; #define CHARSET_OK(idx, charset, c) \ (coding_system_table[idx] \ && (charset == CHARSET_ASCII \ - || (safe_chars = coding_safe_chars (coding_system_table[idx]), \ + || (safe_chars = coding_safe_chars (coding_system_table[idx]->symbol), \ CODING_SAFE_CHAR_P (safe_chars, c))) \ && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding_system_table[idx], \ charset) \ @@ -1314,6 +1335,9 @@ enum iso_code_class_type iso_code_class[256]; #define SHIFT_OUT_OK(idx) \ (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0) +#define COMPOSITION_OK(idx) \ + (coding_system_table[idx]->composing != COMPOSITION_DISABLED) + /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions". Check if a text is encoded in ISO2022. If it is, return an integer in which appropriate flag bits any of: @@ -1391,7 +1415,30 @@ detect_coding_iso2022 (src, src_end, multibytep) else if (c >= '0' && c <= '4') { /* ESC for start/end composition. */ - mask_found |= CODING_CATEGORY_MASK_ISO; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7)) + mask_found |= CODING_CATEGORY_MASK_ISO_7; + else + mask &= ~CODING_CATEGORY_MASK_ISO_7; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT)) + mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT; + else + mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_1)) + mask_found |= CODING_CATEGORY_MASK_ISO_8_1; + else + mask &= ~CODING_CATEGORY_MASK_ISO_8_1; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_2)) + mask_found |= CODING_CATEGORY_MASK_ISO_8_2; + else + mask &= ~CODING_CATEGORY_MASK_ISO_8_2; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)) + mask_found |= CODING_CATEGORY_MASK_ISO_7_ELSE; + else + mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE; + if (COMPOSITION_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)) + mask_found |= CODING_CATEGORY_MASK_ISO_8_ELSE; + else + mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE; break; } else @@ -1734,7 +1781,7 @@ decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) Lisp_Object translation_table; Lisp_Object safe_chars; - safe_chars = coding_safe_chars (coding); + safe_chars = coding_safe_chars (coding->symbol); if (NILP (Venable_character_translation)) translation_table = Qnil; @@ -2487,7 +2534,7 @@ encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes) Lisp_Object translation_table; Lisp_Object safe_chars; - safe_chars = coding_safe_chars (coding); + safe_chars = coding_safe_chars (coding->symbol); if (NILP (Venable_character_translation)) translation_table = Qnil; @@ -5274,6 +5321,9 @@ coding_restore_composition (coding, obj) int len = data[0] - 4, j; Lisp_Object args[MAX_COMPOSITION_COMPONENTS * 2 - 1]; + if (method == COMPOSITION_WITH_RULE_ALTCHARS + && len % 2 == 0) + len --; for (j = 0; j < len; j++) args[j] = make_number (data[4 + j]); components = (method == COMPOSITION_WITH_ALTCHARS @@ -6262,8 +6312,11 @@ detect_coding_system (src, src_bytes, highest, multibytep) DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region, 2, 3, 0, - doc: /* Detect coding system of the text in the region between START and END. -Return a list of possible coding systems ordered by priority. + doc: /* Detect how the byte sequence in the region is encoded. +Return a list of possible coding systems used on decoding a byte +sequence containing the bytes in the region between START and END when +the coding system `undecided' is specified. The list is ordered by +priority decided in the current language environment. If only ASCII characters are found, it returns a list of single element `undecided' or its subsidiary coding system according to a detected @@ -6306,8 +6359,11 @@ highest priority. */) DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string, 1, 2, 0, - doc: /* Detect coding system of the text in STRING. -Return a list of possible coding systems ordered by priority. + doc: /* Detect how the byte sequence in STRING is encoded. +Return a list of possible coding systems used on decoding a byte +sequence containing the bytes in STRING when the coding system +`undecided' is specified. The list is ordered by priority decided in +the current language environment. If only ASCII characters are found, it returns a list of single element `undecided' or its subsidiary coding system according to a detected @@ -6330,26 +6386,6 @@ highest priority. */) STRING_MULTIBYTE (string)); } -/* Return an intersection of lists L1 and L2. */ - -static Lisp_Object -intersection (l1, l2) - Lisp_Object l1, l2; -{ - Lisp_Object val = Fcons (Qnil, Qnil), tail; - - for (tail = val; CONSP (l1); l1 = XCDR (l1)) - { - if (!NILP (Fmemq (XCAR (l1), l2))) - { - XSETCDR (tail, Fcons (XCAR (l1), Qnil)); - tail = XCDR (tail); - } - } - return XCDR (val); -} - - /* Subroutine for Fsafe_coding_systems_region_internal. Return a list of coding systems that safely encode the multibyte @@ -6369,8 +6405,9 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) Lisp_Object safe_codings, work_table; int *single_byte_char_found; { - int c, len, idx; - Lisp_Object val; + int c, len, i; + Lisp_Object val, ch; + Lisp_Object prev, tail; while (p < pend) { @@ -6382,30 +6419,35 @@ find_safe_codings (p, pend, safe_codings, work_table, single_byte_char_found) if (SINGLE_BYTE_CHAR_P (c)) *single_byte_char_found = 1; if (NILP (safe_codings)) + /* Already all coding systems are excluded. */ continue; /* Check the safe coding systems for C. */ - val = char_table_ref_and_index (work_table, c, &idx); + ch = make_number (c); + val = Faref (work_table, ch); if (EQ (val, Qt)) /* This element was already checked. Ignore it. */ continue; /* Remember that we checked this element. */ - CHAR_TABLE_SET (work_table, make_number (idx), Qt); + Faset (work_table, ch, Qt); - /* If there are some safe coding systems for C and we have - already found the other set of coding systems for the - different characters, get the intersection of them. */ - if (!EQ (safe_codings, Qt) && !NILP (val)) - val = intersection (safe_codings, val); - safe_codings = val; + for (prev = tail = safe_codings; CONSP (tail); tail = XCDR (tail)) + { + val = XCAR (tail); + if (NILP (Faref (XCDR (val), ch))) + { + /* Exclued this coding system from SAFE_CODINGS. */ + if (EQ (tail, safe_codings)) + safe_codings = XCDR (safe_codings); + else + XSETCDR (prev, XCDR (tail)); + } + else + prev = tail; + } } return safe_codings; } - -/* Return a list of coding systems that safely encode the text between - START and END. If the text contains only ASCII or is unibyte, - return t. */ - DEFUN ("find-coding-systems-region-internal", Ffind_coding_systems_region_internal, Sfind_coding_systems_region_internal, 2, 2, 0, @@ -6464,28 +6506,35 @@ DEFUN ("find-coding-systems-region-internal", } /* The text contains non-ASCII characters. */ - work_table = Fcopy_sequence (Vchar_coding_system_table); - safe_codings = find_safe_codings (p1, p1end, Qt, work_table, + + work_table = Fmake_char_table (Qchar_coding_system, Qnil); + safe_codings = Fcopy_sequence (XCDR (Vcoding_system_safe_chars)); + + safe_codings = find_safe_codings (p1, p1end, safe_codings, work_table, &single_byte_char_found); if (p2 < p2end) safe_codings = find_safe_codings (p2, p2end, safe_codings, work_table, &single_byte_char_found); - - if (EQ (safe_codings, Qt)) - ; /* Nothing to be done. */ - else if (!single_byte_char_found) + if (EQ (safe_codings, XCDR (Vcoding_system_safe_chars))) + safe_codings = Qt; + else { - /* Append generic coding systems. */ - Lisp_Object args[2]; - args[0] = safe_codings; - args[1] = Fchar_table_extra_slot (Vchar_coding_system_table, - make_number (0)); - safe_codings = Fappend (2, args); + /* Turn safe_codings to a list of coding systems... */ + Lisp_Object val; + + if (single_byte_char_found) + /* ... and append these for eight-bit chars. */ + val = Fcons (Qraw_text, + Fcons (Qemacs_mule, Fcons (Qno_conversion, Qnil))); + else + /* ... and append generic coding systems. */ + val = Fcopy_sequence (XCAR (Vcoding_system_safe_chars)); + + for (; CONSP (safe_codings); safe_codings = XCDR (safe_codings)) + val = Fcons (XCAR (XCAR (safe_codings)), val); + safe_codings = val; } - else - safe_codings = Fcons (Qraw_text, - Fcons (Qemacs_mule, - Fcons (Qno_conversion, safe_codings))); + return safe_codings; } @@ -6566,7 +6615,10 @@ to the string. */) if (NILP (current_buffer->enable_multibyte_characters)) return Qnil; p = CHAR_POS_ADDR (from); - pend = CHAR_POS_ADDR (to); + if (to == GPT) + pend = GPT_ADDR; + else + pend = CHAR_POS_ADDR (to); } else { @@ -6601,7 +6653,7 @@ to the string. */) if (coding.type == coding_type_undecided) safe_chars = Qnil; else - safe_chars = coding_safe_chars (&coding); + safe_chars = coding_safe_chars (coding_system); if (STRINGP (string) || from >= GPT || to <= GPT) @@ -6888,8 +6940,7 @@ Return the corresponding character code in Big5. */) return val; } -DEFUN ("set-terminal-coding-system-internal", - Fset_terminal_coding_system_internal, +DEFUN ("set-terminal-coding-system-internal", Fset_terminal_coding_system_internal, Sset_terminal_coding_system_internal, 1, 1, 0, doc: /* Internal use only. */) (coding_system) @@ -6908,8 +6959,7 @@ DEFUN ("set-terminal-coding-system-internal", return Qnil; } -DEFUN ("set-safe-terminal-coding-system-internal", - Fset_safe_terminal_coding_system_internal, +DEFUN ("set-safe-terminal-coding-system-internal", Fset_safe_terminal_coding_system_internal, Sset_safe_terminal_coding_system_internal, 1, 1, 0, doc: /* Internal use only. */) (coding_system) @@ -6927,16 +6977,15 @@ DEFUN ("set-safe-terminal-coding-system-internal", return Qnil; } -DEFUN ("terminal-coding-system", - Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0, +DEFUN ("terminal-coding-system", Fterminal_coding_system, + Sterminal_coding_system, 0, 0, 0, doc: /* Return coding system specified for terminal output. */) () { return terminal_coding.symbol; } -DEFUN ("set-keyboard-coding-system-internal", - Fset_keyboard_coding_system_internal, +DEFUN ("set-keyboard-coding-system-internal", Fset_keyboard_coding_system_internal, Sset_keyboard_coding_system_internal, 1, 1, 0, doc: /* Internal use only. */) (coding_system) @@ -6949,8 +6998,8 @@ DEFUN ("set-keyboard-coding-system-internal", return Qnil; } -DEFUN ("keyboard-coding-system", - Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0, +DEFUN ("keyboard-coding-system", Fkeyboard_coding_system, + Skeyboard_coding_system, 0, 0, 0, doc: /* Return coding system specified for decoding keyboard input. */) () { @@ -7122,6 +7171,40 @@ This function is internal use only. */) return Qnil; } +DEFUN ("define-coding-system-internal", Fdefine_coding_system_internal, + Sdefine_coding_system_internal, 1, 1, 0, + doc: /* Register CODING-SYSTEM as a base coding system. +This function is internal use only. */) + (coding_system) + Lisp_Object coding_system; +{ + Lisp_Object safe_chars, slot; + + if (NILP (Fcheck_coding_system (coding_system))) + Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil)); + safe_chars = coding_safe_chars (coding_system); + if (! EQ (safe_chars, Qt) && ! CHAR_TABLE_P (safe_chars)) + error ("No valid safe-chars property for %s", + SDATA (SYMBOL_NAME (coding_system))); + if (EQ (safe_chars, Qt)) + { + if (NILP (Fmemq (coding_system, XCAR (Vcoding_system_safe_chars)))) + XSETCAR (Vcoding_system_safe_chars, + Fcons (coding_system, XCAR (Vcoding_system_safe_chars))); + } + else + { + slot = Fassq (coding_system, XCDR (Vcoding_system_safe_chars)); + if (NILP (slot)) + XSETCDR (Vcoding_system_safe_chars, + nconc2 (XCDR (Vcoding_system_safe_chars), + Fcons (Fcons (coding_system, safe_chars), Qnil))); + else + XSETCDR (slot, safe_chars); + } + return Qnil; +} + #endif /* emacs */ @@ -7275,6 +7358,9 @@ syms_of_coding () } } + Vcoding_system_safe_chars = Fcons (Qnil, Qnil); + staticpro (&Vcoding_system_safe_chars); + Qtranslation_table = intern ("translation-table"); staticpro (&Qtranslation_table); Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (1)); @@ -7299,7 +7385,7 @@ syms_of_coding () But don't staticpro it here--that is done in alloc.c. */ Qchar_table_extra_slots = intern ("char-table-extra-slots"); Fput (Qsafe_chars, Qchar_table_extra_slots, make_number (0)); - Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (2)); + Fput (Qchar_coding_system, Qchar_table_extra_slots, make_number (0)); Qvalid_codes = intern ("valid-codes"); staticpro (&Qvalid_codes); @@ -7334,6 +7420,7 @@ syms_of_coding () defsubr (&Sfind_operation_coding_system); defsubr (&Supdate_coding_systems_internal); defsubr (&Sset_coding_priority_internal); + defsubr (&Sdefine_coding_system_internal); DEFVAR_LISP ("coding-system-list", &Vcoding_system_list, doc: /* List of coding systems. @@ -7528,11 +7615,14 @@ coding system used in each operation can't encode the text. The default value is `select-safe-coding-system' (which see). */); Vselect_safe_coding_system_function = Qnil; - DEFVAR_LISP ("char-coding-system-table", &Vchar_coding_system_table, - doc: /* Char-table containing safe coding systems of each characters. -Each element doesn't include such generic coding systems that can -encode any characters. They are in the first extra slot. */); - Vchar_coding_system_table = Fmake_char_table (Qchar_coding_system, Qnil); + DEFVAR_BOOL ("coding-system-require-warning", + &coding_system_require_warning, + doc: /* Internal use only. +If non-nil, on writing a file, `select-safe-coding-system-function' is +called even if `coding-system-for-write' is non-nil. The command +`universal-coding-system-argument' binds this variable to t temporarily. */); + coding_system_require_warning = 0; + DEFVAR_BOOL ("inhibit-iso-escape-detection", &inhibit_iso_escape_detection, @@ -7560,6 +7650,12 @@ The other way to read escape sequences in a file without decoding is to explicitly specify some coding system that doesn't use ISO2022's escape sequence (e.g `latin-1') on reading by \\[universal-coding-system-argument]. */); inhibit_iso_escape_detection = 0; + + DEFVAR_LISP ("translation-table-for-input", &Vtranslation_table_for_input, + doc: /* Char table for translating self-inserting characters. +This is applied to the result of input methods, not their input. See also +`keyboard-translate-table'. */); + Vtranslation_table_for_input = Qnil; } char *