X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/3943ed767c4230c9cf7a489124ede96168a4c33b..7087d5e9af41c8835c3a5090bd8e2c6893685466:/src/charset.c diff --git a/src/charset.c b/src/charset.c index 9c9395fdc6..052f318683 100644 --- a/src/charset.c +++ b/src/charset.c @@ -1,8 +1,8 @@ /* Basic character set support. - Copyright (C) 2001, 2002, 2003, 2004, 2005, - 2006, 2007, 2008 Free Software Foundation, Inc. + Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, + 2008, 2009 Free Software Foundation, Inc. Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, - 2005, 2006, 2007, 2008 + 2005, 2006, 2007, 2008, 2009 National Institute of Advanced Industrial Science and Technology (AIST) Registration Number H14PRO021 @@ -12,10 +12,10 @@ This file is part of GNU Emacs. -GNU Emacs is free software; you can redistribute it and/or modify +GNU Emacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. GNU Emacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -23,9 +23,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GNU Emacs; see the file COPYING. If not, write to -the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ +along with GNU Emacs. If not, see . */ #include @@ -76,12 +74,14 @@ Lisp_Object Qascii; Lisp_Object Qeight_bit; Lisp_Object Qiso_8859_1; Lisp_Object Qunicode; +Lisp_Object Qemacs; /* The corresponding charsets. */ int charset_ascii; int charset_eight_bit; int charset_iso_8859_1; int charset_unicode; +int charset_emacs; /* The other special charsets. */ int charset_jisx0201_roman; @@ -97,6 +97,10 @@ int charset_unibyte; /* List of charsets ordered by the priority. */ Lisp_Object Vcharset_ordered_list; +/* Sub-list of Vcharset_ordered_list that contains all non-preferred + charsets. */ +Lisp_Object Vcharset_non_preferred_head; + /* Incremented everytime we change Vcharset_ordered_list. This is unsigned short so that it fits in Lisp_Int and never matches -1. */ @@ -116,7 +120,10 @@ int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL]; Lisp_Object Vcharset_map_path; -Lisp_Object Vchar_unified_charset_table; +/* If nonzero, don't load charset maps. */ +int inhibit_load_charset_map; + +Lisp_Object Vcurrent_iso639_language; /* Defined in chartab.c */ extern void @@ -160,7 +167,63 @@ map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object), | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \ << 24)))) +/* Structure to hold mapping tables for a charset. Used by temacs + invoked for dumping. */ +static struct +{ + /* The current charset for which the following tables are setup. */ + struct charset *current; + + /* 1 iff the following table is used for encoder. */ + short for_encoder; + + /* When the following table is used for encoding, mininum and + maxinum character of the current charset. */ + int min_char, max_char; + + /* A Unicode character correspoinding to the code indice 0 (i.e. the + minimum code-point) of the current charset, or -1 if the code + indice 0 is not a Unicode character. This is checked when + table.encoder[CHAR] is zero. */ + int zero_index_char; + + union { + /* Table mapping code-indices (not code-points) of the current + charset to Unicode characters. If decoder[CHAR] is -1, CHAR + doesn't belong to the current charset. */ + int decoder[0x10000]; + /* Table mapping Unicode characters to code-indices of the current + charset. The first 0x10000 elements are for BMP (0..0xFFFF), + and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP + (0x20000..0x2FFFF). Note that there is no charset map that + uses both SMP and SIP. */ + unsigned short encoder[0x20000]; + } table; +} *temp_charset_work; + +#define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \ + do { \ + if ((CODE) == 0) \ + temp_charset_work->zero_index_char = (C); \ + else if ((C) < 0x20000) \ + temp_charset_work->table.encoder[(C)] = (CODE); \ + else \ + temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \ + } while (0) + +#define GET_TEMP_CHARSET_WORK_ENCODER(C) \ + ((C) == temp_charset_work->zero_index_char ? 0 \ + : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \ + ? (int) temp_charset_work->table.encoder[(C)] : -1) \ + : temp_charset_work->table.encoder[(C) - 0x10000] \ + ? temp_charset_work->table.encoder[(C) - 0x10000] : -1) + +#define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \ + (temp_charset_work->table.decoder[(CODE)] = (C)) + +#define GET_TEMP_CHARSET_WORK_DECODER(CODE) \ + (temp_charset_work->table.decoder[(CODE)]) /* Set to 1 to warn that a charset map is loaded and thus a buffer @@ -176,16 +239,30 @@ struct charset_map_entries struct charset_map_entries *next; }; -/* Load the mapping information for CHARSET from ENTRIES. +/* Load the mapping information of CHARSET from ENTRIES for + initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and + encoding (CONTROL_FLAG == 2). + + If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char, + and CHARSET->fast_map. - If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char. + If CONTROL_FLAG is 1, setup the following tables according to + CHARSET->method and inhibit_load_charset_map. - If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char, - CHARSET->decoder, and CHARSET->encoder. + CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1 + ----------------------+--------------------+--------------------------- + CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder + ----------------------+--------------------+--------------------------- + CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder - If CONTROL_FLAG is 2, setup CHARSET->deunifier and - Vchar_unify_table. If Vchar_unified_charset_table is non-nil, - setup it too. */ + If CONTROL_FLAG is 2, setup the following tables. + + CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1 + ----------------------+--------------------+--------------------------- + CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder + ----------------------+--------------------+-------------------------- + CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder +*/ static void load_charset_map (charset, entries, n_entries, control_flag) @@ -204,16 +281,55 @@ load_charset_map (charset, entries, n_entries, control_flag) if (n_entries <= 0) return; - if (control_flag > 0) + if (control_flag) { - int n = CODE_POINT_TO_INDEX (charset, max_code) + 1; - - table = Fmake_char_table (Qnil, Qnil); - if (control_flag == 1) - vec = Fmake_vector (make_number (n), make_number (-1)); - else if (! CHAR_TABLE_P (Vchar_unify_table)) - Vchar_unify_table = Fmake_char_table (Qnil, Qnil); + if (! inhibit_load_charset_map) + { + if (control_flag == 1) + { + if (charset->method == CHARSET_METHOD_MAP) + { + int n = CODE_POINT_TO_INDEX (charset, max_code) + 1; + vec = CHARSET_DECODER (charset) + = Fmake_vector (make_number (n), make_number (-1)); + } + else + { + char_table_set_range (Vchar_unify_table, + charset->min_char, charset->max_char, + Qnil); + } + } + else + { + table = Fmake_char_table (Qnil, Qnil); + if (charset->method == CHARSET_METHOD_MAP) + CHARSET_ENCODER (charset) = table; + else + CHARSET_DEUNIFIER (charset) = table; + } + } + else + { + if (! temp_charset_work) + temp_charset_work = malloc (sizeof (*temp_charset_work)); + if (control_flag == 1) + { + memset (temp_charset_work->table.decoder, -1, + sizeof (int) * 0x10000); + temp_charset_work->for_encoder = 0; + } + else + { + memset (temp_charset_work->table.encoder, 0, + sizeof (unsigned short) * 0x20000); + temp_charset_work->zero_index_char = -1; + } + temp_charset_work->current = charset; + temp_charset_work->for_encoder = (control_flag == 2); + control_flag += 2; + } charset_map_loaded = 1; } @@ -245,14 +361,48 @@ load_charset_map (charset, entries, n_entries, control_flag) if (from_index < 0 || to_index < 0) continue; - if (control_flag < 2) - { - int c; + if (to_c > max_char) + max_char = to_c; + else if (from_c < min_char) + min_char = from_c; - if (to_c > max_char) - max_char = to_c; - else if (from_c < min_char) - min_char = from_c; + if (control_flag == 1) + { + if (charset->method == CHARSET_METHOD_MAP) + for (; from_index <= to_index; from_index++, from_c++) + ASET (vec, from_index, make_number (from_c)); + else + for (; from_index <= to_index; from_index++, from_c++) + CHAR_TABLE_SET (Vchar_unify_table, + CHARSET_CODE_OFFSET (charset) + from_index, + make_number (from_c)); + } + else if (control_flag == 2) + { + if (charset->method == CHARSET_METHOD_MAP + && CHARSET_COMPACT_CODES_P (charset)) + for (; from_index <= to_index; from_index++, from_c++) + { + unsigned code = INDEX_TO_CODE_POINT (charset, from_index); + + if (NILP (CHAR_TABLE_REF (table, from_c))) + CHAR_TABLE_SET (table, from_c, make_number (code)); + } + else + for (; from_index <= to_index; from_index++, from_c++) + { + if (NILP (CHAR_TABLE_REF (table, from_c))) + CHAR_TABLE_SET (table, from_c, make_number (from_index)); + } + } + else if (control_flag == 3) + for (; from_index <= to_index; from_index++, from_c++) + SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index); + else if (control_flag == 4) + for (; from_index <= to_index; from_index++, from_c++) + SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index); + else /* control_flag == 0 */ + { if (ascii_compatible_p) { if (! ASCII_BYTE_P (from_c)) @@ -266,70 +416,22 @@ load_charset_map (charset, entries, n_entries, control_flag) } } - for (c = from_c; c <= to_c; c++) - CHARSET_FAST_MAP_SET (c, fast_map); - - if (control_flag == 1) - { - unsigned code = from; - - if (CHARSET_COMPACT_CODES_P (charset)) - while (1) - { - ASET (vec, from_index, make_number (from_c)); - if (NILP (CHAR_TABLE_REF (table, from_c))) - CHAR_TABLE_SET (table, from_c, make_number (code)); - if (from_index == to_index) - break; - from_index++, from_c++; - code = INDEX_TO_CODE_POINT (charset, from_index); - } - else - for (; from_index <= to_index; from_index++, from_c++) - { - ASET (vec, from_index, make_number (from_c)); - if (NILP (CHAR_TABLE_REF (table, from_c))) - CHAR_TABLE_SET (table, from_c, make_number (from_index)); - } - } - } - else - { - unsigned code = from; - - while (1) - { - int c1 = DECODE_CHAR (charset, code); - - if (c1 >= 0) - { - CHAR_TABLE_SET (table, from_c, make_number (c1)); - CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c)); - if (CHAR_TABLE_P (Vchar_unified_charset_table)) - CHAR_TABLE_SET (Vchar_unified_charset_table, c1, - CHARSET_NAME (charset)); - } - if (from_index == to_index) - break; - from_index++, from_c++; - code = INDEX_TO_CODE_POINT (charset, from_index); - } + for (; from_c <= to_c; from_c++) + CHARSET_FAST_MAP_SET (from_c, fast_map); } } - if (control_flag < 2) + if (control_flag == 0) { CHARSET_MIN_CHAR (charset) = (ascii_compatible_p ? nonascii_min_char : min_char); CHARSET_MAX_CHAR (charset) = max_char; - if (control_flag == 1) - { - CHARSET_DECODER (charset) = vec; - CHARSET_ENCODER (charset) = table; - } } - else - CHARSET_DEUNIFIER (charset) = table; + else if (control_flag == 4) + { + temp_charset_work->min_char = min_char; + temp_charset_work->max_char = max_char; + } } @@ -525,21 +627,31 @@ load_charset_map_from_vector (charset, vec, control_flag) load_charset_map (charset, head, n_entries, control_flag); } + +/* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of + map it is (see the comment of load_charset_map for the detail). */ + static void -load_charset (charset) +load_charset (charset, control_flag) struct charset *charset; + int control_flag; { - if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED) - { - Lisp_Object map; + Lisp_Object map; - map = CHARSET_MAP (charset); - if (STRINGP (map)) - load_charset_map_from_file (charset, map, 1); - else - load_charset_map_from_vector (charset, map, 1); - CHARSET_METHOD (charset) = CHARSET_METHOD_MAP; - } + if (inhibit_load_charset_map + && temp_charset_work + && charset == temp_charset_work->current + && (control_flag == 2 == temp_charset_work->for_encoder)) + return; + + if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP) + map = CHARSET_MAP (charset); + else if (CHARSET_UNIFIED_P (charset)) + map = CHARSET_UNIFY_MAP (charset); + if (STRINGP (map)) + load_charset_map_from_file (charset, map, control_flag); + else + load_charset_map_from_vector (charset, map, control_flag); } @@ -552,6 +664,68 @@ DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0, } +void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object), + Lisp_Object function, Lisp_Object arg, + unsigned from, unsigned to)); + +void +map_charset_for_dump (c_function, function, arg, from, to) + void (*c_function) (Lisp_Object, Lisp_Object); + Lisp_Object function, arg; + unsigned from, to; +{ + int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from); + int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to); + Lisp_Object range; + int c, stop; + struct gcpro gcpro1; + + range = Fcons (Qnil, Qnil); + GCPRO1 (range); + + c = temp_charset_work->min_char; + stop = (temp_charset_work->max_char < 0x20000 + ? temp_charset_work->max_char : 0xFFFF); + + while (1) + { + int index = GET_TEMP_CHARSET_WORK_ENCODER (c); + + if (index >= from_idx && index <= to_idx) + { + if (NILP (XCAR (range))) + XSETCAR (range, make_number (c)); + } + else if (! NILP (XCAR (range))) + { + XSETCDR (range, make_number (c - 1)); + if (c_function) + (*c_function) (arg, range); + else + call2 (function, range, arg); + XSETCAR (range, Qnil); + } + if (c == stop) + { + if (c == temp_charset_work->max_char) + { + if (! NILP (XCAR (range))) + { + XSETCDR (range, make_number (c)); + if (c_function) + (*c_function) (arg, range); + else + call2 (function, range, arg); + } + break; + } + c = 0x1FFFF; + stop = temp_charset_work->max_char; + } + c++; + } +} + void map_charset_chars (c_function, function, arg, charset, from, to) @@ -563,20 +737,9 @@ map_charset_chars (c_function, function, arg, Lisp_Object range; int partial; - if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED) - load_charset (charset); - partial = (from > CHARSET_MIN_CODE (charset) || to < CHARSET_MAX_CODE (charset)); - if (CHARSET_UNIFIED_P (charset) - && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset))) - { - map_char_table_for_charset (c_function, function, - CHARSET_DEUNIFIER (charset), arg, - partial ? charset : NULL, from, to); - } - if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET) { int from_idx = CODE_POINT_TO_INDEX (charset, from); @@ -584,6 +747,18 @@ map_charset_chars (c_function, function, arg, int from_c = from_idx + CHARSET_CODE_OFFSET (charset); int to_c = to_idx + CHARSET_CODE_OFFSET (charset); + if (CHARSET_UNIFIED_P (charset)) + { + if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset))) + load_charset (charset, 2); + if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset))) + map_char_table_for_charset (c_function, function, + CHARSET_DEUNIFIER (charset), arg, + partial ? charset : NULL, from, to); + else + map_charset_for_dump (c_function, function, arg, from, to); + } + range = Fcons (make_number (from_c), make_number (to_c)); if (NILP (function)) (*c_function) (arg, range); @@ -593,10 +768,13 @@ map_charset_chars (c_function, function, arg, else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP) { if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) - return; - map_char_table_for_charset (c_function, function, - CHARSET_ENCODER (charset), arg, - partial ? charset : NULL, from, to); + load_charset (charset, 2); + if (CHAR_TABLE_P (CHARSET_ENCODER (charset))) + map_char_table_for_charset (c_function, function, + CHARSET_ENCODER (charset), arg, + partial ? charset : NULL, from, to); + else + map_charset_for_dump (c_function, function, arg, from, to); } else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET) { @@ -815,7 +993,7 @@ usage: (define-charset-internal ...) */) charset.max_code = code; } - charset.compact_codes_p = charset.max_code < 0x1000000; + charset.compact_codes_p = charset.max_code < 0x10000; val = args[charset_arg_invalid_code]; if (NILP (val)) @@ -904,11 +1082,7 @@ usage: (define-charset-internal ...) */) { val = args[charset_arg_map]; ASET (attrs, charset_map, val); - if (STRINGP (val)) - load_charset_map_from_file (&charset, val, 0); - else - load_charset_map_from_vector (&charset, val, 0); - charset.method = CHARSET_METHOD_MAP_DEFERRED; + charset.method = CHARSET_METHOD_MAP; } else if (! NILP (args[charset_arg_subset])) { @@ -1024,6 +1198,12 @@ usage: (define-charset-internal ...) */) charset.id = id; charset_table[id] = charset; + if (charset.method == CHARSET_METHOD_MAP) + { + load_charset (&charset, 0); + charset_table[id] = charset; + } + if (charset.iso_final >= 0) { ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96, @@ -1038,7 +1218,7 @@ usage: (define-charset-internal ...) */) else if (ISO_CHARSET_TABLE (2, 0, 'B') == id) charset_jisx0208 = id; } - + if (charset.emacs_mule_id >= 0) { emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id); @@ -1058,8 +1238,29 @@ usage: (define-charset-internal ...) */) Vcharset_ordered_list = nconc2 (Vcharset_ordered_list, Fcons (make_number (id), Qnil)); else - Vcharset_ordered_list = Fcons (make_number (id), - Vcharset_ordered_list); + { + Lisp_Object tail; + + for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail)) + { + struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail))); + + if (cs->supplementary_p) + break; + } + if (EQ (tail, Vcharset_ordered_list)) + Vcharset_ordered_list = Fcons (make_number (id), + Vcharset_ordered_list); + else if (NILP (tail)) + Vcharset_ordered_list = nconc2 (Vcharset_ordered_list, + Fcons (make_number (id), Qnil)); + else + { + val = Fcons (XCAR (tail), XCDR (tail)); + XSETCDR (tail, val); + XSETCAR (tail, make_number (id)); + } + } charset_ordered_list_tick++; } @@ -1149,42 +1350,6 @@ DEFUN ("define-charset-alias", Fdefine_charset_alias, } -DEFUN ("unibyte-charset", Funibyte_charset, Sunibyte_charset, 0, 0, 0, - doc: /* Return the unibyte charset (set by `set-unibyte-charset'). */) - () -{ - return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte)); -} - - -DEFUN ("set-unibyte-charset", Fset_unibyte_charset, Sset_unibyte_charset, - 1, 1, 0, - doc: /* Set the unibyte charset to CHARSET. -This determines how unibyte/multibyte conversion is done. See also -function `unibyte-charset'. */) - (charset) - Lisp_Object charset; -{ - struct charset *cs; - int i, c; - - CHECK_CHARSET_GET_CHARSET (charset, cs); - if (! cs->ascii_compatible_p - || cs->dimension != 1) - error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset))); - charset_unibyte = cs->id; - memset (unibyte_has_multibyte_table, 1, 128); - for (i = 128; i < 256; i++) - { - c = DECODE_CHAR (cs, i); - unibyte_to_multibyte_table[i] = (c < 0 ? BYTE8_TO_CHAR (i) : c); - unibyte_has_multibyte_table[i] = c >= 0; - } - - return Qnil; -} - - DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0, doc: /* Return the property list of CHARSET. */) (charset) @@ -1228,8 +1393,6 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */) CHECK_CHARSET_GET_ID (charset, id); cs = CHARSET_FROM_ID (id); - if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED) - load_charset (cs); if (NILP (deunify) ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs)) : ! CHARSET_UNIFIED_P (cs)) @@ -1238,18 +1401,21 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */) CHARSET_UNIFIED_P (cs) = 0; if (NILP (deunify)) { - if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET) + if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET + || CHARSET_CODE_OFFSET (cs) < 0x110000) error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset))); if (NILP (unify_map)) unify_map = CHARSET_UNIFY_MAP (cs); - if (STRINGP (unify_map)) - load_charset_map_from_file (cs, unify_map, 2); - else if (VECTORP (unify_map)) - load_charset_map_from_vector (cs, unify_map, 2); - else if (NILP (unify_map)) - error ("No unify-map for charset"); else - error ("Bad unify-map arg"); + { + if (! STRINGP (unify_map) && ! VECTORP (unify_map)) + signal_error ("Bad unify-map", unify_map); + CHARSET_UNIFY_MAP (cs) = unify_map; + } + if (NILP (Vchar_unify_table)) + Vchar_unify_table = Fmake_char_table (Qnil, Qnil); + char_table_set_range (Vchar_unify_table, + cs->min_char, cs->max_char, charset); CHARSET_UNIFIED_P (cs) = 1; } else if (CHAR_TABLE_P (Vchar_unify_table)) @@ -1268,7 +1434,7 @@ Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */) DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char, Sget_unused_iso_final_char, 2, 2, 0, doc: /* -Return an unused ISO final char for a charset of DIMENISION and CHARS. +Return an unused ISO final char for a charset of DIMENSION and CHARS. DIMENSION is the number of bytes to represent a character: 1 or 2. CHARS is the number of characters in a dimension: 94 or 96. @@ -1494,6 +1660,41 @@ only `ascii', `eight-bit-control', and `eight-bit-graphic'. */) +/* Return a unified character code for C (>= 0x110000). VAL is a + value of Vchar_unify_table for C; i.e. it is nil, an integer, or a + charset symbol. */ +int +maybe_unify_char (c, val) + int c; + Lisp_Object val; +{ + struct charset *charset; + + if (INTEGERP (val)) + return XINT (val); + if (NILP (val)) + return c; + + CHECK_CHARSET_GET_CHARSET (val, charset); + load_charset (charset, 1); + if (! inhibit_load_charset_map) + { + val = CHAR_TABLE_REF (Vchar_unify_table, c); + if (! NILP (val)) + c = XINT (val); + } + else + { + int code_index = c - CHARSET_CODE_OFFSET (charset); + int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index); + + if (unified > 0) + c = unified; + } + return c; +} + + /* Return a character correponding to the code-point CODE of CHARSET. */ @@ -1508,12 +1709,6 @@ decode_char (charset, code) if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset)) return -1; - if (method == CHARSET_METHOD_MAP_DEFERRED) - { - load_charset (charset); - method = CHARSET_METHOD (charset); - } - if (method == CHARSET_METHOD_SUBSET) { Lisp_Object subset_info; @@ -1556,21 +1751,24 @@ decode_char (charset, code) decoder = CHARSET_DECODER (charset); if (! VECTORP (decoder)) - return -1; - c = XINT (AREF (decoder, char_index)); + { + load_charset (charset, 1); + decoder = CHARSET_DECODER (charset); + } + if (VECTORP (decoder)) + c = XINT (AREF (decoder, char_index)); + else + c = GET_TEMP_CHARSET_WORK_DECODER (char_index); } - else + else /* method == CHARSET_METHOD_OFFSET */ { c = char_index + CHARSET_CODE_OFFSET (charset); + if (CHARSET_UNIFIED_P (charset) + && c > MAX_UNICODE_CHAR) + MAYBE_UNIFY_CHAR (c); } } - if (CHARSET_UNIFIED_P (charset) - && c >= 0) - { - MAYBE_UNIFY_CHAR (c); - } - return c; } @@ -1592,16 +1790,27 @@ encode_char (charset, c) if (CHARSET_UNIFIED_P (charset)) { Lisp_Object deunifier, deunified; + int code_index = -1; deunifier = CHARSET_DEUNIFIER (charset); if (! CHAR_TABLE_P (deunifier)) { - Funify_charset (CHARSET_NAME (charset), Qnil, Qnil); + load_charset (charset, 2); deunifier = CHARSET_DEUNIFIER (charset); } - deunified = CHAR_TABLE_REF (deunifier, c); - if (! NILP (deunified)) - c = XINT (deunified); + if (CHAR_TABLE_P (deunifier)) + { + Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c); + + if (INTEGERP (deunified)) + code_index = XINT (deunified); + } + else + { + code_index = GET_TEMP_CHARSET_WORK_ENCODER (c); + } + if (code_index >= 0) + c = CHARSET_CODE_OFFSET (charset) + code_index; } if (method == CHARSET_METHOD_SUBSET) @@ -1642,12 +1851,6 @@ encode_char (charset, c) || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset)) return CHARSET_INVALID_CODE (charset); - if (method == CHARSET_METHOD_MAP_DEFERRED) - { - load_charset (charset); - method = CHARSET_METHOD (charset); - } - if (method == CHARSET_METHOD_MAP) { Lisp_Object encoder; @@ -1655,18 +1858,30 @@ encode_char (charset, c) encoder = CHARSET_ENCODER (charset); if (! CHAR_TABLE_P (CHARSET_ENCODER (charset))) - return CHARSET_INVALID_CODE (charset); - val = CHAR_TABLE_REF (encoder, c); - if (NILP (val)) - return CHARSET_INVALID_CODE (charset); - code = XINT (val); - if (! CHARSET_COMPACT_CODES_P (charset)) - code = INDEX_TO_CODE_POINT (charset, code); + { + load_charset (charset, 2); + encoder = CHARSET_ENCODER (charset); + } + if (CHAR_TABLE_P (encoder)) + { + val = CHAR_TABLE_REF (encoder, c); + if (NILP (val)) + return CHARSET_INVALID_CODE (charset); + code = XINT (val); + if (! CHARSET_COMPACT_CODES_P (charset)) + code = INDEX_TO_CODE_POINT (charset, code); + } + else + { + code = GET_TEMP_CHARSET_WORK_ENCODER (c); + code = INDEX_TO_CODE_POINT (charset, code); + } } else /* method == CHARSET_METHOD_OFFSET */ { - code = c - CHARSET_CODE_OFFSET (charset); - code = INDEX_TO_CODE_POINT (charset, code); + int code_index = c - CHARSET_CODE_OFFSET (charset); + + code = INDEX_TO_CODE_POINT (charset, code_index); } return code; @@ -1680,7 +1895,7 @@ Return nil if CODE-POINT is not valid in CHARSET. CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). Optional argument RESTRICTION specifies a way to map the pair of CCS -and CODE-POINT to a chracter. Currently not supported and just ignored. */) +and CODE-POINT to a character. Currently not supported and just ignored. */) (charset, code_point, restriction) Lisp_Object charset, code_point, restriction; { @@ -1710,7 +1925,7 @@ DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0, doc: /* Encode the character CH into a code-point of CHARSET. Return nil if CHARSET doesn't include CH. -Optional argument RESTRICTION specifies a way to map CHAR to a +Optional argument RESTRICTION specifies a way to map CH to a code-point in CCS. Currently not supported and just ignored. */) (ch, charset, restriction) Lisp_Object ch, charset, restriction; @@ -1822,8 +2037,12 @@ char_charset (c, charset_list, code_return) Lisp_Object charset_list; unsigned *code_return; { + int maybe_null = 0; + if (NILP (charset_list)) charset_list = Vcharset_ordered_list; + else + maybe_null = 1; while (CONSP (charset_list)) { @@ -1837,17 +2056,22 @@ char_charset (c, charset_list, code_return) return charset; } charset_list = XCDR (charset_list); + if (c <= MAX_UNICODE_CHAR + && EQ (charset_list, Vcharset_non_preferred_head)) + return CHARSET_FROM_ID (charset_unicode); } - return NULL; + return (maybe_null ? NULL + : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs) + : CHARSET_FROM_ID (charset_eight_bit)); } DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, doc: - /*Return list of charset and one to four position-codes of CHAR. + /*Return list of charset and one to four position-codes of CH. The charset is decided by the current priority order of charsets. A position-code is a byte value of each dimension of the code-point of -CHAR in the charset. */) +CH in the charset. */) (ch) Lisp_Object ch; { @@ -1874,15 +2098,41 @@ CHAR in the charset. */) } -DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0, - doc: /* Return the charset of highest priority that contains CH. */) - (ch) - Lisp_Object ch; +DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0, + doc: /* Return the charset of highest priority that contains CH. +If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets +from which to find the charset. It may also be a coding system. In +that case, find the charset from what supported by that coding system. */) + (ch, restriction) + Lisp_Object ch, restriction; { struct charset *charset; CHECK_CHARACTER (ch); - charset = CHAR_CHARSET (XINT (ch)); + if (NILP (restriction)) + charset = CHAR_CHARSET (XINT (ch)); + else + { + Lisp_Object charset_list; + + if (CONSP (restriction)) + { + for (charset_list = Qnil; CONSP (restriction); + restriction = XCDR (restriction)) + { + int id; + + CHECK_CHARSET_GET_ID (XCAR (restriction), id); + charset_list = Fcons (make_number (id), charset_list); + } + charset_list = Fnreverse (charset_list); + } + else + charset_list = coding_system_charset_list (restriction); + charset = char_charset (XINT (ch), charset_list, NULL); + if (! charset) + return Qnil; + } return (CHARSET_NAME (charset)); } @@ -1912,7 +2162,7 @@ Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR. ISO 2022's designation sequence (escape sequence) distinguishes charsets by their DIMENSION, CHARS, and FINAL-CHAR, -where as Emacs distinguishes them by charset symbol. +whereas Emacs distinguishes them by charset symbol. See the documentation of the function `charset-info' for the meanings of DIMENSION, CHARS, and FINAL-CHAR. */) (dimension, chars, final_char) @@ -1932,35 +2182,23 @@ DIMENSION, CHARS, and FINAL-CHAR. */) DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps, 0, 0, 0, doc: /* -Clear encoder and decoder of charsets that are loaded from mapfiles. */) +Internal use only. +Clear temporary charset mapping tables. +It should be called only from temacs invoked for dumping. */) () { int i; struct charset *charset; Lisp_Object attrs; - for (i = 0; i < charset_table_used; i++) + if (temp_charset_work) { - charset = CHARSET_FROM_ID (i); - attrs = CHARSET_ATTRIBUTES (charset); - - if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP) - { - CHARSET_ATTR_DECODER (attrs) = Qnil; - CHARSET_ATTR_ENCODER (attrs) = Qnil; - CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED; - } - - if (CHARSET_UNIFIED_P (charset)) - CHARSET_ATTR_DEUNIFIER (attrs) = Qnil; + free (temp_charset_work); + temp_charset_work = NULL; } - if (CHAR_TABLE_P (Vchar_unified_charset_table)) - { - Foptimize_char_table (Vchar_unified_charset_table); - Vchar_unify_table = Vchar_unified_charset_table; - Vchar_unified_charset_table = Qnil; - } + if (CHAR_TABLE_P (Vchar_unify_table)) + Foptimize_char_table (Vchar_unify_table, Qnil); return Qnil; } @@ -2009,7 +2247,7 @@ usage: (set-charset-priority &rest charsets) */) } } arglist[0] = Fnreverse (new_head); - arglist[1] = old_list; + arglist[1] = Vcharset_non_preferred_head = old_list; Vcharset_ordered_list = Fnconc (2, arglist); charset_ordered_list_tick++; @@ -2044,9 +2282,18 @@ Return charset identification number of CHARSET. */) void init_charset () { - Vcharset_map_path - = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory), - Qnil); + Lisp_Object tempdir; + tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory); + if (access (SDATA (tempdir), 0) < 0) + { + dir_warning ("Error: charsets directory (%s) does not exist.\n\ +Emacs will not function correctly without the character map files.\n\ +Please check your installation!\n", + tempdir); + /* TODO should this be a fatal error? (Bug#909) */ + } + + Vcharset_map_path = Fcons (tempdir, Qnil); } @@ -2082,6 +2329,7 @@ syms_of_charset () DEFSYM (Qascii, "ascii"); DEFSYM (Qunicode, "unicode"); + DEFSYM (Qemacs, "emacs"); DEFSYM (Qeight_bit, "eight-bit"); DEFSYM (Qiso_8859_1, "iso-8859-1"); @@ -2114,15 +2362,10 @@ syms_of_charset () xmalloc (sizeof (struct charset) * charset_table_size)); charset_table_used = 0; - staticpro (&Vchar_unified_charset_table); - Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1)); - defsubr (&Scharsetp); defsubr (&Smap_charset_chars); defsubr (&Sdefine_charset_internal); defsubr (&Sdefine_charset_alias); - defsubr (&Sunibyte_charset); - defsubr (&Sset_unibyte_charset); defsubr (&Scharset_plist); defsubr (&Sset_charset_plist); defsubr (&Sunify_charset); @@ -2143,13 +2386,23 @@ syms_of_charset () defsubr (&Scharset_id_internal); DEFVAR_LISP ("charset-map-path", &Vcharset_map_path, - doc: /* *Lisp of directories to search for charset map files. */); + doc: /* *List of directories to search for charset map files. */); Vcharset_map_path = Qnil; + DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map, + doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */); + inhibit_load_charset_map = 0; + DEFVAR_LISP ("charset-list", &Vcharset_list, doc: /* List of all charsets ever defined. */); Vcharset_list = Qnil; + DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language, + doc: /* ISO639 language mnemonic symbol for the current language environment. +If the current language environment is for multiple languages (e.g. "Latin-1"), +the value may be a list of mnemonics. */); + Vcurrent_iso639_language = Qnil; + charset_ascii = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00", 0, 127, 'B', -1, 0, 1, 0, 0); @@ -2159,9 +2412,12 @@ syms_of_charset () charset_unicode = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10", 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0); + charset_emacs + = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F", + 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0); charset_eight_bit = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00", - 128, 255, -1, 0, -1, 0, 0, + 128, 255, -1, 0, -1, 0, 1, MAX_5_BYTE_CHAR + 1); }