1 /* Basic character set support.
3 Copyright (C) 2001-2014 Free Software Foundation, Inc.
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
14 This file is part of GNU Emacs.
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or
19 (at your option) any later version.
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
35 #include <sys/types.h>
38 #include "character.h"
44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
46 A coded character set ("charset" hereafter) is a meaningful
47 collection (i.e. language, culture, functionality, etc.) of
48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
55 charset_table as a struct charset.
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table
;
63 /* Table of struct charset. */
64 struct charset
*charset_table
;
66 static ptrdiff_t charset_table_size
;
67 static int charset_table_used
;
69 Lisp_Object Qcharsetp
;
71 /* Special charset symbols. */
73 static Lisp_Object Qeight_bit
;
74 static Lisp_Object Qiso_8859_1
;
75 static Lisp_Object Qunicode
;
76 static Lisp_Object Qemacs
;
78 /* The corresponding charsets. */
80 int charset_eight_bit
;
81 static int charset_iso_8859_1
;
83 static int charset_emacs
;
85 /* The other special charsets. */
86 int charset_jisx0201_roman
;
87 int charset_jisx0208_1978
;
91 /* Value of charset attribute `charset-iso-plane'. */
92 static Lisp_Object Qgl
, Qgr
;
94 /* Charset of unibyte characters. */
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list
;
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
102 Lisp_Object Vcharset_non_preferred_head
;
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
107 unsigned short charset_ordered_list_tick
;
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list
;
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list
;
115 int emacs_mule_charset
[256];
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
121 #define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
123 ? (int) ((code) - (charset)->min_code) \
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
128 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0]) \
135 - ((charset)->char_index_offset)) \
139 /* Return the code-point for the character index IDX in CHARSET.
140 IDX should be an unsigned int variable in a valid range (which is
141 always in nonnegative int range too). IDX contains garbage afterwards. */
143 #define INDEX_TO_CODE_POINT(charset, idx) \
144 ((charset)->code_linear_p \
145 ? (idx) + (charset)->min_code \
146 : (idx += (charset)->char_index_offset, \
147 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
148 | (((charset)->code_space[4] \
149 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
151 | (((charset)->code_space[8] \
152 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
154 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
157 /* Structure to hold mapping tables for a charset. Used by temacs
158 invoked for dumping. */
162 /* The current charset for which the following tables are setup. */
163 struct charset
*current
;
165 /* 1 iff the following table is used for encoder. */
168 /* When the following table is used for encoding, minimum and
169 maximum character of the current charset. */
170 int min_char
, max_char
;
172 /* A Unicode character corresponding to the code index 0 (i.e. the
173 minimum code-point) of the current charset, or -1 if the code
174 index 0 is not a Unicode character. This is checked when
175 table.encoder[CHAR] is zero. */
179 /* Table mapping code-indices (not code-points) of the current
180 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
181 doesn't belong to the current charset. */
182 int decoder
[0x10000];
183 /* Table mapping Unicode characters to code-indices of the current
184 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
185 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
186 (0x20000..0x2FFFF). Note that there is no charset map that
187 uses both SMP and SIP. */
188 unsigned short encoder
[0x20000];
190 } *temp_charset_work
;
192 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
195 temp_charset_work->zero_index_char = (C); \
196 else if ((C) < 0x20000) \
197 temp_charset_work->table.encoder[(C)] = (CODE); \
199 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
202 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
203 ((C) == temp_charset_work->zero_index_char ? 0 \
204 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
205 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
206 : temp_charset_work->table.encoder[(C) - 0x10000] \
207 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
209 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
210 (temp_charset_work->table.decoder[(CODE)] = (C))
212 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
213 (temp_charset_work->table.decoder[(CODE)])
216 /* Set to 1 to warn that a charset map is loaded and thus a buffer
217 text and a string data may be relocated. */
218 bool charset_map_loaded
;
220 struct charset_map_entries
226 struct charset_map_entries
*next
;
229 /* Load the mapping information of CHARSET from ENTRIES for
230 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
231 encoding (CONTROL_FLAG == 2).
233 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
234 and CHARSET->fast_map.
236 If CONTROL_FLAG is 1, setup the following tables according to
237 CHARSET->method and inhibit_load_charset_map.
239 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
240 ----------------------+--------------------+---------------------------
241 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
242 ----------------------+--------------------+---------------------------
243 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
245 If CONTROL_FLAG is 2, setup the following tables.
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
250 ----------------------+--------------------+--------------------------
251 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
255 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
257 Lisp_Object vec
IF_LINT (= Qnil
), table
IF_LINT (= Qnil
);
258 unsigned max_code
= CHARSET_MAX_CODE (charset
);
259 bool ascii_compatible_p
= charset
->ascii_compatible_p
;
260 int min_char
, max_char
, nonascii_min_char
;
262 unsigned char *fast_map
= charset
->fast_map
;
269 if (! inhibit_load_charset_map
)
271 if (control_flag
== 1)
273 if (charset
->method
== CHARSET_METHOD_MAP
)
275 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
277 vec
= Fmake_vector (make_number (n
), make_number (-1));
278 set_charset_attr (charset
, charset_decoder
, vec
);
282 char_table_set_range (Vchar_unify_table
,
283 charset
->min_char
, charset
->max_char
,
289 table
= Fmake_char_table (Qnil
, Qnil
);
290 set_charset_attr (charset
,
291 (charset
->method
== CHARSET_METHOD_MAP
292 ? charset_encoder
: charset_deunifier
),
298 if (! temp_charset_work
)
299 temp_charset_work
= xmalloc (sizeof *temp_charset_work
);
300 if (control_flag
== 1)
302 memset (temp_charset_work
->table
.decoder
, -1,
303 sizeof (int) * 0x10000);
307 memset (temp_charset_work
->table
.encoder
, 0,
308 sizeof (unsigned short) * 0x20000);
309 temp_charset_work
->zero_index_char
= -1;
311 temp_charset_work
->current
= charset
;
312 temp_charset_work
->for_encoder
= (control_flag
== 2);
315 charset_map_loaded
= 1;
318 min_char
= max_char
= entries
->entry
[0].c
;
319 nonascii_min_char
= MAX_CHAR
;
320 for (i
= 0; i
< n_entries
; i
++)
323 int from_index
, to_index
, lim_index
;
325 int idx
= i
% 0x10000;
327 if (i
> 0 && idx
== 0)
328 entries
= entries
->next
;
329 from
= entries
->entry
[idx
].from
;
330 to
= entries
->entry
[idx
].to
;
331 from_c
= entries
->entry
[idx
].c
;
332 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
335 to_index
= from_index
;
340 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
341 to_c
= from_c
+ (to_index
- from_index
);
343 if (from_index
< 0 || to_index
< 0)
345 lim_index
= to_index
+ 1;
349 else if (from_c
< min_char
)
352 if (control_flag
== 1)
354 if (charset
->method
== CHARSET_METHOD_MAP
)
355 for (; from_index
< lim_index
; from_index
++, from_c
++)
356 ASET (vec
, from_index
, make_number (from_c
));
358 for (; from_index
< lim_index
; from_index
++, from_c
++)
359 CHAR_TABLE_SET (Vchar_unify_table
,
360 CHARSET_CODE_OFFSET (charset
) + from_index
,
361 make_number (from_c
));
363 else if (control_flag
== 2)
365 if (charset
->method
== CHARSET_METHOD_MAP
366 && CHARSET_COMPACT_CODES_P (charset
))
367 for (; from_index
< lim_index
; from_index
++, from_c
++)
369 unsigned code
= from_index
;
370 code
= INDEX_TO_CODE_POINT (charset
, code
);
372 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
373 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
376 for (; from_index
< lim_index
; from_index
++, from_c
++)
378 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
379 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
382 else if (control_flag
== 3)
383 for (; from_index
< lim_index
; from_index
++, from_c
++)
384 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
385 else if (control_flag
== 4)
386 for (; from_index
< lim_index
; from_index
++, from_c
++)
387 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
388 else /* control_flag == 0 */
390 if (ascii_compatible_p
)
392 if (! ASCII_CHAR_P (from_c
))
394 if (from_c
< nonascii_min_char
)
395 nonascii_min_char
= from_c
;
397 else if (! ASCII_CHAR_P (to_c
))
399 nonascii_min_char
= 0x80;
403 for (; from_c
<= to_c
; from_c
++)
404 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
408 if (control_flag
== 0)
410 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
411 ? nonascii_min_char
: min_char
);
412 CHARSET_MAX_CHAR (charset
) = max_char
;
414 else if (control_flag
== 4)
416 temp_charset_work
->min_char
= min_char
;
417 temp_charset_work
->max_char
= max_char
;
422 /* Read a hexadecimal number (preceded by "0x") from the file FP while
423 paying attention to comment character '#'. */
426 read_hex (FILE *fp
, bool *eof
, bool *overflow
)
431 while ((c
= getc (fp
)) != EOF
)
435 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
439 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
449 while (c_isxdigit (c
= getc (fp
)))
451 if (UINT_MAX
>> 4 < n
)
454 | (c
- ('0' <= c
&& c
<= '9' ? '0'
455 : 'A' <= c
&& c
<= 'F' ? 'A' - 10
463 /* Return a mapping vector for CHARSET loaded from MAPFILE.
464 Each line of MAPFILE has this form
466 where 0xAAAA is a code-point and 0xCCCC is the corresponding
467 character code, or this form
469 where 0xAAAA and 0xBBBB are code-points specifying a range, and
470 0xCCCC is the first character code of the range.
472 The returned vector has this form:
473 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
474 where CODE1 is a code-point or a cons of code-points specifying a
477 Note that this function uses `openp' to open MAPFILE but ignores
478 `file-name-handler-alist' to avoid running any Lisp code. */
481 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
,
484 unsigned min_code
= CHARSET_MIN_CODE (charset
);
485 unsigned max_code
= CHARSET_MAX_CODE (charset
);
488 Lisp_Object suffixes
;
489 struct charset_map_entries
*head
, *entries
;
493 suffixes
= list2 (build_string (".map"), build_string (".TXT"));
495 count
= SPECPDL_INDEX ();
496 record_unwind_protect_nothing ();
497 specbind (Qfile_name_handler_alist
, Qnil
);
498 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
, false);
499 fp
= fd
< 0 ? 0 : fdopen (fd
, "r");
502 int open_errno
= errno
;
504 report_file_errno ("Loading charset map", mapfile
, open_errno
);
506 set_unwind_protect_ptr (count
, fclose_unwind
, fp
);
507 unbind_to (count
+ 1, Qnil
);
509 /* Use record_xmalloc, as `charset_map_entries' is
510 large (larger than MAX_ALLOCA). */
511 head
= record_xmalloc (sizeof *head
);
513 memset (entries
, 0, sizeof (struct charset_map_entries
));
518 unsigned from
, to
, c
;
520 bool eof
= 0, overflow
= 0;
522 from
= read_hex (fp
, &eof
, &overflow
);
525 if (getc (fp
) == '-')
526 to
= read_hex (fp
, &eof
, &overflow
);
531 c
= read_hex (fp
, &eof
, &overflow
);
537 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
540 if (n_entries
== 0x10000)
542 entries
->next
= record_xmalloc (sizeof *entries
->next
);
543 entries
= entries
->next
;
544 memset (entries
, 0, sizeof (struct charset_map_entries
));
548 entries
->entry
[idx
].from
= from
;
549 entries
->entry
[idx
].to
= to
;
550 entries
->entry
[idx
].c
= c
;
554 clear_unwind_protect (count
);
556 load_charset_map (charset
, head
, n_entries
, control_flag
);
557 unbind_to (count
, Qnil
);
561 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
563 unsigned min_code
= CHARSET_MIN_CODE (charset
);
564 unsigned max_code
= CHARSET_MAX_CODE (charset
);
565 struct charset_map_entries
*head
, *entries
;
567 int len
= ASIZE (vec
);
573 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
577 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
578 large (larger than MAX_ALLOCA). */
579 head
= SAFE_ALLOCA (sizeof *head
);
581 memset (entries
, 0, sizeof (struct charset_map_entries
));
584 for (i
= 0; i
< len
; i
+= 2)
586 Lisp_Object val
, val2
;
596 from
= XFASTINT (val
);
597 to
= XFASTINT (val2
);
600 from
= to
= XFASTINT (val
);
601 val
= AREF (vec
, i
+ 1);
605 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
608 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
610 entries
->next
= SAFE_ALLOCA (sizeof *entries
->next
);
611 entries
= entries
->next
;
612 memset (entries
, 0, sizeof (struct charset_map_entries
));
614 idx
= n_entries
% 0x10000;
615 entries
->entry
[idx
].from
= from
;
616 entries
->entry
[idx
].to
= to
;
617 entries
->entry
[idx
].c
= c
;
621 load_charset_map (charset
, head
, n_entries
, control_flag
);
626 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
627 map it is (see the comment of load_charset_map for the detail). */
630 load_charset (struct charset
*charset
, int control_flag
)
634 if (inhibit_load_charset_map
636 && charset
== temp_charset_work
->current
637 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
640 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
641 map
= CHARSET_MAP (charset
);
644 if (! CHARSET_UNIFIED_P (charset
))
646 map
= CHARSET_UNIFY_MAP (charset
);
649 load_charset_map_from_file (charset
, map
, control_flag
);
651 load_charset_map_from_vector (charset
, map
, control_flag
);
655 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
656 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
659 return (CHARSETP (object
) ? Qt
: Qnil
);
664 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
665 Lisp_Object function
, Lisp_Object arg
,
666 unsigned int from
, unsigned int to
)
668 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
669 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
670 Lisp_Object range
= Fcons (Qnil
, Qnil
);
673 c
= temp_charset_work
->min_char
;
674 stop
= (temp_charset_work
->max_char
< 0x20000
675 ? temp_charset_work
->max_char
: 0xFFFF);
679 int idx
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
681 if (idx
>= from_idx
&& idx
<= to_idx
)
683 if (NILP (XCAR (range
)))
684 XSETCAR (range
, make_number (c
));
686 else if (! NILP (XCAR (range
)))
688 XSETCDR (range
, make_number (c
- 1));
690 (*c_function
) (arg
, range
);
692 call2 (function
, range
, arg
);
693 XSETCAR (range
, Qnil
);
697 if (c
== temp_charset_work
->max_char
)
699 if (! NILP (XCAR (range
)))
701 XSETCDR (range
, make_number (c
));
703 (*c_function
) (arg
, range
);
705 call2 (function
, range
, arg
);
710 stop
= temp_charset_work
->max_char
;
717 map_charset_chars (void (*c_function
)(Lisp_Object
, Lisp_Object
), Lisp_Object function
,
718 Lisp_Object arg
, struct charset
*charset
, unsigned from
, unsigned to
)
721 bool partial
= (from
> CHARSET_MIN_CODE (charset
)
722 || to
< CHARSET_MAX_CODE (charset
));
724 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
726 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
727 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
728 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
729 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
731 if (CHARSET_UNIFIED_P (charset
))
733 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
734 load_charset (charset
, 2);
735 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
736 map_char_table_for_charset (c_function
, function
,
737 CHARSET_DEUNIFIER (charset
), arg
,
738 partial
? charset
: NULL
, from
, to
);
740 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
743 range
= Fcons (make_number (from_c
), make_number (to_c
));
745 (*c_function
) (arg
, range
);
747 call2 (function
, range
, arg
);
749 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
751 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
752 load_charset (charset
, 2);
753 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
754 map_char_table_for_charset (c_function
, function
,
755 CHARSET_ENCODER (charset
), arg
,
756 partial
? charset
: NULL
, from
, to
);
758 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
760 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
762 Lisp_Object subset_info
;
765 subset_info
= CHARSET_SUBSET (charset
);
766 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
767 offset
= XINT (AREF (subset_info
, 3));
769 if (from
< XFASTINT (AREF (subset_info
, 1)))
770 from
= XFASTINT (AREF (subset_info
, 1));
772 if (to
> XFASTINT (AREF (subset_info
, 2)))
773 to
= XFASTINT (AREF (subset_info
, 2));
774 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
776 else /* i.e. CHARSET_METHOD_SUPERSET */
780 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
781 parents
= XCDR (parents
))
784 unsigned this_from
, this_to
;
786 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
787 offset
= XINT (XCDR (XCAR (parents
)));
788 this_from
= from
> offset
? from
- offset
: 0;
789 this_to
= to
> offset
? to
- offset
: 0;
790 if (this_from
< CHARSET_MIN_CODE (charset
))
791 this_from
= CHARSET_MIN_CODE (charset
);
792 if (this_to
> CHARSET_MAX_CODE (charset
))
793 this_to
= CHARSET_MAX_CODE (charset
);
794 map_charset_chars (c_function
, function
, arg
, charset
,
800 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
801 doc
: /* Call FUNCTION for all characters in CHARSET.
802 FUNCTION is called with an argument RANGE and the optional 3rd
805 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
806 characters contained in CHARSET.
808 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
809 range of code points (in CHARSET) of target characters. */)
810 (Lisp_Object function
, Lisp_Object charset
, Lisp_Object arg
, Lisp_Object from_code
, Lisp_Object to_code
)
815 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
816 if (NILP (from_code
))
817 from
= CHARSET_MIN_CODE (cs
);
820 from
= XINT (from_code
);
821 if (from
< CHARSET_MIN_CODE (cs
))
822 from
= CHARSET_MIN_CODE (cs
);
825 to
= CHARSET_MAX_CODE (cs
);
829 if (to
> CHARSET_MAX_CODE (cs
))
830 to
= CHARSET_MAX_CODE (cs
);
832 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
837 /* Define a charset according to the arguments. The Nth argument is
838 the Nth attribute of the charset (the last attribute `charset-id'
839 is not included). See the docstring of `define-charset' for the
842 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
843 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
844 doc
: /* For internal use only.
845 usage: (define-charset-internal ...) */)
846 (ptrdiff_t nargs
, Lisp_Object
*args
)
848 /* Charset attr vector. */
851 EMACS_UINT hash_code
;
852 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
854 struct charset charset
;
857 bool new_definition_p
;
860 if (nargs
!= charset_arg_max
)
861 return Fsignal (Qwrong_number_of_arguments
,
862 Fcons (intern ("define-charset-internal"),
863 make_number (nargs
)));
865 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
867 CHECK_SYMBOL (args
[charset_arg_name
]);
868 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
870 val
= args
[charset_arg_code_space
];
871 for (i
= 0, dimension
= 0, nchars
= 1; ; i
++)
873 Lisp_Object min_byte_obj
, max_byte_obj
;
874 int min_byte
, max_byte
;
876 min_byte_obj
= Faref (val
, make_number (i
* 2));
877 max_byte_obj
= Faref (val
, make_number (i
* 2 + 1));
878 CHECK_RANGED_INTEGER (min_byte_obj
, 0, 255);
879 min_byte
= XINT (min_byte_obj
);
880 CHECK_RANGED_INTEGER (max_byte_obj
, min_byte
, 255);
881 max_byte
= XINT (max_byte_obj
);
882 charset
.code_space
[i
* 4] = min_byte
;
883 charset
.code_space
[i
* 4 + 1] = max_byte
;
884 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
889 nchars
*= charset
.code_space
[i
* 4 + 2];
890 charset
.code_space
[i
* 4 + 3] = nchars
;
893 val
= args
[charset_arg_dimension
];
895 charset
.dimension
= dimension
;
898 CHECK_RANGED_INTEGER (val
, 1, 4);
899 charset
.dimension
= XINT (val
);
902 charset
.code_linear_p
903 = (charset
.dimension
== 1
904 || (charset
.code_space
[2] == 256
905 && (charset
.dimension
== 2
906 || (charset
.code_space
[6] == 256
907 && (charset
.dimension
== 3
908 || charset
.code_space
[10] == 256)))));
910 if (! charset
.code_linear_p
)
912 charset
.code_space_mask
= xzalloc (256);
913 for (i
= 0; i
< 4; i
++)
914 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
916 charset
.code_space_mask
[j
] |= (1 << i
);
919 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
921 charset
.min_code
= (charset
.code_space
[0]
922 | (charset
.code_space
[4] << 8)
923 | (charset
.code_space
[8] << 16)
924 | ((unsigned) charset
.code_space
[12] << 24));
925 charset
.max_code
= (charset
.code_space
[1]
926 | (charset
.code_space
[5] << 8)
927 | (charset
.code_space
[9] << 16)
928 | ((unsigned) charset
.code_space
[13] << 24));
929 charset
.char_index_offset
= 0;
931 val
= args
[charset_arg_min_code
];
934 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
936 if (code
< charset
.min_code
937 || code
> charset
.max_code
)
938 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
939 make_fixnum_or_float (charset
.max_code
), val
);
940 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
941 charset
.min_code
= code
;
944 val
= args
[charset_arg_max_code
];
947 unsigned code
= cons_to_unsigned (val
, UINT_MAX
);
949 if (code
< charset
.min_code
950 || code
> charset
.max_code
)
951 args_out_of_range_3 (make_fixnum_or_float (charset
.min_code
),
952 make_fixnum_or_float (charset
.max_code
), val
);
953 charset
.max_code
= code
;
956 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
958 val
= args
[charset_arg_invalid_code
];
961 if (charset
.min_code
> 0)
962 charset
.invalid_code
= 0;
965 if (charset
.max_code
< UINT_MAX
)
966 charset
.invalid_code
= charset
.max_code
+ 1;
968 error ("Attribute :invalid-code must be specified");
972 charset
.invalid_code
= cons_to_unsigned (val
, UINT_MAX
);
974 val
= args
[charset_arg_iso_final
];
976 charset
.iso_final
= -1;
980 if (XINT (val
) < '0' || XINT (val
) > 127)
981 error ("Invalid iso-final-char: %"pI
"d", XINT (val
));
982 charset
.iso_final
= XINT (val
);
985 val
= args
[charset_arg_iso_revision
];
987 charset
.iso_revision
= -1;
990 CHECK_RANGED_INTEGER (val
, -1, 63);
991 charset
.iso_revision
= XINT (val
);
994 val
= args
[charset_arg_emacs_mule_id
];
996 charset
.emacs_mule_id
= -1;
1000 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1001 error ("Invalid emacs-mule-id: %"pI
"d", XINT (val
));
1002 charset
.emacs_mule_id
= XINT (val
);
1005 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1007 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1009 charset
.unified_p
= 0;
1011 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
1013 if (! NILP (args
[charset_arg_code_offset
]))
1015 val
= args
[charset_arg_code_offset
];
1016 CHECK_CHARACTER (val
);
1018 charset
.method
= CHARSET_METHOD_OFFSET
;
1019 charset
.code_offset
= XINT (val
);
1021 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1022 if (MAX_CHAR
- charset
.code_offset
< i
)
1023 error ("Unsupported max char: %d", charset
.max_char
);
1024 charset
.max_char
= i
+ charset
.code_offset
;
1025 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1026 charset
.min_char
= i
+ charset
.code_offset
;
1028 i
= (charset
.min_char
>> 7) << 7;
1029 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1030 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1031 i
= (i
>> 12) << 12;
1032 for (; i
<= charset
.max_char
; i
+= 0x1000)
1033 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1034 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1035 charset
.ascii_compatible_p
= 1;
1037 else if (! NILP (args
[charset_arg_map
]))
1039 val
= args
[charset_arg_map
];
1040 ASET (attrs
, charset_map
, val
);
1041 charset
.method
= CHARSET_METHOD_MAP
;
1043 else if (! NILP (args
[charset_arg_subset
]))
1046 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1047 struct charset
*parent_charset
;
1049 val
= args
[charset_arg_subset
];
1050 parent
= Fcar (val
);
1051 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1052 parent_min_code
= Fnth (make_number (1), val
);
1053 CHECK_NATNUM (parent_min_code
);
1054 parent_max_code
= Fnth (make_number (2), val
);
1055 CHECK_NATNUM (parent_max_code
);
1056 parent_code_offset
= Fnth (make_number (3), val
);
1057 CHECK_NUMBER (parent_code_offset
);
1058 val
= make_uninit_vector (4);
1059 ASET (val
, 0, make_number (parent_charset
->id
));
1060 ASET (val
, 1, parent_min_code
);
1061 ASET (val
, 2, parent_max_code
);
1062 ASET (val
, 3, parent_code_offset
);
1063 ASET (attrs
, charset_subset
, val
);
1065 charset
.method
= CHARSET_METHOD_SUBSET
;
1066 /* Here, we just copy the parent's fast_map. It's not accurate,
1067 but at least it works for quickly detecting which character
1068 DOESN'T belong to this charset. */
1069 for (i
= 0; i
< 190; i
++)
1070 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1072 /* We also copy these for parents. */
1073 charset
.min_char
= parent_charset
->min_char
;
1074 charset
.max_char
= parent_charset
->max_char
;
1076 else if (! NILP (args
[charset_arg_superset
]))
1078 val
= args
[charset_arg_superset
];
1079 charset
.method
= CHARSET_METHOD_SUPERSET
;
1080 val
= Fcopy_sequence (val
);
1081 ASET (attrs
, charset_superset
, val
);
1083 charset
.min_char
= MAX_CHAR
;
1084 charset
.max_char
= 0;
1085 for (; ! NILP (val
); val
= Fcdr (val
))
1087 Lisp_Object elt
, car_part
, cdr_part
;
1088 int this_id
, offset
;
1089 struct charset
*this_charset
;
1094 car_part
= XCAR (elt
);
1095 cdr_part
= XCDR (elt
);
1096 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1097 CHECK_TYPE_RANGED_INTEGER (int, cdr_part
);
1098 offset
= XINT (cdr_part
);
1102 CHECK_CHARSET_GET_ID (elt
, this_id
);
1105 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1107 this_charset
= CHARSET_FROM_ID (this_id
);
1108 if (charset
.min_char
> this_charset
->min_char
)
1109 charset
.min_char
= this_charset
->min_char
;
1110 if (charset
.max_char
< this_charset
->max_char
)
1111 charset
.max_char
= this_charset
->max_char
;
1112 for (i
= 0; i
< 190; i
++)
1113 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1117 error ("None of :code-offset, :map, :parents are specified");
1119 val
= args
[charset_arg_unify_map
];
1120 if (! NILP (val
) && !STRINGP (val
))
1122 ASET (attrs
, charset_unify_map
, val
);
1124 CHECK_LIST (args
[charset_arg_plist
]);
1125 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1127 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1129 if (charset
.hash_index
>= 0)
1131 new_definition_p
= 0;
1132 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1133 set_hash_value_slot (hash_table
, charset
.hash_index
, attrs
);
1137 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1139 if (charset_table_used
== charset_table_size
)
1141 /* Ensure that charset IDs fit into 'int' as well as into the
1142 restriction imposed by fixnums. Although the 'int' restriction
1143 could be removed, too much other code would need altering; for
1144 example, the IDs are stuffed into struct
1145 coding_system.charbuf[i] entries, which are 'int'. */
1146 int old_size
= charset_table_size
;
1147 ptrdiff_t new_size
= old_size
;
1148 struct charset
*new_table
=
1149 xpalloc (0, &new_size
, 1,
1150 min (INT_MAX
, MOST_POSITIVE_FIXNUM
),
1151 sizeof *charset_table
);
1152 memcpy (new_table
, charset_table
, old_size
* sizeof *new_table
);
1153 charset_table
= new_table
;
1154 charset_table_size
= new_size
;
1155 /* FIXME: This leaks memory, as the old charset_table becomes
1156 unreachable. If the old charset table is charset_table_init
1157 then this leak is intentional; otherwise, it's unclear.
1158 If the latter memory leak is intentional, a
1159 comment should be added to explain this. If not, the old
1160 charset_table should be freed, by passing it as the 1st argument
1161 to xpalloc and removing the memcpy. */
1163 id
= charset_table_used
++;
1164 new_definition_p
= 1;
1167 ASET (attrs
, charset_id
, make_number (id
));
1169 charset_table
[id
] = charset
;
1171 if (charset
.method
== CHARSET_METHOD_MAP
)
1173 load_charset (&charset
, 0);
1174 charset_table
[id
] = charset
;
1177 if (charset
.iso_final
>= 0)
1179 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1180 charset
.iso_final
) = id
;
1181 if (new_definition_p
)
1182 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1183 list1 (make_number (id
)));
1184 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1185 charset_jisx0201_roman
= id
;
1186 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1187 charset_jisx0208_1978
= id
;
1188 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1189 charset_jisx0208
= id
;
1190 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1191 charset_ksc5601
= id
;
1194 if (charset
.emacs_mule_id
>= 0)
1196 emacs_mule_charset
[charset
.emacs_mule_id
] = id
;
1197 if (charset
.emacs_mule_id
< 0xA0)
1198 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1200 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1201 if (new_definition_p
)
1202 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1203 list1 (make_number (id
)));
1206 if (new_definition_p
)
1208 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1209 if (charset
.supplementary_p
)
1210 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1211 list1 (make_number (id
)));
1216 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1218 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1220 if (cs
->supplementary_p
)
1223 if (EQ (tail
, Vcharset_ordered_list
))
1224 Vcharset_ordered_list
= Fcons (make_number (id
),
1225 Vcharset_ordered_list
);
1226 else if (NILP (tail
))
1227 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1228 list1 (make_number (id
)));
1231 val
= Fcons (XCAR (tail
), XCDR (tail
));
1232 XSETCDR (tail
, val
);
1233 XSETCAR (tail
, make_number (id
));
1236 charset_ordered_list_tick
++;
1243 /* Same as Fdefine_charset_internal but arguments are more convenient
1244 to call from C (typically in syms_of_charset). This can define a
1245 charset of `offset' method only. Return the ID of the new
1249 define_charset_internal (Lisp_Object name
,
1251 const char *code_space_chars
,
1252 unsigned min_code
, unsigned max_code
,
1253 int iso_final
, int iso_revision
, int emacs_mule_id
,
1254 bool ascii_compatible
, bool supplementary
,
1257 const unsigned char *code_space
= (const unsigned char *) code_space_chars
;
1258 Lisp_Object args
[charset_arg_max
];
1262 args
[charset_arg_name
] = name
;
1263 args
[charset_arg_dimension
] = make_number (dimension
);
1264 val
= make_uninit_vector (8);
1265 for (i
= 0; i
< 8; i
++)
1266 ASET (val
, i
, make_number (code_space
[i
]));
1267 args
[charset_arg_code_space
] = val
;
1268 args
[charset_arg_min_code
] = make_number (min_code
);
1269 args
[charset_arg_max_code
] = make_number (max_code
);
1270 args
[charset_arg_iso_final
]
1271 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1272 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1273 args
[charset_arg_emacs_mule_id
]
1274 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1275 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1276 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1277 args
[charset_arg_invalid_code
] = Qnil
;
1278 args
[charset_arg_code_offset
] = make_number (code_offset
);
1279 args
[charset_arg_map
] = Qnil
;
1280 args
[charset_arg_subset
] = Qnil
;
1281 args
[charset_arg_superset
] = Qnil
;
1282 args
[charset_arg_unify_map
] = Qnil
;
1284 args
[charset_arg_plist
] =
1285 listn (CONSTYPE_HEAP
, 14,
1286 intern_c_string (":name"),
1287 args
[charset_arg_name
],
1288 intern_c_string (":dimension"),
1289 args
[charset_arg_dimension
],
1290 intern_c_string (":code-space"),
1291 args
[charset_arg_code_space
],
1292 intern_c_string (":iso-final-char"),
1293 args
[charset_arg_iso_final
],
1294 intern_c_string (":emacs-mule-id"),
1295 args
[charset_arg_emacs_mule_id
],
1296 intern_c_string (":ascii-compatible-p"),
1297 args
[charset_arg_ascii_compatible_p
],
1298 intern_c_string (":code-offset"),
1299 args
[charset_arg_code_offset
]);
1300 Fdefine_charset_internal (charset_arg_max
, args
);
1302 return XINT (CHARSET_SYMBOL_ID (name
));
1306 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1307 Sdefine_charset_alias
, 2, 2, 0,
1308 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1309 (Lisp_Object alias
, Lisp_Object charset
)
1313 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1314 Fputhash (alias
, attr
, Vcharset_hash_table
);
1315 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1320 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1321 doc
: /* Return the property list of CHARSET. */)
1322 (Lisp_Object charset
)
1326 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1327 return CHARSET_ATTR_PLIST (attrs
);
1331 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1332 doc
: /* Set CHARSET's property list to PLIST. */)
1333 (Lisp_Object charset
, Lisp_Object plist
)
1337 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1338 ASET (attrs
, charset_plist
, plist
);
1343 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1344 doc
: /* Unify characters of CHARSET with Unicode.
1345 This means reading the relevant file and installing the table defined
1346 by CHARSET's `:unify-map' property.
1348 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1349 the same meaning as the `:unify-map' attribute in the function
1350 `define-charset' (which see).
1352 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1353 (Lisp_Object charset
, Lisp_Object unify_map
, Lisp_Object deunify
)
1358 CHECK_CHARSET_GET_ID (charset
, id
);
1359 cs
= CHARSET_FROM_ID (id
);
1361 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1362 : ! CHARSET_UNIFIED_P (cs
))
1365 CHARSET_UNIFIED_P (cs
) = 0;
1368 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1369 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1370 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1371 if (NILP (unify_map
))
1372 unify_map
= CHARSET_UNIFY_MAP (cs
);
1375 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1376 signal_error ("Bad unify-map", unify_map
);
1377 set_charset_attr (cs
, charset_unify_map
, unify_map
);
1379 if (NILP (Vchar_unify_table
))
1380 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1381 char_table_set_range (Vchar_unify_table
,
1382 cs
->min_char
, cs
->max_char
, charset
);
1383 CHARSET_UNIFIED_P (cs
) = 1;
1385 else if (CHAR_TABLE_P (Vchar_unify_table
))
1387 unsigned min_code
= CHARSET_MIN_CODE (cs
);
1388 unsigned max_code
= CHARSET_MAX_CODE (cs
);
1389 int min_char
= DECODE_CHAR (cs
, min_code
);
1390 int max_char
= DECODE_CHAR (cs
, max_code
);
1392 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1398 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
1399 Return true if it's a 96-character set, false if 94. */
1402 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
,
1403 Lisp_Object final_char
)
1405 CHECK_NUMBER (dimension
);
1406 CHECK_NUMBER (chars
);
1407 CHECK_CHARACTER (final_char
);
1409 if (! (1 <= XINT (dimension
) && XINT (dimension
) <= 3))
1410 error ("Invalid DIMENSION %"pI
"d, it should be 1, 2, or 3",
1413 bool chars_flag
= XINT (chars
) == 96;
1414 if (! (chars_flag
|| XINT (chars
) == 94))
1415 error ("Invalid CHARS %"pI
"d, it should be 94 or 96", XINT (chars
));
1417 int final_ch
= XFASTINT (final_char
);
1418 if (! ('0' <= final_ch
&& final_ch
<= '~'))
1419 error ("Invalid FINAL-CHAR '%c', it should be '0'..'~'", final_ch
);
1424 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1425 Sget_unused_iso_final_char
, 2, 2, 0,
1427 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1428 DIMENSION is the number of bytes to represent a character: 1 or 2.
1429 CHARS is the number of characters in a dimension: 94 or 96.
1431 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1432 If there's no unused final char for the specified kind of charset,
1434 (Lisp_Object dimension
, Lisp_Object chars
)
1436 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
,
1438 for (int final_char
= '0'; final_char
<= '?'; final_char
++)
1439 if (ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, final_char
) < 0)
1440 return make_number (final_char
);
1445 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1447 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1449 On decoding by an ISO-2022 base coding system, when a charset
1450 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1451 if CHARSET is designated instead. */)
1452 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
, Lisp_Object charset
)
1456 CHECK_CHARSET_GET_ID (charset
, id
);
1457 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
1458 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XFASTINT (final_char
)) = id
;
1463 /* Return information about charsets in the text at PTR of NBYTES
1464 bytes, which are NCHARS characters. The value is:
1466 0: Each character is represented by one byte. This is always
1467 true for a unibyte string. For a multibyte string, true if
1468 it contains only ASCII characters.
1470 1: No charsets other than ascii, control-1, and latin-1 are
1477 string_xstring_p (Lisp_Object string
)
1479 const unsigned char *p
= SDATA (string
);
1480 const unsigned char *endp
= p
+ SBYTES (string
);
1482 if (SCHARS (string
) == SBYTES (string
))
1487 int c
= STRING_CHAR_ADVANCE (p
);
1496 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1498 CHARSETS is a vector. If Nth element is non-nil, it means the
1499 charset whose id is N is already found.
1501 It may lookup a translation table TABLE if supplied. */
1504 find_charsets_in_text (const unsigned char *ptr
, ptrdiff_t nchars
,
1505 ptrdiff_t nbytes
, Lisp_Object charsets
,
1506 Lisp_Object table
, bool multibyte
)
1508 const unsigned char *pend
= ptr
+ nbytes
;
1510 if (nchars
== nbytes
)
1513 ASET (charsets
, charset_ascii
, Qt
);
1520 c
= translate_char (table
, c
);
1521 if (ASCII_CHAR_P (c
))
1522 ASET (charsets
, charset_ascii
, Qt
);
1524 ASET (charsets
, charset_eight_bit
, Qt
);
1531 int c
= STRING_CHAR_ADVANCE (ptr
);
1532 struct charset
*charset
;
1535 c
= translate_char (table
, c
);
1536 charset
= CHAR_CHARSET (c
);
1537 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1542 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1544 doc
: /* Return a list of charsets in the region between BEG and END.
1545 BEG and END are buffer positions.
1546 Optional arg TABLE if non-nil is a translation table to look up.
1548 If the current buffer is unibyte, the returned list may contain
1549 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1550 (Lisp_Object beg
, Lisp_Object end
, Lisp_Object table
)
1552 Lisp_Object charsets
;
1553 ptrdiff_t from
, from_byte
, to
, stop
, stop_byte
;
1556 bool multibyte
= ! NILP (BVAR (current_buffer
, enable_multibyte_characters
));
1558 validate_region (&beg
, &end
);
1559 from
= XFASTINT (beg
);
1560 stop
= to
= XFASTINT (end
);
1562 if (from
< GPT
&& GPT
< to
)
1565 stop_byte
= GPT_BYTE
;
1568 stop_byte
= CHAR_TO_BYTE (stop
);
1570 from_byte
= CHAR_TO_BYTE (from
);
1572 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1575 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1576 stop_byte
- from_byte
, charsets
, table
,
1580 from
= stop
, from_byte
= stop_byte
;
1581 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1588 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1589 if (!NILP (AREF (charsets
, i
)))
1590 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1594 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1596 doc
: /* Return a list of charsets in STR.
1597 Optional arg TABLE if non-nil is a translation table to look up.
1599 If STR is unibyte, the returned list may contain
1600 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1601 (Lisp_Object str
, Lisp_Object table
)
1603 Lisp_Object charsets
;
1609 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1610 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1612 STRING_MULTIBYTE (str
));
1614 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1615 if (!NILP (AREF (charsets
, i
)))
1616 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1622 /* Return a unified character code for C (>= 0x110000). VAL is a
1623 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1626 maybe_unify_char (int c
, Lisp_Object val
)
1628 struct charset
*charset
;
1631 return XFASTINT (val
);
1635 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1637 /* The call to load_charset below can allocate memory, which screws
1638 callers of this function through STRING_CHAR_* macros that hold C
1639 pointers to buffer text, if REL_ALLOC is used. */
1640 r_alloc_inhibit_buffer_relocation (1);
1642 load_charset (charset
, 1);
1643 if (! inhibit_load_charset_map
)
1645 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1651 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1652 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1658 r_alloc_inhibit_buffer_relocation (0);
1664 /* Return a character corresponding to the code-point CODE of
1668 decode_char (struct charset
*charset
, unsigned int code
)
1671 enum charset_method method
= CHARSET_METHOD (charset
);
1673 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1676 if (method
== CHARSET_METHOD_SUBSET
)
1678 Lisp_Object subset_info
;
1680 subset_info
= CHARSET_SUBSET (charset
);
1681 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1682 code
-= XINT (AREF (subset_info
, 3));
1683 if (code
< XFASTINT (AREF (subset_info
, 1))
1684 || code
> XFASTINT (AREF (subset_info
, 2)))
1687 c
= DECODE_CHAR (charset
, code
);
1689 else if (method
== CHARSET_METHOD_SUPERSET
)
1691 Lisp_Object parents
;
1693 parents
= CHARSET_SUPERSET (charset
);
1695 for (; CONSP (parents
); parents
= XCDR (parents
))
1697 int id
= XINT (XCAR (XCAR (parents
)));
1698 int code_offset
= XINT (XCDR (XCAR (parents
)));
1699 unsigned this_code
= code
- code_offset
;
1701 charset
= CHARSET_FROM_ID (id
);
1702 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1708 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1712 if (method
== CHARSET_METHOD_MAP
)
1714 Lisp_Object decoder
;
1716 decoder
= CHARSET_DECODER (charset
);
1717 if (! VECTORP (decoder
))
1719 load_charset (charset
, 1);
1720 decoder
= CHARSET_DECODER (charset
);
1722 if (VECTORP (decoder
))
1723 c
= XINT (AREF (decoder
, char_index
));
1725 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1727 else /* method == CHARSET_METHOD_OFFSET */
1729 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1730 if (CHARSET_UNIFIED_P (charset
)
1731 && MAX_UNICODE_CHAR
< c
&& c
<= MAX_5_BYTE_CHAR
)
1733 /* Unify C with a Unicode character if possible. */
1734 Lisp_Object val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1735 c
= maybe_unify_char (c
, val
);
1743 /* Variable used temporarily by the macro ENCODE_CHAR. */
1744 Lisp_Object charset_work
;
1746 /* Return a code-point of C in CHARSET. If C doesn't belong to
1747 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1748 use CHARSET's strict_max_char instead of max_char. */
1751 encode_char (struct charset
*charset
, int c
)
1754 enum charset_method method
= CHARSET_METHOD (charset
);
1756 if (CHARSET_UNIFIED_P (charset
))
1758 Lisp_Object deunifier
;
1759 int code_index
= -1;
1761 deunifier
= CHARSET_DEUNIFIER (charset
);
1762 if (! CHAR_TABLE_P (deunifier
))
1764 load_charset (charset
, 2);
1765 deunifier
= CHARSET_DEUNIFIER (charset
);
1767 if (CHAR_TABLE_P (deunifier
))
1769 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1771 if (INTEGERP (deunified
))
1772 code_index
= XINT (deunified
);
1776 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1778 if (code_index
>= 0)
1779 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1782 if (method
== CHARSET_METHOD_SUBSET
)
1784 Lisp_Object subset_info
;
1785 struct charset
*this_charset
;
1787 subset_info
= CHARSET_SUBSET (charset
);
1788 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1789 code
= ENCODE_CHAR (this_charset
, c
);
1790 if (code
== CHARSET_INVALID_CODE (this_charset
)
1791 || code
< XFASTINT (AREF (subset_info
, 1))
1792 || code
> XFASTINT (AREF (subset_info
, 2)))
1793 return CHARSET_INVALID_CODE (charset
);
1794 code
+= XINT (AREF (subset_info
, 3));
1798 if (method
== CHARSET_METHOD_SUPERSET
)
1800 Lisp_Object parents
;
1802 parents
= CHARSET_SUPERSET (charset
);
1803 for (; CONSP (parents
); parents
= XCDR (parents
))
1805 int id
= XINT (XCAR (XCAR (parents
)));
1806 int code_offset
= XINT (XCDR (XCAR (parents
)));
1807 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1809 code
= ENCODE_CHAR (this_charset
, c
);
1810 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1811 return code
+ code_offset
;
1813 return CHARSET_INVALID_CODE (charset
);
1816 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1817 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1818 return CHARSET_INVALID_CODE (charset
);
1820 if (method
== CHARSET_METHOD_MAP
)
1822 Lisp_Object encoder
;
1825 encoder
= CHARSET_ENCODER (charset
);
1826 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1828 load_charset (charset
, 2);
1829 encoder
= CHARSET_ENCODER (charset
);
1831 if (CHAR_TABLE_P (encoder
))
1833 val
= CHAR_TABLE_REF (encoder
, c
);
1835 return CHARSET_INVALID_CODE (charset
);
1837 if (! CHARSET_COMPACT_CODES_P (charset
))
1838 code
= INDEX_TO_CODE_POINT (charset
, code
);
1842 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1843 code
= INDEX_TO_CODE_POINT (charset
, code
);
1846 else /* method == CHARSET_METHOD_OFFSET */
1848 unsigned code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1850 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1857 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1858 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1859 Return nil if CODE-POINT is not valid in CHARSET.
1861 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1862 (Lisp_Object charset
, Lisp_Object code_point
, Lisp_Object restriction
)
1866 struct charset
*charsetp
;
1868 CHECK_CHARSET_GET_ID (charset
, id
);
1869 code
= cons_to_unsigned (code_point
, UINT_MAX
);
1870 charsetp
= CHARSET_FROM_ID (id
);
1871 c
= DECODE_CHAR (charsetp
, code
);
1872 return (c
>= 0 ? make_number (c
) : Qnil
);
1876 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1877 doc
: /* Encode the character CH into a code-point of CHARSET.
1878 Return nil if CHARSET doesn't include CH. */)
1879 (Lisp_Object ch
, Lisp_Object charset
, Lisp_Object restriction
)
1883 struct charset
*charsetp
;
1885 CHECK_CHARSET_GET_ID (charset
, id
);
1886 CHECK_CHARACTER (ch
);
1888 charsetp
= CHARSET_FROM_ID (id
);
1889 code
= ENCODE_CHAR (charsetp
, c
);
1890 if (code
== CHARSET_INVALID_CODE (charsetp
))
1892 return INTEGER_TO_CONS (code
);
1896 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1898 /* Return a character of CHARSET whose position codes are CODEn.
1900 CODE1 through CODE4 are optional, but if you don't supply sufficient
1901 position codes, it is assumed that the minimum code in each dimension
1903 (Lisp_Object charset
, Lisp_Object code1
, Lisp_Object code2
, Lisp_Object code3
, Lisp_Object code4
)
1906 struct charset
*charsetp
;
1910 CHECK_CHARSET_GET_ID (charset
, id
);
1911 charsetp
= CHARSET_FROM_ID (id
);
1913 dimension
= CHARSET_DIMENSION (charsetp
);
1915 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1916 ? 0 : CHARSET_MIN_CODE (charsetp
));
1919 CHECK_NATNUM (code1
);
1920 if (XFASTINT (code1
) >= 0x100)
1921 args_out_of_range (make_number (0xFF), code1
);
1922 code
= XFASTINT (code1
);
1928 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1931 CHECK_NATNUM (code2
);
1932 if (XFASTINT (code2
) >= 0x100)
1933 args_out_of_range (make_number (0xFF), code2
);
1934 code
|= XFASTINT (code2
);
1941 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1944 CHECK_NATNUM (code3
);
1945 if (XFASTINT (code3
) >= 0x100)
1946 args_out_of_range (make_number (0xFF), code3
);
1947 code
|= XFASTINT (code3
);
1954 code
|= charsetp
->code_space
[0];
1957 CHECK_NATNUM (code4
);
1958 if (XFASTINT (code4
) >= 0x100)
1959 args_out_of_range (make_number (0xFF), code4
);
1960 code
|= XFASTINT (code4
);
1967 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1969 c
= DECODE_CHAR (charsetp
, code
);
1971 error ("Invalid code(s)");
1972 return make_number (c
);
1976 /* Return the first charset in CHARSET_LIST that contains C.
1977 CHARSET_LIST is a list of charset IDs. If it is nil, use
1978 Vcharset_ordered_list. */
1981 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
1983 bool maybe_null
= 0;
1985 if (NILP (charset_list
))
1986 charset_list
= Vcharset_ordered_list
;
1990 while (CONSP (charset_list
))
1992 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1993 unsigned code
= ENCODE_CHAR (charset
, c
);
1995 if (code
!= CHARSET_INVALID_CODE (charset
))
1998 *code_return
= code
;
2001 charset_list
= XCDR (charset_list
);
2003 && c
<= MAX_UNICODE_CHAR
2004 && EQ (charset_list
, Vcharset_non_preferred_head
))
2005 return CHARSET_FROM_ID (charset_unicode
);
2007 return (maybe_null
? NULL
2008 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2009 : CHARSET_FROM_ID (charset_eight_bit
));
2013 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2015 /*Return list of charset and one to four position-codes of CH.
2016 The charset is decided by the current priority order of charsets.
2017 A position-code is a byte value of each dimension of the code-point of
2018 CH in the charset. */)
2021 struct charset
*charset
;
2026 CHECK_CHARACTER (ch
);
2028 charset
= CHAR_CHARSET (c
);
2031 code
= ENCODE_CHAR (charset
, c
);
2032 if (code
== CHARSET_INVALID_CODE (charset
))
2034 dimension
= CHARSET_DIMENSION (charset
);
2035 for (val
= Qnil
; dimension
> 0; dimension
--)
2037 val
= Fcons (make_number (code
& 0xFF), val
);
2040 return Fcons (CHARSET_NAME (charset
), val
);
2044 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2045 doc
: /* Return the charset of highest priority that contains CH.
2046 ASCII characters are an exception: for them, this function always
2048 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2049 from which to find the charset. It may also be a coding system. In
2050 that case, find the charset from what supported by that coding system. */)
2051 (Lisp_Object ch
, Lisp_Object restriction
)
2053 struct charset
*charset
;
2055 CHECK_CHARACTER (ch
);
2056 if (NILP (restriction
))
2057 charset
= CHAR_CHARSET (XINT (ch
));
2060 if (CONSP (restriction
))
2062 int c
= XFASTINT (ch
);
2064 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2066 struct charset
*rcharset
;
2068 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), rcharset
);
2069 if (ENCODE_CHAR (rcharset
, c
) != CHARSET_INVALID_CODE (rcharset
))
2070 return XCAR (restriction
);
2074 restriction
= coding_system_charset_list (restriction
);
2075 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2079 return (CHARSET_NAME (charset
));
2083 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2085 Return charset of a character in the current buffer at position POS.
2086 If POS is nil, it defaults to the current point.
2087 If POS is out of range, the value is nil. */)
2091 struct charset
*charset
;
2093 ch
= Fchar_after (pos
);
2094 if (! INTEGERP (ch
))
2096 charset
= CHAR_CHARSET (XINT (ch
));
2097 return (CHARSET_NAME (charset
));
2101 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2103 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2105 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2106 by their DIMENSION, CHARS, and FINAL-CHAR,
2107 whereas Emacs distinguishes them by charset symbol.
2108 See the documentation of the function `charset-info' for the meanings of
2109 DIMENSION, CHARS, and FINAL-CHAR. */)
2110 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
2112 bool chars_flag
= check_iso_charset_parameter (dimension
, chars
, final_char
);
2113 int id
= ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
,
2114 XFASTINT (final_char
));
2115 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2119 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2123 Clear temporary charset mapping tables.
2124 It should be called only from temacs invoked for dumping. */)
2127 if (temp_charset_work
)
2129 xfree (temp_charset_work
);
2130 temp_charset_work
= NULL
;
2133 if (CHAR_TABLE_P (Vchar_unify_table
))
2134 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2139 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2140 Scharset_priority_list
, 0, 1, 0,
2141 doc
: /* Return the list of charsets ordered by priority.
2142 HIGHESTP non-nil means just return the highest priority one. */)
2143 (Lisp_Object highestp
)
2145 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2147 if (!NILP (highestp
))
2148 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2150 while (!NILP (list
))
2152 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2155 return Fnreverse (val
);
2158 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2160 doc
: /* Assign higher priority to the charsets given as arguments.
2161 usage: (set-charset-priority &rest charsets) */)
2162 (ptrdiff_t nargs
, Lisp_Object
*args
)
2164 Lisp_Object new_head
, old_list
, arglist
[2];
2165 Lisp_Object list_2022
, list_emacs_mule
;
2169 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2171 for (i
= 0; i
< nargs
; i
++)
2173 CHECK_CHARSET_GET_ID (args
[i
], id
);
2174 if (! NILP (Fmemq (make_number (id
), old_list
)))
2176 old_list
= Fdelq (make_number (id
), old_list
);
2177 new_head
= Fcons (make_number (id
), new_head
);
2180 arglist
[0] = Fnreverse (new_head
);
2181 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2182 Vcharset_ordered_list
= Fnconc (2, arglist
);
2183 charset_ordered_list_tick
++;
2185 charset_unibyte
= -1;
2186 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2187 CONSP (old_list
); old_list
= XCDR (old_list
))
2189 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2190 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2191 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2192 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2193 if (charset_unibyte
< 0)
2195 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2197 if (CHARSET_DIMENSION (charset
) == 1
2198 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2199 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2200 charset_unibyte
= CHARSET_ID (charset
);
2203 Viso_2022_charset_list
= Fnreverse (list_2022
);
2204 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2205 if (charset_unibyte
< 0)
2206 charset_unibyte
= charset_iso_8859_1
;
2211 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2213 doc
: /* Internal use only.
2214 Return charset identification number of CHARSET. */)
2215 (Lisp_Object charset
)
2219 CHECK_CHARSET_GET_ID (charset
, id
);
2220 return make_number (id
);
2223 struct charset_sort_data
2225 Lisp_Object charset
;
2231 charset_compare (const void *d1
, const void *d2
)
2233 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2234 if (data1
->priority
!= data2
->priority
)
2235 return data1
->priority
< data2
->priority
? -1 : 1;
2239 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2240 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2241 Return the sorted list. CHARSETS is modified by side effects.
2242 See also `charset-priority-list' and `set-charset-priority'. */)
2243 (Lisp_Object charsets
)
2245 Lisp_Object len
= Flength (charsets
);
2246 ptrdiff_t n
= XFASTINT (len
), i
, j
;
2248 Lisp_Object tail
, elt
, attrs
;
2249 struct charset_sort_data
*sort_data
;
2250 int id
, min_id
= INT_MAX
, max_id
= INT_MIN
;
2255 SAFE_NALLOCA (sort_data
, 1, n
);
2256 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2259 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2260 sort_data
[i
].charset
= elt
;
2261 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2267 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2268 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2271 id
= XFASTINT (elt
);
2272 if (id
>= min_id
&& id
<= max_id
)
2273 for (j
= 0; j
< n
; j
++)
2274 if (sort_data
[j
].id
== id
)
2276 sort_data
[j
].priority
= i
;
2280 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2281 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2282 XSETCAR (tail
, sort_data
[i
].charset
);
2291 Lisp_Object tempdir
;
2292 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2293 if (! file_accessible_directory_p (tempdir
))
2295 /* This used to be non-fatal (dir_warning), but it should not
2296 happen, and if it does sooner or later it will cause some
2297 obscure problem (eg bug#6401), so better abort. */
2298 fprintf (stderr
, "Error: charsets directory not found:\n\
2300 Emacs will not function correctly without the character map files.\n%s\
2301 Please check your installation!\n",
2303 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2304 variable is set, maybe it has the wrong value?\n" : "");
2308 Vcharset_map_path
= list1 (tempdir
);
2313 init_charset_once (void)
2317 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2318 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2319 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2320 iso_charset_table
[i
][j
][k
] = -1;
2322 for (i
= 0; i
< 256; i
++)
2323 emacs_mule_charset
[i
] = -1;
2325 charset_jisx0201_roman
= -1;
2326 charset_jisx0208_1978
= -1;
2327 charset_jisx0208
= -1;
2328 charset_ksc5601
= -1;
2333 /* Allocate an initial charset table that is large enough to handle
2334 Emacs while it is bootstrapping. As of September 2011, the size
2335 needs to be at least 166; make it a bit bigger to allow for future
2338 Don't make the value so small that the table is reallocated during
2339 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2340 during an initial bootstrap wreak havoc after dumping; see the
2341 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2342 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2343 static struct charset charset_table_init
[180];
2346 syms_of_charset (void)
2348 DEFSYM (Qcharsetp
, "charsetp");
2350 DEFSYM (Qascii
, "ascii");
2351 DEFSYM (Qunicode
, "unicode");
2352 DEFSYM (Qemacs
, "emacs");
2353 DEFSYM (Qeight_bit
, "eight-bit");
2354 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2359 staticpro (&Vcharset_ordered_list
);
2360 Vcharset_ordered_list
= Qnil
;
2362 staticpro (&Viso_2022_charset_list
);
2363 Viso_2022_charset_list
= Qnil
;
2365 staticpro (&Vemacs_mule_charset_list
);
2366 Vemacs_mule_charset_list
= Qnil
;
2368 /* Don't staticpro them here. It's done in syms_of_fns. */
2369 QCtest
= intern_c_string (":test");
2370 Qeq
= intern_c_string ("eq");
2372 staticpro (&Vcharset_hash_table
);
2374 Lisp_Object args
[2];
2377 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2380 charset_table
= charset_table_init
;
2381 charset_table_size
= ARRAYELTS (charset_table_init
);
2382 charset_table_used
= 0;
2384 defsubr (&Scharsetp
);
2385 defsubr (&Smap_charset_chars
);
2386 defsubr (&Sdefine_charset_internal
);
2387 defsubr (&Sdefine_charset_alias
);
2388 defsubr (&Scharset_plist
);
2389 defsubr (&Sset_charset_plist
);
2390 defsubr (&Sunify_charset
);
2391 defsubr (&Sget_unused_iso_final_char
);
2392 defsubr (&Sdeclare_equiv_charset
);
2393 defsubr (&Sfind_charset_region
);
2394 defsubr (&Sfind_charset_string
);
2395 defsubr (&Sdecode_char
);
2396 defsubr (&Sencode_char
);
2397 defsubr (&Ssplit_char
);
2398 defsubr (&Smake_char
);
2399 defsubr (&Schar_charset
);
2400 defsubr (&Scharset_after
);
2401 defsubr (&Siso_charset
);
2402 defsubr (&Sclear_charset_maps
);
2403 defsubr (&Scharset_priority_list
);
2404 defsubr (&Sset_charset_priority
);
2405 defsubr (&Scharset_id_internal
);
2406 defsubr (&Ssort_charsets
);
2408 DEFVAR_LISP ("charset-map-path", Vcharset_map_path
,
2409 doc
: /* List of directories to search for charset map files. */);
2410 Vcharset_map_path
= Qnil
;
2412 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map
,
2413 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2414 inhibit_load_charset_map
= 0;
2416 DEFVAR_LISP ("charset-list", Vcharset_list
,
2417 doc
: /* List of all charsets ever defined. */);
2418 Vcharset_list
= Qnil
;
2420 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language
,
2421 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2422 If the current language environment is for multiple languages (e.g. "Latin-1"),
2423 the value may be a list of mnemonics. */);
2424 Vcurrent_iso639_language
= Qnil
;
2427 = define_charset_internal (Qascii
, 1, "\x00\x7F\0\0\0\0\0",
2428 0, 127, 'B', -1, 0, 1, 0, 0);
2430 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\0\0\0\0\0",
2431 0, 255, -1, -1, -1, 1, 0, 0);
2433 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2434 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2436 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2437 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2439 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\0\0\0\0\0",
2440 128, 255, -1, 0, -1, 0, 1,
2441 MAX_5_BYTE_CHAR
+ 1);
2442 charset_unibyte
= charset_iso_8859_1
;