1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010, 2011 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010, 2011
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
13 This file is part of GNU Emacs.
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
33 #include <sys/types.h>
36 #include "character.h"
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
57 /* Hash table that contains attributes of each charset. Keys are
58 charset symbols, and values are vectors of charset attributes. */
59 Lisp_Object Vcharset_hash_table
;
61 /* Table of struct charset. */
62 struct charset
*charset_table
;
64 static int charset_table_size
;
65 static int charset_table_used
;
67 Lisp_Object Qcharsetp
;
69 /* Special charset symbols. */
71 Lisp_Object Qeight_bit
;
72 Lisp_Object Qiso_8859_1
;
76 /* The corresponding charsets. */
78 int charset_eight_bit
;
79 int charset_iso_8859_1
;
83 /* The other special charsets. */
84 int charset_jisx0201_roman
;
85 int charset_jisx0208_1978
;
89 /* Value of charset attribute `charset-iso-plane'. */
92 /* Charset of unibyte characters. */
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list
;
98 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
100 Lisp_Object Vcharset_non_preferred_head
;
102 /* Incremented everytime we change Vcharset_ordered_list. This is
103 unsigned short so that it fits in Lisp_Int and never matches
105 unsigned short charset_ordered_list_tick
;
107 /* List of iso-2022 charsets. */
108 Lisp_Object Viso_2022_charset_list
;
110 /* List of emacs-mule charsets. */
111 Lisp_Object Vemacs_mule_charset_list
;
113 int emacs_mule_charset
[256];
115 /* Mapping table from ISO2022's charset (specified by DIMENSION,
116 CHARS, and FINAL-CHAR) to Emacs' charset. */
117 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
119 #define CODE_POINT_TO_INDEX(charset, code) \
120 ((charset)->code_linear_p \
121 ? (code) - (charset)->min_code \
122 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
123 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
124 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
125 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
126 ? (((((code) >> 24) - (charset)->code_space[12]) \
127 * (charset)->code_space[11]) \
128 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
129 * (charset)->code_space[7]) \
130 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
131 * (charset)->code_space[3]) \
132 + (((code) & 0xFF) - (charset)->code_space[0]) \
133 - ((charset)->char_index_offset)) \
137 /* Convert the character index IDX to code-point CODE for CHARSET.
138 It is assumed that IDX is in a valid range. */
140 #define INDEX_TO_CODE_POINT(charset, idx) \
141 ((charset)->code_linear_p \
142 ? (idx) + (charset)->min_code \
143 : (idx += (charset)->char_index_offset, \
144 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
145 | (((charset)->code_space[4] \
146 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
148 | (((charset)->code_space[8] \
149 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
151 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
154 /* Structure to hold mapping tables for a charset. Used by temacs
155 invoked for dumping. */
159 /* The current charset for which the following tables are setup. */
160 struct charset
*current
;
162 /* 1 iff the following table is used for encoder. */
165 /* When the following table is used for encoding, mininum and
166 maxinum character of the current charset. */
167 int min_char
, max_char
;
169 /* A Unicode character correspoinding to the code indice 0 (i.e. the
170 minimum code-point) of the current charset, or -1 if the code
171 indice 0 is not a Unicode character. This is checked when
172 table.encoder[CHAR] is zero. */
176 /* Table mapping code-indices (not code-points) of the current
177 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
178 doesn't belong to the current charset. */
179 int decoder
[0x10000];
180 /* Table mapping Unicode characters to code-indices of the current
181 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
182 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
183 (0x20000..0x2FFFF). Note that there is no charset map that
184 uses both SMP and SIP. */
185 unsigned short encoder
[0x20000];
187 } *temp_charset_work
;
189 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
192 temp_charset_work->zero_index_char = (C); \
193 else if ((C) < 0x20000) \
194 temp_charset_work->table.encoder[(C)] = (CODE); \
196 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
199 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
200 ((C) == temp_charset_work->zero_index_char ? 0 \
201 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
202 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
203 : temp_charset_work->table.encoder[(C) - 0x10000] \
204 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
206 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
207 (temp_charset_work->table.decoder[(CODE)] = (C))
209 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
210 (temp_charset_work->table.decoder[(CODE)])
213 /* Set to 1 to warn that a charset map is loaded and thus a buffer
214 text and a string data may be relocated. */
215 int charset_map_loaded
;
217 struct charset_map_entries
223 struct charset_map_entries
*next
;
226 /* Load the mapping information of CHARSET from ENTRIES for
227 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
228 encoding (CONTROL_FLAG == 2).
230 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
231 and CHARSET->fast_map.
233 If CONTROL_FLAG is 1, setup the following tables according to
234 CHARSET->method and inhibit_load_charset_map.
236 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
237 ----------------------+--------------------+---------------------------
238 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
239 ----------------------+--------------------+---------------------------
240 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
242 If CONTROL_FLAG is 2, setup the following tables.
244 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
245 ----------------------+--------------------+---------------------------
246 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
247 ----------------------+--------------------+--------------------------
248 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
252 load_charset_map (struct charset
*charset
, struct charset_map_entries
*entries
, int n_entries
, int control_flag
)
254 Lisp_Object vec
, table
;
255 unsigned max_code
= CHARSET_MAX_CODE (charset
);
256 int ascii_compatible_p
= charset
->ascii_compatible_p
;
257 int min_char
, max_char
, nonascii_min_char
;
259 unsigned char *fast_map
= charset
->fast_map
;
266 if (! inhibit_load_charset_map
)
268 if (control_flag
== 1)
270 if (charset
->method
== CHARSET_METHOD_MAP
)
272 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
274 vec
= CHARSET_DECODER (charset
)
275 = Fmake_vector (make_number (n
), make_number (-1));
279 char_table_set_range (Vchar_unify_table
,
280 charset
->min_char
, charset
->max_char
,
286 table
= Fmake_char_table (Qnil
, Qnil
);
287 if (charset
->method
== CHARSET_METHOD_MAP
)
288 CHARSET_ENCODER (charset
) = table
;
290 CHARSET_DEUNIFIER (charset
) = table
;
295 if (! temp_charset_work
)
296 temp_charset_work
= malloc (sizeof (*temp_charset_work
));
297 if (control_flag
== 1)
299 memset (temp_charset_work
->table
.decoder
, -1,
300 sizeof (int) * 0x10000);
304 memset (temp_charset_work
->table
.encoder
, 0,
305 sizeof (unsigned short) * 0x20000);
306 temp_charset_work
->zero_index_char
= -1;
308 temp_charset_work
->current
= charset
;
309 temp_charset_work
->for_encoder
= (control_flag
== 2);
312 charset_map_loaded
= 1;
315 min_char
= max_char
= entries
->entry
[0].c
;
316 nonascii_min_char
= MAX_CHAR
;
317 for (i
= 0; i
< n_entries
; i
++)
320 int from_index
, to_index
;
322 int idx
= i
% 0x10000;
324 if (i
> 0 && idx
== 0)
325 entries
= entries
->next
;
326 from
= entries
->entry
[idx
].from
;
327 to
= entries
->entry
[idx
].to
;
328 from_c
= entries
->entry
[idx
].c
;
329 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
332 to_index
= from_index
;
337 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
338 to_c
= from_c
+ (to_index
- from_index
);
340 if (from_index
< 0 || to_index
< 0)
345 else if (from_c
< min_char
)
348 if (control_flag
== 1)
350 if (charset
->method
== CHARSET_METHOD_MAP
)
351 for (; from_index
<= to_index
; from_index
++, from_c
++)
352 ASET (vec
, from_index
, make_number (from_c
));
354 for (; from_index
<= to_index
; from_index
++, from_c
++)
355 CHAR_TABLE_SET (Vchar_unify_table
,
356 CHARSET_CODE_OFFSET (charset
) + from_index
,
357 make_number (from_c
));
359 else if (control_flag
== 2)
361 if (charset
->method
== CHARSET_METHOD_MAP
362 && CHARSET_COMPACT_CODES_P (charset
))
363 for (; from_index
<= to_index
; from_index
++, from_c
++)
365 unsigned code
= INDEX_TO_CODE_POINT (charset
, from_index
);
367 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
368 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
371 for (; from_index
<= to_index
; from_index
++, from_c
++)
373 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
374 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
377 else if (control_flag
== 3)
378 for (; from_index
<= to_index
; from_index
++, from_c
++)
379 SET_TEMP_CHARSET_WORK_DECODER (from_c
, from_index
);
380 else if (control_flag
== 4)
381 for (; from_index
<= to_index
; from_index
++, from_c
++)
382 SET_TEMP_CHARSET_WORK_ENCODER (from_c
, from_index
);
383 else /* control_flag == 0 */
385 if (ascii_compatible_p
)
387 if (! ASCII_BYTE_P (from_c
))
389 if (from_c
< nonascii_min_char
)
390 nonascii_min_char
= from_c
;
392 else if (! ASCII_BYTE_P (to_c
))
394 nonascii_min_char
= 0x80;
398 for (; from_c
<= to_c
; from_c
++)
399 CHARSET_FAST_MAP_SET (from_c
, fast_map
);
403 if (control_flag
== 0)
405 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
406 ? nonascii_min_char
: min_char
);
407 CHARSET_MAX_CHAR (charset
) = max_char
;
409 else if (control_flag
== 4)
411 temp_charset_work
->min_char
= min_char
;
412 temp_charset_work
->max_char
= max_char
;
417 /* Read a hexadecimal number (preceded by "0x") from the file FP while
418 paying attention to comment character '#'. */
420 static INLINE
unsigned
421 read_hex (FILE *fp
, int *eof
)
426 while ((c
= getc (fp
)) != EOF
)
430 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
434 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
446 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
448 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
450 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
451 n
= (n
* 10) + c
- '0';
457 /* Return a mapping vector for CHARSET loaded from MAPFILE.
458 Each line of MAPFILE has this form
460 where 0xAAAA is a code-point and 0xCCCC is the corresponding
461 character code, or this form
463 where 0xAAAA and 0xBBBB are code-points specifying a range, and
464 0xCCCC is the first character code of the range.
466 The returned vector has this form:
467 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
468 where CODE1 is a code-point or a cons of code-points specifying a
471 Note that this function uses `openp' to open MAPFILE but ignores
472 `file-name-handler-alist' to avoid running any Lisp code. */
475 load_charset_map_from_file (struct charset
*charset
, Lisp_Object mapfile
, int control_flag
)
477 unsigned min_code
= CHARSET_MIN_CODE (charset
);
478 unsigned max_code
= CHARSET_MAX_CODE (charset
);
482 Lisp_Object suffixes
;
483 struct charset_map_entries
*head
, *entries
;
484 int n_entries
, count
;
487 suffixes
= Fcons (build_string (".map"),
488 Fcons (build_string (".TXT"), Qnil
));
490 count
= SPECPDL_INDEX ();
491 specbind (Qfile_name_handler_alist
, Qnil
);
492 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
493 unbind_to (count
, Qnil
);
495 || ! (fp
= fdopen (fd
, "r")))
496 error ("Failure in loading charset map: %S", SDATA (mapfile
));
498 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
499 large (larger than MAX_ALLOCA). */
500 SAFE_ALLOCA (head
, struct charset_map_entries
*,
501 sizeof (struct charset_map_entries
));
503 memset (entries
, 0, sizeof (struct charset_map_entries
));
513 from
= read_hex (fp
, &eof
);
516 if (getc (fp
) == '-')
517 to
= read_hex (fp
, &eof
);
520 c
= (int) read_hex (fp
, &eof
);
522 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
525 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
527 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
528 sizeof (struct charset_map_entries
));
529 entries
= entries
->next
;
530 memset (entries
, 0, sizeof (struct charset_map_entries
));
532 idx
= n_entries
% 0x10000;
533 entries
->entry
[idx
].from
= from
;
534 entries
->entry
[idx
].to
= to
;
535 entries
->entry
[idx
].c
= c
;
540 load_charset_map (charset
, head
, n_entries
, control_flag
);
545 load_charset_map_from_vector (struct charset
*charset
, Lisp_Object vec
, int control_flag
)
547 unsigned min_code
= CHARSET_MIN_CODE (charset
);
548 unsigned max_code
= CHARSET_MAX_CODE (charset
);
549 struct charset_map_entries
*head
, *entries
;
551 int len
= ASIZE (vec
);
557 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
561 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
562 large (larger than MAX_ALLOCA). */
563 SAFE_ALLOCA (head
, struct charset_map_entries
*,
564 sizeof (struct charset_map_entries
));
566 memset (entries
, 0, sizeof (struct charset_map_entries
));
569 for (i
= 0; i
< len
; i
+= 2)
571 Lisp_Object val
, val2
;
583 from
= XFASTINT (val
);
584 to
= XFASTINT (val2
);
589 from
= to
= XFASTINT (val
);
591 val
= AREF (vec
, i
+ 1);
595 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
598 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
600 SAFE_ALLOCA (entries
->next
, struct charset_map_entries
*,
601 sizeof (struct charset_map_entries
));
602 entries
= entries
->next
;
603 memset (entries
, 0, sizeof (struct charset_map_entries
));
605 idx
= n_entries
% 0x10000;
606 entries
->entry
[idx
].from
= from
;
607 entries
->entry
[idx
].to
= to
;
608 entries
->entry
[idx
].c
= c
;
612 load_charset_map (charset
, head
, n_entries
, control_flag
);
617 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
618 map it is (see the comment of load_charset_map for the detail). */
621 load_charset (struct charset
*charset
, int control_flag
)
625 if (inhibit_load_charset_map
627 && charset
== temp_charset_work
->current
628 && ((control_flag
== 2) == temp_charset_work
->for_encoder
))
631 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
632 map
= CHARSET_MAP (charset
);
633 else if (CHARSET_UNIFIED_P (charset
))
634 map
= CHARSET_UNIFY_MAP (charset
);
636 load_charset_map_from_file (charset
, map
, control_flag
);
638 load_charset_map_from_vector (charset
, map
, control_flag
);
642 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
643 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
646 return (CHARSETP (object
) ? Qt
: Qnil
);
650 void map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
),
651 Lisp_Object function
, Lisp_Object arg
,
652 unsigned from
, unsigned to
);
655 map_charset_for_dump (void (*c_function
) (Lisp_Object
, Lisp_Object
), Lisp_Object function
, Lisp_Object arg
, unsigned int from
, unsigned int to
)
657 int from_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, from
);
658 int to_idx
= CODE_POINT_TO_INDEX (temp_charset_work
->current
, to
);
663 range
= Fcons (Qnil
, Qnil
);
666 c
= temp_charset_work
->min_char
;
667 stop
= (temp_charset_work
->max_char
< 0x20000
668 ? temp_charset_work
->max_char
: 0xFFFF);
672 int index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
674 if (index
>= from_idx
&& index
<= to_idx
)
676 if (NILP (XCAR (range
)))
677 XSETCAR (range
, make_number (c
));
679 else if (! NILP (XCAR (range
)))
681 XSETCDR (range
, make_number (c
- 1));
683 (*c_function
) (arg
, range
);
685 call2 (function
, range
, arg
);
686 XSETCAR (range
, Qnil
);
690 if (c
== temp_charset_work
->max_char
)
692 if (! NILP (XCAR (range
)))
694 XSETCDR (range
, make_number (c
));
696 (*c_function
) (arg
, range
);
698 call2 (function
, range
, arg
);
703 stop
= temp_charset_work
->max_char
;
711 map_charset_chars (void (*c_function
)(Lisp_Object
, Lisp_Object
), Lisp_Object function
,
712 Lisp_Object arg
, struct charset
*charset
, unsigned from
, unsigned to
)
717 partial
= (from
> CHARSET_MIN_CODE (charset
)
718 || to
< CHARSET_MAX_CODE (charset
));
720 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
722 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
723 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
724 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
725 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
727 if (CHARSET_UNIFIED_P (charset
))
729 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
730 load_charset (charset
, 2);
731 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
732 map_char_table_for_charset (c_function
, function
,
733 CHARSET_DEUNIFIER (charset
), arg
,
734 partial
? charset
: NULL
, from
, to
);
736 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
739 range
= Fcons (make_number (from_c
), make_number (to_c
));
741 (*c_function
) (arg
, range
);
743 call2 (function
, range
, arg
);
745 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
747 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
748 load_charset (charset
, 2);
749 if (CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
750 map_char_table_for_charset (c_function
, function
,
751 CHARSET_ENCODER (charset
), arg
,
752 partial
? charset
: NULL
, from
, to
);
754 map_charset_for_dump (c_function
, function
, arg
, from
, to
);
756 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
758 Lisp_Object subset_info
;
761 subset_info
= CHARSET_SUBSET (charset
);
762 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
763 offset
= XINT (AREF (subset_info
, 3));
765 if (from
< XFASTINT (AREF (subset_info
, 1)))
766 from
= XFASTINT (AREF (subset_info
, 1));
768 if (to
> XFASTINT (AREF (subset_info
, 2)))
769 to
= XFASTINT (AREF (subset_info
, 2));
770 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
772 else /* i.e. CHARSET_METHOD_SUPERSET */
776 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
777 parents
= XCDR (parents
))
780 unsigned this_from
, this_to
;
782 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
783 offset
= XINT (XCDR (XCAR (parents
)));
784 this_from
= from
> offset
? from
- offset
: 0;
785 this_to
= to
> offset
? to
- offset
: 0;
786 if (this_from
< CHARSET_MIN_CODE (charset
))
787 this_from
= CHARSET_MIN_CODE (charset
);
788 if (this_to
> CHARSET_MAX_CODE (charset
))
789 this_to
= CHARSET_MAX_CODE (charset
);
790 map_charset_chars (c_function
, function
, arg
, charset
,
796 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
797 doc
: /* Call FUNCTION for all characters in CHARSET.
798 FUNCTION is called with an argument RANGE and the optional 3rd
801 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
802 characters contained in CHARSET.
804 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
805 range of code points (in CHARSET) of target characters. */)
806 (Lisp_Object function
, Lisp_Object charset
, Lisp_Object arg
, Lisp_Object from_code
, Lisp_Object to_code
)
811 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
812 if (NILP (from_code
))
813 from
= CHARSET_MIN_CODE (cs
);
816 CHECK_NATNUM (from_code
);
817 from
= XINT (from_code
);
818 if (from
< CHARSET_MIN_CODE (cs
))
819 from
= CHARSET_MIN_CODE (cs
);
822 to
= CHARSET_MAX_CODE (cs
);
825 CHECK_NATNUM (to_code
);
827 if (to
> CHARSET_MAX_CODE (cs
))
828 to
= CHARSET_MAX_CODE (cs
);
830 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
835 /* Define a charset according to the arguments. The Nth argument is
836 the Nth attribute of the charset (the last attribute `charset-id'
837 is not included). See the docstring of `define-charset' for the
840 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
841 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
842 doc
: /* For internal use only.
843 usage: (define-charset-internal ...) */)
844 (int nargs
, Lisp_Object
*args
)
846 /* Charset attr vector. */
850 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
852 struct charset charset
;
855 int new_definition_p
;
858 if (nargs
!= charset_arg_max
)
859 return Fsignal (Qwrong_number_of_arguments
,
860 Fcons (intern ("define-charset-internal"),
861 make_number (nargs
)));
863 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
865 CHECK_SYMBOL (args
[charset_arg_name
]);
866 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
868 val
= args
[charset_arg_code_space
];
869 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
871 int min_byte
, max_byte
;
873 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
874 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
875 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
876 error ("Invalid :code-space value");
877 charset
.code_space
[i
* 4] = min_byte
;
878 charset
.code_space
[i
* 4 + 1] = max_byte
;
879 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
880 nchars
*= charset
.code_space
[i
* 4 + 2];
881 charset
.code_space
[i
* 4 + 3] = nchars
;
886 val
= args
[charset_arg_dimension
];
888 charset
.dimension
= dimension
;
892 charset
.dimension
= XINT (val
);
893 if (charset
.dimension
< 1 || charset
.dimension
> 4)
894 args_out_of_range_3 (val
, make_number (1), make_number (4));
897 charset
.code_linear_p
898 = (charset
.dimension
== 1
899 || (charset
.code_space
[2] == 256
900 && (charset
.dimension
== 2
901 || (charset
.code_space
[6] == 256
902 && (charset
.dimension
== 3
903 || charset
.code_space
[10] == 256)))));
905 if (! charset
.code_linear_p
)
907 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
908 memset (charset
.code_space_mask
, 0, 256);
909 for (i
= 0; i
< 4; i
++)
910 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
912 charset
.code_space_mask
[j
] |= (1 << i
);
915 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
917 charset
.min_code
= (charset
.code_space
[0]
918 | (charset
.code_space
[4] << 8)
919 | (charset
.code_space
[8] << 16)
920 | (charset
.code_space
[12] << 24));
921 charset
.max_code
= (charset
.code_space
[1]
922 | (charset
.code_space
[5] << 8)
923 | (charset
.code_space
[9] << 16)
924 | (charset
.code_space
[13] << 24));
925 charset
.char_index_offset
= 0;
927 val
= args
[charset_arg_min_code
];
937 CHECK_NUMBER_CAR (val
);
938 CHECK_NUMBER_CDR (val
);
939 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
941 if (code
< charset
.min_code
942 || code
> charset
.max_code
)
943 args_out_of_range_3 (make_number (charset
.min_code
),
944 make_number (charset
.max_code
), val
);
945 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
946 charset
.min_code
= code
;
949 val
= args
[charset_arg_max_code
];
959 CHECK_NUMBER_CAR (val
);
960 CHECK_NUMBER_CDR (val
);
961 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
963 if (code
< charset
.min_code
964 || code
> charset
.max_code
)
965 args_out_of_range_3 (make_number (charset
.min_code
),
966 make_number (charset
.max_code
), val
);
967 charset
.max_code
= code
;
970 charset
.compact_codes_p
= charset
.max_code
< 0x10000;
972 val
= args
[charset_arg_invalid_code
];
975 if (charset
.min_code
> 0)
976 charset
.invalid_code
= 0;
979 XSETINT (val
, charset
.max_code
+ 1);
980 if (XINT (val
) == charset
.max_code
+ 1)
981 charset
.invalid_code
= charset
.max_code
+ 1;
983 error ("Attribute :invalid-code must be specified");
989 charset
.invalid_code
= XFASTINT (val
);
992 val
= args
[charset_arg_iso_final
];
994 charset
.iso_final
= -1;
998 if (XINT (val
) < '0' || XINT (val
) > 127)
999 error ("Invalid iso-final-char: %d", XINT (val
));
1000 charset
.iso_final
= XINT (val
);
1003 val
= args
[charset_arg_iso_revision
];
1005 charset
.iso_revision
= -1;
1009 if (XINT (val
) > 63)
1010 args_out_of_range (make_number (63), val
);
1011 charset
.iso_revision
= XINT (val
);
1014 val
= args
[charset_arg_emacs_mule_id
];
1016 charset
.emacs_mule_id
= -1;
1020 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
1021 error ("Invalid emacs-mule-id: %d", XINT (val
));
1022 charset
.emacs_mule_id
= XINT (val
);
1025 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
1027 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
1029 charset
.unified_p
= 0;
1031 memset (charset
.fast_map
, 0, sizeof (charset
.fast_map
));
1033 if (! NILP (args
[charset_arg_code_offset
]))
1035 val
= args
[charset_arg_code_offset
];
1038 charset
.method
= CHARSET_METHOD_OFFSET
;
1039 charset
.code_offset
= XINT (val
);
1041 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
1042 charset
.min_char
= i
+ charset
.code_offset
;
1043 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
1044 charset
.max_char
= i
+ charset
.code_offset
;
1045 if (charset
.max_char
> MAX_CHAR
)
1046 error ("Unsupported max char: %d", charset
.max_char
);
1048 i
= (charset
.min_char
>> 7) << 7;
1049 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
1050 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1051 i
= (i
>> 12) << 12;
1052 for (; i
<= charset
.max_char
; i
+= 0x1000)
1053 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
1054 if (charset
.code_offset
== 0 && charset
.max_char
>= 0x80)
1055 charset
.ascii_compatible_p
= 1;
1057 else if (! NILP (args
[charset_arg_map
]))
1059 val
= args
[charset_arg_map
];
1060 ASET (attrs
, charset_map
, val
);
1061 charset
.method
= CHARSET_METHOD_MAP
;
1063 else if (! NILP (args
[charset_arg_subset
]))
1066 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
1067 struct charset
*parent_charset
;
1069 val
= args
[charset_arg_subset
];
1070 parent
= Fcar (val
);
1071 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
1072 parent_min_code
= Fnth (make_number (1), val
);
1073 CHECK_NATNUM (parent_min_code
);
1074 parent_max_code
= Fnth (make_number (2), val
);
1075 CHECK_NATNUM (parent_max_code
);
1076 parent_code_offset
= Fnth (make_number (3), val
);
1077 CHECK_NUMBER (parent_code_offset
);
1078 val
= Fmake_vector (make_number (4), Qnil
);
1079 ASET (val
, 0, make_number (parent_charset
->id
));
1080 ASET (val
, 1, parent_min_code
);
1081 ASET (val
, 2, parent_max_code
);
1082 ASET (val
, 3, parent_code_offset
);
1083 ASET (attrs
, charset_subset
, val
);
1085 charset
.method
= CHARSET_METHOD_SUBSET
;
1086 /* Here, we just copy the parent's fast_map. It's not accurate,
1087 but at least it works for quickly detecting which character
1088 DOESN'T belong to this charset. */
1089 for (i
= 0; i
< 190; i
++)
1090 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
1092 /* We also copy these for parents. */
1093 charset
.min_char
= parent_charset
->min_char
;
1094 charset
.max_char
= parent_charset
->max_char
;
1096 else if (! NILP (args
[charset_arg_superset
]))
1098 val
= args
[charset_arg_superset
];
1099 charset
.method
= CHARSET_METHOD_SUPERSET
;
1100 val
= Fcopy_sequence (val
);
1101 ASET (attrs
, charset_superset
, val
);
1103 charset
.min_char
= MAX_CHAR
;
1104 charset
.max_char
= 0;
1105 for (; ! NILP (val
); val
= Fcdr (val
))
1107 Lisp_Object elt
, car_part
, cdr_part
;
1108 int this_id
, offset
;
1109 struct charset
*this_charset
;
1114 car_part
= XCAR (elt
);
1115 cdr_part
= XCDR (elt
);
1116 CHECK_CHARSET_GET_ID (car_part
, this_id
);
1117 CHECK_NUMBER (cdr_part
);
1118 offset
= XINT (cdr_part
);
1122 CHECK_CHARSET_GET_ID (elt
, this_id
);
1125 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
1127 this_charset
= CHARSET_FROM_ID (this_id
);
1128 if (charset
.min_char
> this_charset
->min_char
)
1129 charset
.min_char
= this_charset
->min_char
;
1130 if (charset
.max_char
< this_charset
->max_char
)
1131 charset
.max_char
= this_charset
->max_char
;
1132 for (i
= 0; i
< 190; i
++)
1133 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
1137 error ("None of :code-offset, :map, :parents are specified");
1139 val
= args
[charset_arg_unify_map
];
1140 if (! NILP (val
) && !STRINGP (val
))
1142 ASET (attrs
, charset_unify_map
, val
);
1144 CHECK_LIST (args
[charset_arg_plist
]);
1145 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1147 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1149 if (charset
.hash_index
>= 0)
1151 new_definition_p
= 0;
1152 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1153 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1157 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1159 if (charset_table_used
== charset_table_size
)
1161 struct charset
*new_table
1162 = (struct charset
*) xmalloc (sizeof (struct charset
)
1163 * (charset_table_size
+ 16));
1164 memcpy (new_table
, charset_table
,
1165 sizeof (struct charset
) * charset_table_size
);
1166 charset_table_size
+= 16;
1167 charset_table
= new_table
;
1169 id
= charset_table_used
++;
1170 new_definition_p
= 1;
1173 ASET (attrs
, charset_id
, make_number (id
));
1175 charset_table
[id
] = charset
;
1177 if (charset
.method
== CHARSET_METHOD_MAP
)
1179 load_charset (&charset
, 0);
1180 charset_table
[id
] = charset
;
1183 if (charset
.iso_final
>= 0)
1185 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1186 charset
.iso_final
) = id
;
1187 if (new_definition_p
)
1188 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1189 Fcons (make_number (id
), Qnil
));
1190 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1191 charset_jisx0201_roman
= id
;
1192 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1193 charset_jisx0208_1978
= id
;
1194 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1195 charset_jisx0208
= id
;
1196 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id
)
1197 charset_ksc5601
= id
;
1200 if (charset
.emacs_mule_id
>= 0)
1202 emacs_mule_charset
[charset
.emacs_mule_id
] = id
;
1203 if (charset
.emacs_mule_id
< 0xA0)
1204 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1206 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 2;
1207 if (new_definition_p
)
1208 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1209 Fcons (make_number (id
), Qnil
));
1212 if (new_definition_p
)
1214 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1215 if (charset
.supplementary_p
)
1216 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1217 Fcons (make_number (id
), Qnil
));
1222 for (tail
= Vcharset_ordered_list
; CONSP (tail
); tail
= XCDR (tail
))
1224 struct charset
*cs
= CHARSET_FROM_ID (XINT (XCAR (tail
)));
1226 if (cs
->supplementary_p
)
1229 if (EQ (tail
, Vcharset_ordered_list
))
1230 Vcharset_ordered_list
= Fcons (make_number (id
),
1231 Vcharset_ordered_list
);
1232 else if (NILP (tail
))
1233 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1234 Fcons (make_number (id
), Qnil
));
1237 val
= Fcons (XCAR (tail
), XCDR (tail
));
1238 XSETCDR (tail
, val
);
1239 XSETCAR (tail
, make_number (id
));
1242 charset_ordered_list_tick
++;
1249 /* Same as Fdefine_charset_internal but arguments are more convenient
1250 to call from C (typically in syms_of_charset). This can define a
1251 charset of `offset' method only. Return the ID of the new
1255 define_charset_internal (Lisp_Object name
,
1257 const unsigned char *code_space
,
1258 unsigned min_code
, unsigned max_code
,
1259 int iso_final
, int iso_revision
, int emacs_mule_id
,
1260 int ascii_compatible
, int supplementary
,
1263 Lisp_Object args
[charset_arg_max
];
1264 Lisp_Object plist
[14];
1268 args
[charset_arg_name
] = name
;
1269 args
[charset_arg_dimension
] = make_number (dimension
);
1270 val
= Fmake_vector (make_number (8), make_number (0));
1271 for (i
= 0; i
< 8; i
++)
1272 ASET (val
, i
, make_number (code_space
[i
]));
1273 args
[charset_arg_code_space
] = val
;
1274 args
[charset_arg_min_code
] = make_number (min_code
);
1275 args
[charset_arg_max_code
] = make_number (max_code
);
1276 args
[charset_arg_iso_final
]
1277 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1278 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1279 args
[charset_arg_emacs_mule_id
]
1280 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1281 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1282 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1283 args
[charset_arg_invalid_code
] = Qnil
;
1284 args
[charset_arg_code_offset
] = make_number (code_offset
);
1285 args
[charset_arg_map
] = Qnil
;
1286 args
[charset_arg_subset
] = Qnil
;
1287 args
[charset_arg_superset
] = Qnil
;
1288 args
[charset_arg_unify_map
] = Qnil
;
1290 plist
[0] = intern_c_string (":name");
1291 plist
[1] = args
[charset_arg_name
];
1292 plist
[2] = intern_c_string (":dimension");
1293 plist
[3] = args
[charset_arg_dimension
];
1294 plist
[4] = intern_c_string (":code-space");
1295 plist
[5] = args
[charset_arg_code_space
];
1296 plist
[6] = intern_c_string (":iso-final-char");
1297 plist
[7] = args
[charset_arg_iso_final
];
1298 plist
[8] = intern_c_string (":emacs-mule-id");
1299 plist
[9] = args
[charset_arg_emacs_mule_id
];
1300 plist
[10] = intern_c_string (":ascii-compatible-p");
1301 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1302 plist
[12] = intern_c_string (":code-offset");
1303 plist
[13] = args
[charset_arg_code_offset
];
1305 args
[charset_arg_plist
] = Flist (14, plist
);
1306 Fdefine_charset_internal (charset_arg_max
, args
);
1308 return XINT (CHARSET_SYMBOL_ID (name
));
1312 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1313 Sdefine_charset_alias
, 2, 2, 0,
1314 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1315 (Lisp_Object alias
, Lisp_Object charset
)
1319 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1320 Fputhash (alias
, attr
, Vcharset_hash_table
);
1321 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1326 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1327 doc
: /* Return the property list of CHARSET. */)
1328 (Lisp_Object charset
)
1332 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1333 return CHARSET_ATTR_PLIST (attrs
);
1337 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1338 doc
: /* Set CHARSET's property list to PLIST. */)
1339 (Lisp_Object charset
, Lisp_Object plist
)
1343 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1344 CHARSET_ATTR_PLIST (attrs
) = plist
;
1349 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1350 doc
: /* Unify characters of CHARSET with Unicode.
1351 This means reading the relevant file and installing the table defined
1352 by CHARSET's `:unify-map' property.
1354 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1355 the same meaning as the `:unify-map' attribute in the function
1356 `define-charset' (which see).
1358 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1359 (Lisp_Object charset
, Lisp_Object unify_map
, Lisp_Object deunify
)
1364 CHECK_CHARSET_GET_ID (charset
, id
);
1365 cs
= CHARSET_FROM_ID (id
);
1367 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1368 : ! CHARSET_UNIFIED_P (cs
))
1371 CHARSET_UNIFIED_P (cs
) = 0;
1374 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
1375 || CHARSET_CODE_OFFSET (cs
) < 0x110000)
1376 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1377 if (NILP (unify_map
))
1378 unify_map
= CHARSET_UNIFY_MAP (cs
);
1381 if (! STRINGP (unify_map
) && ! VECTORP (unify_map
))
1382 signal_error ("Bad unify-map", unify_map
);
1383 CHARSET_UNIFY_MAP (cs
) = unify_map
;
1385 if (NILP (Vchar_unify_table
))
1386 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
1387 char_table_set_range (Vchar_unify_table
,
1388 cs
->min_char
, cs
->max_char
, charset
);
1389 CHARSET_UNIFIED_P (cs
) = 1;
1391 else if (CHAR_TABLE_P (Vchar_unify_table
))
1393 int min_code
= CHARSET_MIN_CODE (cs
);
1394 int max_code
= CHARSET_MAX_CODE (cs
);
1395 int min_char
= DECODE_CHAR (cs
, min_code
);
1396 int max_char
= DECODE_CHAR (cs
, max_code
);
1398 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1404 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1405 Sget_unused_iso_final_char
, 2, 2, 0,
1407 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1408 DIMENSION is the number of bytes to represent a character: 1 or 2.
1409 CHARS is the number of characters in a dimension: 94 or 96.
1411 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1412 If there's no unused final char for the specified kind of charset,
1414 (Lisp_Object dimension
, Lisp_Object chars
)
1418 CHECK_NUMBER (dimension
);
1419 CHECK_NUMBER (chars
);
1420 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1421 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1422 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1423 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1424 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1425 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1427 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1431 check_iso_charset_parameter (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
1433 CHECK_NATNUM (dimension
);
1434 CHECK_NATNUM (chars
);
1435 CHECK_NATNUM (final_char
);
1437 if (XINT (dimension
) > 3)
1438 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1439 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1440 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1441 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1442 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1446 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1448 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1450 On decoding by an ISO-2022 base coding system, when a charset
1451 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1452 if CHARSET is designated instead. */)
1453 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
, Lisp_Object charset
)
1458 CHECK_CHARSET_GET_ID (charset
, id
);
1459 check_iso_charset_parameter (dimension
, chars
, final_char
);
1460 chars_flag
= XINT (chars
) == 96;
1461 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1466 /* Return information about charsets in the text at PTR of NBYTES
1467 bytes, which are NCHARS characters. The value is:
1469 0: Each character is represented by one byte. This is always
1470 true for a unibyte string. For a multibyte string, true if
1471 it contains only ASCII characters.
1473 1: No charsets other than ascii, control-1, and latin-1 are
1480 string_xstring_p (Lisp_Object string
)
1482 const unsigned char *p
= SDATA (string
);
1483 const unsigned char *endp
= p
+ SBYTES (string
);
1485 if (SCHARS (string
) == SBYTES (string
))
1490 int c
= STRING_CHAR_ADVANCE (p
);
1499 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1501 CHARSETS is a vector. If Nth element is non-nil, it means the
1502 charset whose id is N is already found.
1504 It may lookup a translation table TABLE if supplied. */
1507 find_charsets_in_text (const unsigned char *ptr
, EMACS_INT nchars
, EMACS_INT nbytes
, Lisp_Object charsets
, Lisp_Object table
, int multibyte
)
1509 const unsigned char *pend
= ptr
+ nbytes
;
1511 if (nchars
== nbytes
)
1514 ASET (charsets
, charset_ascii
, Qt
);
1521 c
= translate_char (table
, c
);
1522 if (ASCII_BYTE_P (c
))
1523 ASET (charsets
, charset_ascii
, Qt
);
1525 ASET (charsets
, charset_eight_bit
, Qt
);
1532 int c
= STRING_CHAR_ADVANCE (ptr
);
1533 struct charset
*charset
;
1536 c
= translate_char (table
, c
);
1537 charset
= CHAR_CHARSET (c
);
1538 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1543 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1545 doc
: /* Return a list of charsets in the region between BEG and END.
1546 BEG and END are buffer positions.
1547 Optional arg TABLE if non-nil is a translation table to look up.
1549 If the current buffer is unibyte, the returned list may contain
1550 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1551 (Lisp_Object beg
, Lisp_Object end
, Lisp_Object table
)
1553 Lisp_Object charsets
;
1554 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1557 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1559 validate_region (&beg
, &end
);
1560 from
= XFASTINT (beg
);
1561 stop
= to
= XFASTINT (end
);
1563 if (from
< GPT
&& GPT
< to
)
1566 stop_byte
= GPT_BYTE
;
1569 stop_byte
= CHAR_TO_BYTE (stop
);
1571 from_byte
= CHAR_TO_BYTE (from
);
1573 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1576 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1577 stop_byte
- from_byte
, charsets
, table
,
1581 from
= stop
, from_byte
= stop_byte
;
1582 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1589 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1590 if (!NILP (AREF (charsets
, i
)))
1591 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1595 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1597 doc
: /* Return a list of charsets in STR.
1598 Optional arg TABLE if non-nil is a translation table to look up.
1600 If STR is unibyte, the returned list may contain
1601 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1602 (Lisp_Object str
, Lisp_Object table
)
1604 Lisp_Object charsets
;
1610 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1611 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1613 STRING_MULTIBYTE (str
));
1615 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1616 if (!NILP (AREF (charsets
, i
)))
1617 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1623 /* Return a unified character code for C (>= 0x110000). VAL is a
1624 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1627 maybe_unify_char (int c
, Lisp_Object val
)
1629 struct charset
*charset
;
1636 CHECK_CHARSET_GET_CHARSET (val
, charset
);
1637 load_charset (charset
, 1);
1638 if (! inhibit_load_charset_map
)
1640 val
= CHAR_TABLE_REF (Vchar_unify_table
, c
);
1646 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1647 int unified
= GET_TEMP_CHARSET_WORK_DECODER (code_index
);
1656 /* Return a character correponding to the code-point CODE of
1660 decode_char (struct charset
*charset
, unsigned int code
)
1663 enum charset_method method
= CHARSET_METHOD (charset
);
1665 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1668 if (method
== CHARSET_METHOD_SUBSET
)
1670 Lisp_Object subset_info
;
1672 subset_info
= CHARSET_SUBSET (charset
);
1673 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1674 code
-= XINT (AREF (subset_info
, 3));
1675 if (code
< XFASTINT (AREF (subset_info
, 1))
1676 || code
> XFASTINT (AREF (subset_info
, 2)))
1679 c
= DECODE_CHAR (charset
, code
);
1681 else if (method
== CHARSET_METHOD_SUPERSET
)
1683 Lisp_Object parents
;
1685 parents
= CHARSET_SUPERSET (charset
);
1687 for (; CONSP (parents
); parents
= XCDR (parents
))
1689 int id
= XINT (XCAR (XCAR (parents
)));
1690 int code_offset
= XINT (XCDR (XCAR (parents
)));
1691 unsigned this_code
= code
- code_offset
;
1693 charset
= CHARSET_FROM_ID (id
);
1694 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1700 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1704 if (method
== CHARSET_METHOD_MAP
)
1706 Lisp_Object decoder
;
1708 decoder
= CHARSET_DECODER (charset
);
1709 if (! VECTORP (decoder
))
1711 load_charset (charset
, 1);
1712 decoder
= CHARSET_DECODER (charset
);
1714 if (VECTORP (decoder
))
1715 c
= XINT (AREF (decoder
, char_index
));
1717 c
= GET_TEMP_CHARSET_WORK_DECODER (char_index
);
1719 else /* method == CHARSET_METHOD_OFFSET */
1721 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1722 if (CHARSET_UNIFIED_P (charset
)
1723 && c
> MAX_UNICODE_CHAR
)
1724 MAYBE_UNIFY_CHAR (c
);
1731 /* Variable used temporarily by the macro ENCODE_CHAR. */
1732 Lisp_Object charset_work
;
1734 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1735 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1736 use CHARSET's strict_max_char instead of max_char. */
1739 encode_char (struct charset
*charset
, int c
)
1742 enum charset_method method
= CHARSET_METHOD (charset
);
1744 if (CHARSET_UNIFIED_P (charset
))
1746 Lisp_Object deunifier
;
1747 int code_index
= -1;
1749 deunifier
= CHARSET_DEUNIFIER (charset
);
1750 if (! CHAR_TABLE_P (deunifier
))
1752 load_charset (charset
, 2);
1753 deunifier
= CHARSET_DEUNIFIER (charset
);
1755 if (CHAR_TABLE_P (deunifier
))
1757 Lisp_Object deunified
= CHAR_TABLE_REF (deunifier
, c
);
1759 if (INTEGERP (deunified
))
1760 code_index
= XINT (deunified
);
1764 code_index
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1766 if (code_index
>= 0)
1767 c
= CHARSET_CODE_OFFSET (charset
) + code_index
;
1770 if (method
== CHARSET_METHOD_SUBSET
)
1772 Lisp_Object subset_info
;
1773 struct charset
*this_charset
;
1775 subset_info
= CHARSET_SUBSET (charset
);
1776 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1777 code
= ENCODE_CHAR (this_charset
, c
);
1778 if (code
== CHARSET_INVALID_CODE (this_charset
)
1779 || code
< XFASTINT (AREF (subset_info
, 1))
1780 || code
> XFASTINT (AREF (subset_info
, 2)))
1781 return CHARSET_INVALID_CODE (charset
);
1782 code
+= XINT (AREF (subset_info
, 3));
1786 if (method
== CHARSET_METHOD_SUPERSET
)
1788 Lisp_Object parents
;
1790 parents
= CHARSET_SUPERSET (charset
);
1791 for (; CONSP (parents
); parents
= XCDR (parents
))
1793 int id
= XINT (XCAR (XCAR (parents
)));
1794 int code_offset
= XINT (XCDR (XCAR (parents
)));
1795 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1797 code
= ENCODE_CHAR (this_charset
, c
);
1798 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1799 return code
+ code_offset
;
1801 return CHARSET_INVALID_CODE (charset
);
1804 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1805 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1806 return CHARSET_INVALID_CODE (charset
);
1808 if (method
== CHARSET_METHOD_MAP
)
1810 Lisp_Object encoder
;
1813 encoder
= CHARSET_ENCODER (charset
);
1814 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1816 load_charset (charset
, 2);
1817 encoder
= CHARSET_ENCODER (charset
);
1819 if (CHAR_TABLE_P (encoder
))
1821 val
= CHAR_TABLE_REF (encoder
, c
);
1823 return CHARSET_INVALID_CODE (charset
);
1825 if (! CHARSET_COMPACT_CODES_P (charset
))
1826 code
= INDEX_TO_CODE_POINT (charset
, code
);
1830 code
= GET_TEMP_CHARSET_WORK_ENCODER (c
);
1831 code
= INDEX_TO_CODE_POINT (charset
, code
);
1834 else /* method == CHARSET_METHOD_OFFSET */
1836 int code_index
= c
- CHARSET_CODE_OFFSET (charset
);
1838 code
= INDEX_TO_CODE_POINT (charset
, code_index
);
1845 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1846 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1847 Return nil if CODE-POINT is not valid in CHARSET.
1849 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1851 Optional argument RESTRICTION specifies a way to map the pair of CCS
1852 and CODE-POINT to a character. Currently not supported and just ignored. */)
1853 (Lisp_Object charset
, Lisp_Object code_point
, Lisp_Object restriction
)
1857 struct charset
*charsetp
;
1859 CHECK_CHARSET_GET_ID (charset
, id
);
1860 if (CONSP (code_point
))
1862 CHECK_NATNUM_CAR (code_point
);
1863 CHECK_NATNUM_CDR (code_point
);
1864 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1868 CHECK_NATNUM (code_point
);
1869 code
= XINT (code_point
);
1871 charsetp
= CHARSET_FROM_ID (id
);
1872 c
= DECODE_CHAR (charsetp
, code
);
1873 return (c
>= 0 ? make_number (c
) : Qnil
);
1877 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1878 doc
: /* Encode the character CH into a code-point of CHARSET.
1879 Return nil if CHARSET doesn't include CH.
1881 Optional argument RESTRICTION specifies a way to map CH to a
1882 code-point in CCS. Currently not supported and just ignored. */)
1883 (Lisp_Object ch
, Lisp_Object charset
, Lisp_Object restriction
)
1887 struct charset
*charsetp
;
1889 CHECK_CHARSET_GET_ID (charset
, id
);
1891 charsetp
= CHARSET_FROM_ID (id
);
1892 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1893 if (code
== CHARSET_INVALID_CODE (charsetp
))
1895 if (code
> 0x7FFFFFF)
1896 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1897 return make_number (code
);
1901 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1903 /* Return a character of CHARSET whose position codes are CODEn.
1905 CODE1 through CODE4 are optional, but if you don't supply sufficient
1906 position codes, it is assumed that the minimum code in each dimension
1908 (Lisp_Object charset
, Lisp_Object code1
, Lisp_Object code2
, Lisp_Object code3
, Lisp_Object code4
)
1911 struct charset
*charsetp
;
1915 CHECK_CHARSET_GET_ID (charset
, id
);
1916 charsetp
= CHARSET_FROM_ID (id
);
1918 dimension
= CHARSET_DIMENSION (charsetp
);
1920 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1921 ? 0 : CHARSET_MIN_CODE (charsetp
));
1924 CHECK_NATNUM (code1
);
1925 if (XFASTINT (code1
) >= 0x100)
1926 args_out_of_range (make_number (0xFF), code1
);
1927 code
= XFASTINT (code1
);
1933 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1936 CHECK_NATNUM (code2
);
1937 if (XFASTINT (code2
) >= 0x100)
1938 args_out_of_range (make_number (0xFF), code2
);
1939 code
|= XFASTINT (code2
);
1946 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1949 CHECK_NATNUM (code3
);
1950 if (XFASTINT (code3
) >= 0x100)
1951 args_out_of_range (make_number (0xFF), code3
);
1952 code
|= XFASTINT (code3
);
1959 code
|= charsetp
->code_space
[0];
1962 CHECK_NATNUM (code4
);
1963 if (XFASTINT (code4
) >= 0x100)
1964 args_out_of_range (make_number (0xFF), code4
);
1965 code
|= XFASTINT (code4
);
1972 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1974 c
= DECODE_CHAR (charsetp
, code
);
1976 error ("Invalid code(s)");
1977 return make_number (c
);
1981 /* Return the first charset in CHARSET_LIST that contains C.
1982 CHARSET_LIST is a list of charset IDs. If it is nil, use
1983 Vcharset_ordered_list. */
1986 char_charset (int c
, Lisp_Object charset_list
, unsigned int *code_return
)
1990 if (NILP (charset_list
))
1991 charset_list
= Vcharset_ordered_list
;
1995 while (CONSP (charset_list
))
1997 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1998 unsigned code
= ENCODE_CHAR (charset
, c
);
2000 if (code
!= CHARSET_INVALID_CODE (charset
))
2003 *code_return
= code
;
2006 charset_list
= XCDR (charset_list
);
2008 && c
<= MAX_UNICODE_CHAR
2009 && EQ (charset_list
, Vcharset_non_preferred_head
))
2010 return CHARSET_FROM_ID (charset_unicode
);
2012 return (maybe_null
? NULL
2013 : c
<= MAX_5_BYTE_CHAR
? CHARSET_FROM_ID (charset_emacs
)
2014 : CHARSET_FROM_ID (charset_eight_bit
));
2018 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
2020 /*Return list of charset and one to four position-codes of CH.
2021 The charset is decided by the current priority order of charsets.
2022 A position-code is a byte value of each dimension of the code-point of
2023 CH in the charset. */)
2026 struct charset
*charset
;
2031 CHECK_CHARACTER (ch
);
2033 charset
= CHAR_CHARSET (c
);
2036 code
= ENCODE_CHAR (charset
, c
);
2037 if (code
== CHARSET_INVALID_CODE (charset
))
2039 dimension
= CHARSET_DIMENSION (charset
);
2040 for (val
= Qnil
; dimension
> 0; dimension
--)
2042 val
= Fcons (make_number (code
& 0xFF), val
);
2045 return Fcons (CHARSET_NAME (charset
), val
);
2049 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 2, 0,
2050 doc
: /* Return the charset of highest priority that contains CH.
2051 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2052 from which to find the charset. It may also be a coding system. In
2053 that case, find the charset from what supported by that coding system. */)
2054 (Lisp_Object ch
, Lisp_Object restriction
)
2056 struct charset
*charset
;
2058 CHECK_CHARACTER (ch
);
2059 if (NILP (restriction
))
2060 charset
= CHAR_CHARSET (XINT (ch
));
2063 if (CONSP (restriction
))
2065 int c
= XFASTINT (ch
);
2067 for (; CONSP (restriction
); restriction
= XCDR (restriction
))
2069 struct charset
*charset
;
2071 CHECK_CHARSET_GET_CHARSET (XCAR (restriction
), charset
);
2072 if (ENCODE_CHAR (charset
, c
) != CHARSET_INVALID_CODE (charset
))
2073 return XCAR (restriction
);
2077 restriction
= coding_system_charset_list (restriction
);
2078 charset
= char_charset (XINT (ch
), restriction
, NULL
);
2082 return (CHARSET_NAME (charset
));
2086 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
2088 Return charset of a character in the current buffer at position POS.
2089 If POS is nil, it defauls to the current point.
2090 If POS is out of range, the value is nil. */)
2094 struct charset
*charset
;
2096 ch
= Fchar_after (pos
);
2097 if (! INTEGERP (ch
))
2099 charset
= CHAR_CHARSET (XINT (ch
));
2100 return (CHARSET_NAME (charset
));
2104 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
2106 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2108 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2109 by their DIMENSION, CHARS, and FINAL-CHAR,
2110 whereas Emacs distinguishes them by charset symbol.
2111 See the documentation of the function `charset-info' for the meanings of
2112 DIMENSION, CHARS, and FINAL-CHAR. */)
2113 (Lisp_Object dimension
, Lisp_Object chars
, Lisp_Object final_char
)
2118 check_iso_charset_parameter (dimension
, chars
, final_char
);
2119 chars_flag
= XFASTINT (chars
) == 96;
2120 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
2121 XFASTINT (final_char
));
2122 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
2126 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
2130 Clear temporary charset mapping tables.
2131 It should be called only from temacs invoked for dumping. */)
2134 if (temp_charset_work
)
2136 free (temp_charset_work
);
2137 temp_charset_work
= NULL
;
2140 if (CHAR_TABLE_P (Vchar_unify_table
))
2141 Foptimize_char_table (Vchar_unify_table
, Qnil
);
2146 DEFUN ("charset-priority-list", Fcharset_priority_list
,
2147 Scharset_priority_list
, 0, 1, 0,
2148 doc
: /* Return the list of charsets ordered by priority.
2149 HIGHESTP non-nil means just return the highest priority one. */)
2150 (Lisp_Object highestp
)
2152 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
2154 if (!NILP (highestp
))
2155 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
2157 while (!NILP (list
))
2159 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
2162 return Fnreverse (val
);
2165 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
2167 doc
: /* Assign higher priority to the charsets given as arguments.
2168 usage: (set-charset-priority &rest charsets) */)
2169 (int nargs
, Lisp_Object
*args
)
2171 Lisp_Object new_head
, old_list
, arglist
[2];
2172 Lisp_Object list_2022
, list_emacs_mule
;
2175 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2177 for (i
= 0; i
< nargs
; i
++)
2179 CHECK_CHARSET_GET_ID (args
[i
], id
);
2180 if (! NILP (Fmemq (make_number (id
), old_list
)))
2182 old_list
= Fdelq (make_number (id
), old_list
);
2183 new_head
= Fcons (make_number (id
), new_head
);
2186 arglist
[0] = Fnreverse (new_head
);
2187 arglist
[1] = Vcharset_non_preferred_head
= old_list
;
2188 Vcharset_ordered_list
= Fnconc (2, arglist
);
2189 charset_ordered_list_tick
++;
2191 charset_unibyte
= -1;
2192 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2193 CONSP (old_list
); old_list
= XCDR (old_list
))
2195 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2196 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2197 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2198 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2199 if (charset_unibyte
< 0)
2201 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (old_list
)));
2203 if (CHARSET_DIMENSION (charset
) == 1
2204 && CHARSET_ASCII_COMPATIBLE_P (charset
)
2205 && CHARSET_MAX_CHAR (charset
) >= 0x80)
2206 charset_unibyte
= CHARSET_ID (charset
);
2209 Viso_2022_charset_list
= Fnreverse (list_2022
);
2210 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2211 if (charset_unibyte
< 0)
2212 charset_unibyte
= charset_iso_8859_1
;
2217 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2219 doc
: /* Internal use only.
2220 Return charset identification number of CHARSET. */)
2221 (Lisp_Object charset
)
2225 CHECK_CHARSET_GET_ID (charset
, id
);
2226 return make_number (id
);
2229 struct charset_sort_data
2231 Lisp_Object charset
;
2237 charset_compare (const void *d1
, const void *d2
)
2239 const struct charset_sort_data
*data1
= d1
, *data2
= d2
;
2240 return (data1
->priority
- data2
->priority
);
2243 DEFUN ("sort-charsets", Fsort_charsets
, Ssort_charsets
, 1, 1, 0,
2244 doc
: /* Sort charset list CHARSETS by a priority of each charset.
2245 Return the sorted list. CHARSETS is modified by side effects.
2246 See also `charset-priority-list' and `set-charset-priority'. */)
2247 (Lisp_Object charsets
)
2249 Lisp_Object len
= Flength (charsets
);
2250 int n
= XFASTINT (len
), i
, j
, done
;
2251 Lisp_Object tail
, elt
, attrs
;
2252 struct charset_sort_data
*sort_data
;
2253 int id
, min_id
, max_id
;
2258 SAFE_ALLOCA (sort_data
, struct charset_sort_data
*, sizeof (*sort_data
) * n
);
2259 for (tail
= charsets
, i
= 0; CONSP (tail
); tail
= XCDR (tail
), i
++)
2262 CHECK_CHARSET_GET_ATTR (elt
, attrs
);
2263 sort_data
[i
].charset
= elt
;
2264 sort_data
[i
].id
= id
= XINT (CHARSET_ATTR_ID (attrs
));
2266 min_id
= max_id
= id
;
2267 else if (id
< min_id
)
2269 else if (id
> max_id
)
2272 for (done
= 0, tail
= Vcharset_ordered_list
, i
= 0;
2273 done
< n
&& CONSP (tail
); tail
= XCDR (tail
), i
++)
2276 id
= XFASTINT (elt
);
2277 if (id
>= min_id
&& id
<= max_id
)
2278 for (j
= 0; j
< n
; j
++)
2279 if (sort_data
[j
].id
== id
)
2281 sort_data
[j
].priority
= i
;
2285 qsort (sort_data
, n
, sizeof *sort_data
, charset_compare
);
2286 for (i
= 0, tail
= charsets
; CONSP (tail
); tail
= XCDR (tail
), i
++)
2287 XSETCAR (tail
, sort_data
[i
].charset
);
2296 Lisp_Object tempdir
;
2297 tempdir
= Fexpand_file_name (build_string ("charsets"), Vdata_directory
);
2298 if (access (SSDATA (tempdir
), 0) < 0)
2300 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2301 Emacs will not function correctly without the character map files.\n\
2302 Please check your installation!\n",
2304 /* TODO should this be a fatal error? (Bug#909) */
2307 Vcharset_map_path
= Fcons (tempdir
, Qnil
);
2312 init_charset_once (void)
2316 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2317 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2318 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2319 iso_charset_table
[i
][j
][k
] = -1;
2321 for (i
= 0; i
< 256; i
++)
2322 emacs_mule_charset
[i
] = -1;
2324 charset_jisx0201_roman
= -1;
2325 charset_jisx0208_1978
= -1;
2326 charset_jisx0208
= -1;
2327 charset_ksc5601
= -1;
2333 syms_of_charset (void)
2335 DEFSYM (Qcharsetp
, "charsetp");
2337 DEFSYM (Qascii
, "ascii");
2338 DEFSYM (Qunicode
, "unicode");
2339 DEFSYM (Qemacs
, "emacs");
2340 DEFSYM (Qeight_bit
, "eight-bit");
2341 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2346 staticpro (&Vcharset_ordered_list
);
2347 Vcharset_ordered_list
= Qnil
;
2349 staticpro (&Viso_2022_charset_list
);
2350 Viso_2022_charset_list
= Qnil
;
2352 staticpro (&Vemacs_mule_charset_list
);
2353 Vemacs_mule_charset_list
= Qnil
;
2355 /* Don't staticpro them here. It's done in syms_of_fns. */
2356 QCtest
= intern_c_string (":test");
2357 Qeq
= intern_c_string ("eq");
2359 staticpro (&Vcharset_hash_table
);
2361 Lisp_Object args
[2];
2364 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2367 charset_table_size
= 128;
2368 charset_table
= ((struct charset
*)
2369 xmalloc (sizeof (struct charset
) * charset_table_size
));
2370 charset_table_used
= 0;
2372 defsubr (&Scharsetp
);
2373 defsubr (&Smap_charset_chars
);
2374 defsubr (&Sdefine_charset_internal
);
2375 defsubr (&Sdefine_charset_alias
);
2376 defsubr (&Scharset_plist
);
2377 defsubr (&Sset_charset_plist
);
2378 defsubr (&Sunify_charset
);
2379 defsubr (&Sget_unused_iso_final_char
);
2380 defsubr (&Sdeclare_equiv_charset
);
2381 defsubr (&Sfind_charset_region
);
2382 defsubr (&Sfind_charset_string
);
2383 defsubr (&Sdecode_char
);
2384 defsubr (&Sencode_char
);
2385 defsubr (&Ssplit_char
);
2386 defsubr (&Smake_char
);
2387 defsubr (&Schar_charset
);
2388 defsubr (&Scharset_after
);
2389 defsubr (&Siso_charset
);
2390 defsubr (&Sclear_charset_maps
);
2391 defsubr (&Scharset_priority_list
);
2392 defsubr (&Sset_charset_priority
);
2393 defsubr (&Scharset_id_internal
);
2394 defsubr (&Ssort_charsets
);
2396 DEFVAR_LISP ("charset-map-path", Vcharset_map_path
,
2397 doc
: /* *List of directories to search for charset map files. */);
2398 Vcharset_map_path
= Qnil
;
2400 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map
,
2401 doc
: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2402 inhibit_load_charset_map
= 0;
2404 DEFVAR_LISP ("charset-list", Vcharset_list
,
2405 doc
: /* List of all charsets ever defined. */);
2406 Vcharset_list
= Qnil
;
2408 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language
,
2409 doc
: /* ISO639 language mnemonic symbol for the current language environment.
2410 If the current language environment is for multiple languages (e.g. "Latin-1"),
2411 the value may be a list of mnemonics. */);
2412 Vcurrent_iso639_language
= Qnil
;
2415 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2416 0, 127, 'B', -1, 0, 1, 0, 0);
2418 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2419 0, 255, -1, -1, -1, 1, 0, 0);
2421 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2422 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2424 = define_charset_internal (Qemacs
, 3, "\x00\xFF\x00\xFF\x00\x3F",
2425 0, MAX_5_BYTE_CHAR
, -1, 0, -1, 1, 1, 0);
2427 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2428 128, 255, -1, 0, -1, 0, 1,
2429 MAX_5_BYTE_CHAR
+ 1);
2430 charset_unibyte
= charset_iso_8859_1
;