1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
6 Copyright (C) 2003, 2004
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H13PRO009
10 This file is part of GNU Emacs.
12 GNU Emacs is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2, or (at your option)
17 GNU Emacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
22 You should have received a copy of the GNU General Public License
23 along with GNU Emacs; see the file COPYING. If not, write to
24 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 Boston, MA 02110-1301, USA. */
32 #include <sys/types.h>
34 #include "character.h"
40 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42 A coded character set ("charset" hereafter) is a meaningful
43 collection (i.e. language, culture, functionality, etc.) of
44 characters. Emacs handles multiple charsets at once. In Emacs Lisp
45 code, a charset is represented by a symbol. In C code, a charset is
46 represented by its ID number or by a pointer to a struct charset.
48 The actual information about each charset is stored in two places.
49 Lispy information is stored in the hash table Vcharset_hash_table as
50 a vector (charset attributes). The other information is stored in
51 charset_table as a struct charset.
55 /* List of all charsets. This variable is used only from Emacs
57 Lisp_Object Vcharset_list
;
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table
;
63 /* Table of struct charset. */
64 struct charset
*charset_table
;
66 static int charset_table_size
;
67 static int charset_table_used
;
69 Lisp_Object Qcharsetp
;
71 /* Special charset symbols. */
73 Lisp_Object Qeight_bit
;
74 Lisp_Object Qiso_8859_1
;
77 /* The corresponding charsets. */
79 int charset_eight_bit
;
80 int charset_iso_8859_1
;
83 /* The other special charsets. */
84 int charset_jisx0201_roman
;
85 int charset_jisx0208_1978
;
88 /* Value of charset attribute `charset-iso-plane'. */
91 /* Charset of unibyte characters. */
94 /* List of charsets ordered by the priority. */
95 Lisp_Object Vcharset_ordered_list
;
97 /* Incremented everytime we change Vcharset_ordered_list. This is
98 unsigned short so that it fits in Lisp_Int and never matches
100 unsigned short charset_ordered_list_tick
;
102 /* List of iso-2022 charsets. */
103 Lisp_Object Viso_2022_charset_list
;
105 /* List of emacs-mule charsets. */
106 Lisp_Object Vemacs_mule_charset_list
;
108 struct charset
*emacs_mule_charset
[256];
110 /* Mapping table from ISO2022's charset (specified by DIMENSION,
111 CHARS, and FINAL-CHAR) to Emacs' charset. */
112 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
114 Lisp_Object Vcharset_map_path
;
116 Lisp_Object Vchar_unified_charset_table
;
118 /* Defined in chartab.c */
120 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
121 Lisp_Object function
, Lisp_Object table
,
122 Lisp_Object arg
, struct charset
*charset
,
123 unsigned from
, unsigned to
));
125 #define CODE_POINT_TO_INDEX(charset, code) \
126 ((charset)->code_linear_p \
127 ? (code) - (charset)->min_code \
128 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
129 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
130 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
131 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
132 ? (((((code) >> 24) - (charset)->code_space[12]) \
133 * (charset)->code_space[11]) \
134 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
135 * (charset)->code_space[7]) \
136 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
137 * (charset)->code_space[3]) \
138 + (((code) & 0xFF) - (charset)->code_space[0]) \
139 - ((charset)->char_index_offset)) \
143 /* Convert the character index IDX to code-point CODE for CHARSET.
144 It is assumed that IDX is in a valid range. */
146 #define INDEX_TO_CODE_POINT(charset, idx) \
147 ((charset)->code_linear_p \
148 ? (idx) + (charset)->min_code \
149 : (idx += (charset)->char_index_offset, \
150 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
151 | (((charset)->code_space[4] \
152 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
154 | (((charset)->code_space[8] \
155 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
157 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
163 /* Set to 1 to warn that a charset map is loaded and thus a buffer
164 text and a string data may be relocated. */
165 int charset_map_loaded
;
167 struct charset_map_entries
173 struct charset_map_entries
*next
;
176 /* Load the mapping information for CHARSET from ENTRIES.
178 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
180 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
181 CHARSET->decoder, and CHARSET->encoder.
183 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
184 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
188 load_charset_map (charset
, entries
, n_entries
, control_flag
)
189 struct charset
*charset
;
190 struct charset_map_entries
*entries
;
194 Lisp_Object vec
, table
;
195 unsigned max_code
= CHARSET_MAX_CODE (charset
);
196 int ascii_compatible_p
= charset
->ascii_compatible_p
;
197 int min_char
, max_char
, nonascii_min_char
;
199 unsigned char *fast_map
= charset
->fast_map
;
204 if (control_flag
> 0)
206 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
208 table
= Fmake_char_table (Qnil
, Qnil
);
209 if (control_flag
== 1)
210 vec
= Fmake_vector (make_number (n
), make_number (-1));
211 else if (! CHAR_TABLE_P (Vchar_unify_table
))
212 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
214 charset_map_loaded
= 1;
217 min_char
= max_char
= entries
->entry
[0].c
;
218 nonascii_min_char
= MAX_CHAR
;
219 for (i
= 0; i
< n_entries
; i
++)
222 int from_index
, to_index
;
224 int idx
= i
% 0x10000;
226 if (i
> 0 && idx
== 0)
227 entries
= entries
->next
;
228 from
= entries
->entry
[idx
].from
;
229 to
= entries
->entry
[idx
].to
;
230 from_c
= entries
->entry
[idx
].c
;
231 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
234 to_index
= from_index
;
239 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
240 to_c
= from_c
+ (to_index
- from_index
);
242 if (from_index
< 0 || to_index
< 0)
245 if (control_flag
< 2)
251 else if (from_c
< min_char
)
253 if (ascii_compatible_p
)
255 if (! ASCII_BYTE_P (from_c
))
257 if (from_c
< nonascii_min_char
)
258 nonascii_min_char
= from_c
;
260 else if (! ASCII_BYTE_P (to_c
))
262 nonascii_min_char
= 0x80;
266 for (c
= from_c
; c
<= to_c
; c
++)
267 CHARSET_FAST_MAP_SET (c
, fast_map
);
269 if (control_flag
== 1)
271 unsigned code
= from
;
273 if (CHARSET_COMPACT_CODES_P (charset
))
276 ASET (vec
, from_index
, make_number (from_c
));
277 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
278 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
279 if (from_index
== to_index
)
281 from_index
++, from_c
++;
282 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
285 for (; from_index
<= to_index
; from_index
++, from_c
++)
287 ASET (vec
, from_index
, make_number (from_c
));
288 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
289 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
295 unsigned code
= from
;
299 int c1
= DECODE_CHAR (charset
, code
);
303 CHAR_TABLE_SET (table
, from_c
, make_number (c1
));
304 CHAR_TABLE_SET (Vchar_unify_table
, c1
, make_number (from_c
));
305 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
306 CHAR_TABLE_SET (Vchar_unified_charset_table
, c1
,
307 CHARSET_NAME (charset
));
309 if (from_index
== to_index
)
311 from_index
++, from_c
++;
312 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
317 if (control_flag
< 2)
319 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
320 ? nonascii_min_char
: min_char
);
321 CHARSET_MAX_CHAR (charset
) = max_char
;
322 if (control_flag
== 1)
324 CHARSET_DECODER (charset
) = vec
;
325 CHARSET_ENCODER (charset
) = table
;
329 CHARSET_DEUNIFIER (charset
) = table
;
333 /* Read a hexadecimal number (preceded by "0x") from the file FP while
334 paying attention to comment charcter '#'. */
336 static INLINE
unsigned
344 while ((c
= getc (fp
)) != EOF
)
348 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
352 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
364 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
366 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
368 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
369 n
= (n
* 10) + c
- '0';
376 /* Return a mapping vector for CHARSET loaded from MAPFILE.
377 Each line of MAPFILE has this form
379 where 0xAAAA is a code-point and 0xCCCC is the corresponding
380 character code, or this form
382 where 0xAAAA and 0xBBBB are code-points specifying a range, and
383 0xCCCC is the first character code of the range.
385 The returned vector has this form:
386 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
387 where CODE1 is a code-point or a cons of code-points specifying a
390 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
393 load_charset_map_from_file (charset
, mapfile
, control_flag
)
394 struct charset
*charset
;
398 unsigned min_code
= CHARSET_MIN_CODE (charset
);
399 unsigned max_code
= CHARSET_MAX_CODE (charset
);
403 Lisp_Object suffixes
;
404 struct charset_map_entries
*head
, *entries
;
407 suffixes
= Fcons (build_string (".map"),
408 Fcons (build_string (".TXT"), Qnil
));
410 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
412 || ! (fp
= fdopen (fd
, "r")))
414 add_to_log ("Failure in loading charset map: %S", mapfile
, Qnil
);
418 head
= entries
= ((struct charset_map_entries
*)
419 alloca (sizeof (struct charset_map_entries
)));
428 from
= read_hex (fp
, &eof
);
431 if (getc (fp
) == '-')
432 to
= read_hex (fp
, &eof
);
435 c
= (int) read_hex (fp
, &eof
);
437 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
440 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
442 entries
->next
= ((struct charset_map_entries
*)
443 alloca (sizeof (struct charset_map_entries
)));
444 entries
= entries
->next
;
446 idx
= n_entries
% 0x10000;
447 entries
->entry
[idx
].from
= from
;
448 entries
->entry
[idx
].to
= to
;
449 entries
->entry
[idx
].c
= c
;
455 load_charset_map (charset
, head
, n_entries
, control_flag
);
459 load_charset_map_from_vector (charset
, vec
, control_flag
)
460 struct charset
*charset
;
464 unsigned min_code
= CHARSET_MIN_CODE (charset
);
465 unsigned max_code
= CHARSET_MAX_CODE (charset
);
466 struct charset_map_entries
*head
, *entries
;
468 int len
= ASIZE (vec
);
473 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
477 head
= entries
= ((struct charset_map_entries
*)
478 alloca (sizeof (struct charset_map_entries
)));
480 for (i
= 0; i
< len
; i
+= 2)
482 Lisp_Object val
, val2
;
494 from
= XFASTINT (val
);
495 to
= XFASTINT (val2
);
500 from
= to
= XFASTINT (val
);
502 val
= AREF (vec
, i
+ 1);
506 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
509 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
511 entries
->next
= ((struct charset_map_entries
*)
512 alloca (sizeof (struct charset_map_entries
)));
513 entries
= entries
->next
;
515 idx
= n_entries
% 0x10000;
516 entries
->entry
[idx
].from
= from
;
517 entries
->entry
[idx
].to
= to
;
518 entries
->entry
[idx
].c
= c
;
522 load_charset_map (charset
, head
, n_entries
, control_flag
);
526 load_charset (charset
)
527 struct charset
*charset
;
529 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
533 map
= CHARSET_MAP (charset
);
535 load_charset_map_from_file (charset
, map
, 1);
537 load_charset_map_from_vector (charset
, map
, 1);
538 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP
;
543 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
544 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
548 return (CHARSETP (object
) ? Qt
: Qnil
);
553 map_charset_chars (c_function
, function
, arg
,
555 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
556 Lisp_Object function
, arg
;
557 struct charset
*charset
;
563 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
564 load_charset (charset
);
566 partial
= (from
> CHARSET_MIN_CODE (charset
)
567 || to
< CHARSET_MAX_CODE (charset
));
569 if (CHARSET_UNIFIED_P (charset
)
570 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
572 map_char_table_for_charset (c_function
, function
,
573 CHARSET_DEUNIFIER (charset
), arg
,
574 partial
? charset
: NULL
, from
, to
);
577 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
579 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
580 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
581 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
582 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
584 range
= Fcons (make_number (from_c
), make_number (to_c
));
586 (*c_function
) (arg
, range
);
588 call2 (function
, range
, arg
);
590 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
592 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
594 if (CHARSET_ASCII_COMPATIBLE_P (charset
) && from
<= 127)
596 range
= Fcons (make_number (from
), make_number (to
));
598 XSETCAR (range
, make_number (127));
601 (*c_function
) (arg
, range
);
603 call2 (function
, range
, arg
);
605 map_char_table_for_charset (c_function
, function
,
606 CHARSET_ENCODER (charset
), arg
,
607 partial
? charset
: NULL
, from
, to
);
609 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
611 Lisp_Object subset_info
;
614 subset_info
= CHARSET_SUBSET (charset
);
615 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
616 offset
= XINT (AREF (subset_info
, 3));
618 if (from
< XFASTINT (AREF (subset_info
, 1)))
619 from
= XFASTINT (AREF (subset_info
, 1));
621 if (to
> XFASTINT (AREF (subset_info
, 2)))
622 to
= XFASTINT (AREF (subset_info
, 2));
623 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
625 else /* i.e. CHARSET_METHOD_SUPERSET */
629 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
630 parents
= XCDR (parents
))
633 unsigned this_from
, this_to
;
635 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
636 offset
= XINT (XCDR (XCAR (parents
)));
637 this_from
= from
- offset
;
638 this_to
= to
- offset
;
639 if (this_from
< CHARSET_MIN_CODE (charset
))
640 this_from
= CHARSET_MIN_CODE (charset
);
641 if (this_to
> CHARSET_MAX_CODE (charset
))
642 this_to
= CHARSET_MAX_CODE (charset
);
643 map_charset_chars (c_function
, function
, arg
, charset
,
649 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
650 doc
: /* Call FUNCTION for all characters in CHARSET.
651 FUNCTION is called with an argument RANGE and the optional 3rd
654 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
655 characters contained in CHARSET.
657 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
658 range of code points of target characters. */)
659 (function
, charset
, arg
, from_code
, to_code
)
660 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
665 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
666 if (NILP (from_code
))
667 from
= CHARSET_MIN_CODE (cs
);
670 CHECK_NATNUM (from_code
);
671 from
= XINT (from_code
);
672 if (from
< CHARSET_MIN_CODE (cs
))
673 from
= CHARSET_MIN_CODE (cs
);
676 to
= CHARSET_MAX_CODE (cs
);
679 CHECK_NATNUM (to_code
);
681 if (to
> CHARSET_MAX_CODE (cs
))
682 to
= CHARSET_MAX_CODE (cs
);
684 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
689 /* Define a charset according to the arguments. The Nth argument is
690 the Nth attribute of the charset (the last attribute `charset-id'
691 is not included). See the docstring of `define-charset' for the
694 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
695 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
696 doc
: /* For internal use only.
697 usage: (define-charset-internal ...) */)
702 /* Charset attr vector. */
706 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
708 struct charset charset
;
711 int new_definition_p
;
714 if (nargs
!= charset_arg_max
)
715 return Fsignal (Qwrong_number_of_arguments
,
716 Fcons (intern ("define-charset-internal"),
717 make_number (nargs
)));
719 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
721 CHECK_SYMBOL (args
[charset_arg_name
]);
722 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
724 val
= args
[charset_arg_code_space
];
725 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
727 int min_byte
, max_byte
;
729 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
730 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
731 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
732 error ("Invalid :code-space value");
733 charset
.code_space
[i
* 4] = min_byte
;
734 charset
.code_space
[i
* 4 + 1] = max_byte
;
735 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
736 nchars
*= charset
.code_space
[i
* 4 + 2];
737 charset
.code_space
[i
* 4 + 3] = nchars
;
742 val
= args
[charset_arg_dimension
];
744 charset
.dimension
= dimension
;
748 charset
.dimension
= XINT (val
);
749 if (charset
.dimension
< 1 || charset
.dimension
> 4)
750 args_out_of_range_3 (val
, make_number (1), make_number (4));
753 charset
.code_linear_p
754 = (charset
.dimension
== 1
755 || (charset
.code_space
[2] == 256
756 && (charset
.dimension
== 2
757 || (charset
.code_space
[6] == 256
758 && (charset
.dimension
== 3
759 || charset
.code_space
[10] == 256)))));
761 if (! charset
.code_linear_p
)
763 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
764 bzero (charset
.code_space_mask
, 256);
765 for (i
= 0; i
< 4; i
++)
766 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
768 charset
.code_space_mask
[j
] |= (1 << i
);
771 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
773 charset
.min_code
= (charset
.code_space
[0]
774 | (charset
.code_space
[4] << 8)
775 | (charset
.code_space
[8] << 16)
776 | (charset
.code_space
[12] << 24));
777 charset
.max_code
= (charset
.code_space
[1]
778 | (charset
.code_space
[5] << 8)
779 | (charset
.code_space
[9] << 16)
780 | (charset
.code_space
[13] << 24));
781 charset
.char_index_offset
= 0;
783 val
= args
[charset_arg_min_code
];
793 CHECK_NUMBER_CAR (val
);
794 CHECK_NUMBER_CDR (val
);
795 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
797 if (code
< charset
.min_code
798 || code
> charset
.max_code
)
799 args_out_of_range_3 (make_number (charset
.min_code
),
800 make_number (charset
.max_code
), val
);
801 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
802 charset
.min_code
= code
;
805 val
= args
[charset_arg_max_code
];
815 CHECK_NUMBER_CAR (val
);
816 CHECK_NUMBER_CDR (val
);
817 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
819 if (code
< charset
.min_code
820 || code
> charset
.max_code
)
821 args_out_of_range_3 (make_number (charset
.min_code
),
822 make_number (charset
.max_code
), val
);
823 charset
.max_code
= code
;
826 charset
.compact_codes_p
= charset
.max_code
< 0x1000000;
828 val
= args
[charset_arg_invalid_code
];
831 if (charset
.min_code
> 0)
832 charset
.invalid_code
= 0;
835 XSETINT (val
, charset
.max_code
+ 1);
836 if (XINT (val
) == charset
.max_code
+ 1)
837 charset
.invalid_code
= charset
.max_code
+ 1;
839 error ("Attribute :invalid-code must be specified");
845 charset
.invalid_code
= XFASTINT (val
);
848 val
= args
[charset_arg_iso_final
];
850 charset
.iso_final
= -1;
854 if (XINT (val
) < '0' || XINT (val
) > 127)
855 error ("Invalid iso-final-char: %d", XINT (val
));
856 charset
.iso_final
= XINT (val
);
859 val
= args
[charset_arg_iso_revision
];
861 charset
.iso_revision
= -1;
866 args_out_of_range (make_number (63), val
);
867 charset
.iso_revision
= XINT (val
);
870 val
= args
[charset_arg_emacs_mule_id
];
872 charset
.emacs_mule_id
= -1;
876 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
877 error ("Invalid emacs-mule-id: %d", XINT (val
));
878 charset
.emacs_mule_id
= XINT (val
);
881 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
883 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
885 charset
.unified_p
= 0;
887 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
889 if (! NILP (args
[charset_arg_code_offset
]))
891 val
= args
[charset_arg_code_offset
];
894 charset
.method
= CHARSET_METHOD_OFFSET
;
895 charset
.code_offset
= XINT (val
);
897 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
898 charset
.min_char
= i
+ charset
.code_offset
;
899 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
900 charset
.max_char
= i
+ charset
.code_offset
;
901 if (charset
.max_char
> MAX_CHAR
)
902 error ("Unsupported max char: %d", charset
.max_char
);
904 i
= (charset
.min_char
>> 7) << 7;
905 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
906 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
908 for (; i
<= charset
.max_char
; i
+= 0x1000)
909 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
911 else if (! NILP (args
[charset_arg_map
]))
913 val
= args
[charset_arg_map
];
914 ASET (attrs
, charset_map
, val
);
916 load_charset_map_from_file (&charset
, val
, 0);
918 load_charset_map_from_vector (&charset
, val
, 0);
919 charset
.method
= CHARSET_METHOD_MAP_DEFERRED
;
921 else if (! NILP (args
[charset_arg_subset
]))
924 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
925 struct charset
*parent_charset
;
927 val
= args
[charset_arg_subset
];
929 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
930 parent_min_code
= Fnth (make_number (1), val
);
931 CHECK_NATNUM (parent_min_code
);
932 parent_max_code
= Fnth (make_number (2), val
);
933 CHECK_NATNUM (parent_max_code
);
934 parent_code_offset
= Fnth (make_number (3), val
);
935 CHECK_NUMBER (parent_code_offset
);
936 val
= Fmake_vector (make_number (4), Qnil
);
937 ASET (val
, 0, make_number (parent_charset
->id
));
938 ASET (val
, 1, parent_min_code
);
939 ASET (val
, 2, parent_max_code
);
940 ASET (val
, 3, parent_code_offset
);
941 ASET (attrs
, charset_subset
, val
);
943 charset
.method
= CHARSET_METHOD_SUBSET
;
944 /* Here, we just copy the parent's fast_map. It's not accurate,
945 but at least it works for quickly detecting which character
946 DOESN'T belong to this charset. */
947 for (i
= 0; i
< 190; i
++)
948 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
950 /* We also copy these for parents. */
951 charset
.min_char
= parent_charset
->min_char
;
952 charset
.max_char
= parent_charset
->max_char
;
954 else if (! NILP (args
[charset_arg_superset
]))
956 val
= args
[charset_arg_superset
];
957 charset
.method
= CHARSET_METHOD_SUPERSET
;
958 val
= Fcopy_sequence (val
);
959 ASET (attrs
, charset_superset
, val
);
961 charset
.min_char
= MAX_CHAR
;
962 charset
.max_char
= 0;
963 for (; ! NILP (val
); val
= Fcdr (val
))
965 Lisp_Object elt
, car_part
, cdr_part
;
967 struct charset
*this_charset
;
972 car_part
= XCAR (elt
);
973 cdr_part
= XCDR (elt
);
974 CHECK_CHARSET_GET_ID (car_part
, this_id
);
975 CHECK_NUMBER (cdr_part
);
976 offset
= XINT (cdr_part
);
980 CHECK_CHARSET_GET_ID (elt
, this_id
);
983 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
985 this_charset
= CHARSET_FROM_ID (this_id
);
986 if (charset
.min_char
> this_charset
->min_char
)
987 charset
.min_char
= this_charset
->min_char
;
988 if (charset
.max_char
< this_charset
->max_char
)
989 charset
.max_char
= this_charset
->max_char
;
990 for (i
= 0; i
< 190; i
++)
991 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
995 error ("None of :code-offset, :map, :parents are specified");
997 val
= args
[charset_arg_unify_map
];
998 if (! NILP (val
) && !STRINGP (val
))
1000 ASET (attrs
, charset_unify_map
, val
);
1002 CHECK_LIST (args
[charset_arg_plist
]);
1003 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
1005 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
1007 if (charset
.hash_index
>= 0)
1009 new_definition_p
= 0;
1010 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1011 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1015 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1017 if (charset_table_used
== charset_table_size
)
1019 struct charset
*new_table
1020 = (struct charset
*) xmalloc (sizeof (struct charset
)
1021 * (charset_table_size
+ 16));
1022 bcopy (charset_table
, new_table
,
1023 sizeof (struct charset
) * charset_table_size
);
1024 charset_table_size
+= 16;
1025 charset_table
= new_table
;
1027 id
= charset_table_used
++;
1028 new_definition_p
= 1;
1031 ASET (attrs
, charset_id
, make_number (id
));
1033 charset_table
[id
] = charset
;
1035 if (charset
.iso_final
>= 0)
1037 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1038 charset
.iso_final
) = id
;
1039 if (new_definition_p
)
1040 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1041 Fcons (make_number (id
), Qnil
));
1042 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1043 charset_jisx0201_roman
= id
;
1044 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1045 charset_jisx0208_1978
= id
;
1046 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1047 charset_jisx0208
= id
;
1050 if (charset
.emacs_mule_id
>= 0)
1052 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1053 if (charset
.emacs_mule_id
< 0xA0)
1054 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1055 if (new_definition_p
)
1056 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1057 Fcons (make_number (id
), Qnil
));
1060 if (new_definition_p
)
1062 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1063 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1064 Fcons (make_number (id
), Qnil
));
1065 charset_ordered_list_tick
++;
1072 /* Same as Fdefine_charset_internal but arguments are more convenient
1073 to call from C (typically in syms_of_charset). This can define a
1074 charset of `offset' method only. Return the ID of the new
1078 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1079 iso_final
, iso_revision
, emacs_mule_id
,
1080 ascii_compatible
, supplementary
,
1084 unsigned char *code_space
;
1085 unsigned min_code
, max_code
;
1086 int iso_final
, iso_revision
, emacs_mule_id
;
1087 int ascii_compatible
, supplementary
;
1090 Lisp_Object args
[charset_arg_max
];
1091 Lisp_Object plist
[14];
1095 args
[charset_arg_name
] = name
;
1096 args
[charset_arg_dimension
] = make_number (dimension
);
1097 val
= Fmake_vector (make_number (8), make_number (0));
1098 for (i
= 0; i
< 8; i
++)
1099 ASET (val
, i
, make_number (code_space
[i
]));
1100 args
[charset_arg_code_space
] = val
;
1101 args
[charset_arg_min_code
] = make_number (min_code
);
1102 args
[charset_arg_max_code
] = make_number (max_code
);
1103 args
[charset_arg_iso_final
]
1104 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1105 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1106 args
[charset_arg_emacs_mule_id
]
1107 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1108 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1109 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1110 args
[charset_arg_invalid_code
] = Qnil
;
1111 args
[charset_arg_code_offset
] = make_number (code_offset
);
1112 args
[charset_arg_map
] = Qnil
;
1113 args
[charset_arg_subset
] = Qnil
;
1114 args
[charset_arg_superset
] = Qnil
;
1115 args
[charset_arg_unify_map
] = Qnil
;
1117 plist
[0] = intern (":name");
1118 plist
[1] = args
[charset_arg_name
];
1119 plist
[2] = intern (":dimension");
1120 plist
[3] = args
[charset_arg_dimension
];
1121 plist
[4] = intern (":code-space");
1122 plist
[5] = args
[charset_arg_code_space
];
1123 plist
[6] = intern (":iso-final-char");
1124 plist
[7] = args
[charset_arg_iso_final
];
1125 plist
[8] = intern (":emacs-mule-id");
1126 plist
[9] = args
[charset_arg_emacs_mule_id
];
1127 plist
[10] = intern (":ascii-compatible-p");
1128 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1129 plist
[12] = intern (":code-offset");
1130 plist
[13] = args
[charset_arg_code_offset
];
1132 args
[charset_arg_plist
] = Flist (14, plist
);
1133 Fdefine_charset_internal (charset_arg_max
, args
);
1135 return XINT (CHARSET_SYMBOL_ID (name
));
1139 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1140 Sdefine_charset_alias
, 2, 2, 0,
1141 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1143 Lisp_Object alias
, charset
;
1147 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1148 Fputhash (alias
, attr
, Vcharset_hash_table
);
1149 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1154 DEFUN ("unibyte-charset", Funibyte_charset
, Sunibyte_charset
, 0, 0, 0,
1155 doc
: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
1158 return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte
));
1162 DEFUN ("set-unibyte-charset", Fset_unibyte_charset
, Sset_unibyte_charset
,
1164 doc
: /* Set the unibyte charset to CHARSET.
1165 This determines how unibyte/multibyte conversion is done. See also
1166 function `unibyte-charset'. */)
1168 Lisp_Object charset
;
1173 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
1174 if (! cs
->ascii_compatible_p
1175 || cs
->dimension
!= 1)
1176 error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset
)));
1177 charset_unibyte
= cs
->id
;
1178 memset (unibyte_has_multibyte_table
, 1, 128);
1179 for (i
= 128; i
< 256; i
++)
1181 c
= DECODE_CHAR (cs
, i
);
1182 unibyte_to_multibyte_table
[i
] = (c
< 0 ? BYTE8_TO_CHAR (i
) : c
);
1183 unibyte_has_multibyte_table
[i
] = c
>= 0;
1190 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1191 doc
: /* Return the property list of CHARSET. */)
1193 Lisp_Object charset
;
1197 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1198 return CHARSET_ATTR_PLIST (attrs
);
1202 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1203 doc
: /* Set CHARSET's property list to PLIST. */)
1205 Lisp_Object charset
, plist
;
1209 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1210 CHARSET_ATTR_PLIST (attrs
) = plist
;
1215 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1216 doc
: /* Unify characters of CHARSET with Unicode.
1217 This means reading the relevant file and installing the table defined
1218 by CHARSET's `:unify-map' property.
1220 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1221 the same meaning as the `:unify-map' attribute in the function
1222 `define-charset' (which see).
1224 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1225 (charset
, unify_map
, deunify
)
1226 Lisp_Object charset
, unify_map
, deunify
;
1231 CHECK_CHARSET_GET_ID (charset
, id
);
1232 cs
= CHARSET_FROM_ID (id
);
1233 if (CHARSET_METHOD (cs
) == CHARSET_METHOD_MAP_DEFERRED
)
1236 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1237 : ! CHARSET_UNIFIED_P (cs
))
1240 CHARSET_UNIFIED_P (cs
) = 0;
1243 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
)
1244 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1245 if (NILP (unify_map
))
1246 unify_map
= CHARSET_UNIFY_MAP (cs
);
1247 if (STRINGP (unify_map
))
1248 load_charset_map_from_file (cs
, unify_map
, 2);
1249 else if (VECTORP (unify_map
))
1250 load_charset_map_from_vector (cs
, unify_map
, 2);
1251 else if (NILP (unify_map
))
1252 error ("No unify-map for charset");
1254 error ("Bad unify-map arg");
1255 CHARSET_UNIFIED_P (cs
) = 1;
1257 else if (CHAR_TABLE_P (Vchar_unify_table
))
1259 int min_code
= CHARSET_MIN_CODE (cs
);
1260 int max_code
= CHARSET_MAX_CODE (cs
);
1261 int min_char
= DECODE_CHAR (cs
, min_code
);
1262 int max_char
= DECODE_CHAR (cs
, max_code
);
1264 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1270 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1271 Sget_unused_iso_final_char
, 2, 2, 0,
1273 Return an unused ISO final char for a charset of DIMENISION and CHARS.
1274 DIMENSION is the number of bytes to represent a character: 1 or 2.
1275 CHARS is the number of characters in a dimension: 94 or 96.
1277 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1278 If there's no unused final char for the specified kind of charset,
1281 Lisp_Object dimension
, chars
;
1285 CHECK_NUMBER (dimension
);
1286 CHECK_NUMBER (chars
);
1287 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1288 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1289 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1290 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1291 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1292 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1294 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1298 check_iso_charset_parameter (dimension
, chars
, final_char
)
1299 Lisp_Object dimension
, chars
, final_char
;
1301 CHECK_NATNUM (dimension
);
1302 CHECK_NATNUM (chars
);
1303 CHECK_NATNUM (final_char
);
1305 if (XINT (dimension
) > 3)
1306 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1307 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1308 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1309 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1310 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1314 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1316 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1318 On decoding by an ISO-2022 base coding system, when a charset
1319 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1320 if CHARSET is designated instead. */)
1321 (dimension
, chars
, final_char
, charset
)
1322 Lisp_Object dimension
, chars
, final_char
, charset
;
1327 CHECK_CHARSET_GET_ID (charset
, id
);
1328 check_iso_charset_parameter (dimension
, chars
, final_char
);
1329 chars_flag
= XINT (chars
) == 96;
1330 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1335 /* Return information about charsets in the text at PTR of NBYTES
1336 bytes, which are NCHARS characters. The value is:
1338 0: Each character is represented by one byte. This is always
1339 true for a unibyte string. For a multibyte string, true if
1340 it contains only ASCII characters.
1342 1: No charsets other than ascii, control-1, and latin-1 are
1349 string_xstring_p (string
)
1352 const unsigned char *p
= SDATA (string
);
1353 const unsigned char *endp
= p
+ SBYTES (string
);
1355 if (SCHARS (string
) == SBYTES (string
))
1360 int c
= STRING_CHAR_ADVANCE (p
);
1369 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1371 CHARSETS is a vector. If Nth element is non-nil, it means the
1372 charset whose id is N is already found.
1374 It may lookup a translation table TABLE if supplied. */
1377 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1378 const unsigned char *ptr
;
1379 EMACS_INT nchars
, nbytes
;
1380 Lisp_Object charsets
, table
;
1383 const unsigned char *pend
= ptr
+ nbytes
;
1385 if (nchars
== nbytes
)
1388 ASET (charsets
, charset_ascii
, Qt
);
1395 c
= translate_char (table
, c
);
1396 if (ASCII_BYTE_P (c
))
1397 ASET (charsets
, charset_ascii
, Qt
);
1399 ASET (charsets
, charset_eight_bit
, Qt
);
1406 int c
= STRING_CHAR_ADVANCE (ptr
);
1407 struct charset
*charset
;
1410 c
= translate_char (table
, c
);
1411 charset
= CHAR_CHARSET (c
);
1412 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1417 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1419 doc
: /* Return a list of charsets in the region between BEG and END.
1420 BEG and END are buffer positions.
1421 Optional arg TABLE if non-nil is a translation table to look up.
1423 If the current buffer is unibyte, the returned list may contain
1424 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1426 Lisp_Object beg
, end
, table
;
1428 Lisp_Object charsets
;
1429 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1432 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1434 validate_region (&beg
, &end
);
1435 from
= XFASTINT (beg
);
1436 stop
= to
= XFASTINT (end
);
1438 if (from
< GPT
&& GPT
< to
)
1441 stop_byte
= GPT_BYTE
;
1444 stop_byte
= CHAR_TO_BYTE (stop
);
1446 from_byte
= CHAR_TO_BYTE (from
);
1448 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1451 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1452 stop_byte
- from_byte
, charsets
, table
,
1456 from
= stop
, from_byte
= stop_byte
;
1457 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1464 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1465 if (!NILP (AREF (charsets
, i
)))
1466 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1470 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1472 doc
: /* Return a list of charsets in STR.
1473 Optional arg TABLE if non-nil is a translation table to look up.
1475 If STR is unibyte, the returned list may contain
1476 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1478 Lisp_Object str
, table
;
1480 Lisp_Object charsets
;
1486 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1487 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1489 STRING_MULTIBYTE (str
));
1491 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1492 if (!NILP (AREF (charsets
, i
)))
1493 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1499 /* Return a character correponding to the code-point CODE of
1503 decode_char (charset
, code
)
1504 struct charset
*charset
;
1508 enum charset_method method
= CHARSET_METHOD (charset
);
1510 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1513 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1515 load_charset (charset
);
1516 method
= CHARSET_METHOD (charset
);
1519 if (method
== CHARSET_METHOD_SUBSET
)
1521 Lisp_Object subset_info
;
1523 subset_info
= CHARSET_SUBSET (charset
);
1524 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1525 code
-= XINT (AREF (subset_info
, 3));
1526 if (code
< XFASTINT (AREF (subset_info
, 1))
1527 || code
> XFASTINT (AREF (subset_info
, 2)))
1530 c
= DECODE_CHAR (charset
, code
);
1532 else if (method
== CHARSET_METHOD_SUPERSET
)
1534 Lisp_Object parents
;
1536 parents
= CHARSET_SUPERSET (charset
);
1538 for (; CONSP (parents
); parents
= XCDR (parents
))
1540 int id
= XINT (XCAR (XCAR (parents
)));
1541 int code_offset
= XINT (XCDR (XCAR (parents
)));
1542 unsigned this_code
= code
- code_offset
;
1544 charset
= CHARSET_FROM_ID (id
);
1545 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1551 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1555 if (method
== CHARSET_METHOD_MAP
)
1557 Lisp_Object decoder
;
1559 decoder
= CHARSET_DECODER (charset
);
1560 if (! VECTORP (decoder
))
1562 c
= XINT (AREF (decoder
, char_index
));
1566 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1570 if (CHARSET_UNIFIED_P (charset
)
1573 MAYBE_UNIFY_CHAR (c
);
1579 /* Variable used temporarily by the macro ENCODE_CHAR. */
1580 Lisp_Object charset_work
;
1582 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1583 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1584 use CHARSET's strict_max_char instead of max_char. */
1587 encode_char (charset
, c
)
1588 struct charset
*charset
;
1592 enum charset_method method
= CHARSET_METHOD (charset
);
1594 if (CHARSET_UNIFIED_P (charset
))
1596 Lisp_Object deunifier
, deunified
;
1598 deunifier
= CHARSET_DEUNIFIER (charset
);
1599 if (! CHAR_TABLE_P (deunifier
))
1601 Funify_charset (CHARSET_NAME (charset
), Qnil
, Qnil
);
1602 deunifier
= CHARSET_DEUNIFIER (charset
);
1604 deunified
= CHAR_TABLE_REF (deunifier
, c
);
1605 if (! NILP (deunified
))
1606 c
= XINT (deunified
);
1609 if (method
== CHARSET_METHOD_SUBSET
)
1611 Lisp_Object subset_info
;
1612 struct charset
*this_charset
;
1614 subset_info
= CHARSET_SUBSET (charset
);
1615 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1616 code
= ENCODE_CHAR (this_charset
, c
);
1617 if (code
== CHARSET_INVALID_CODE (this_charset
)
1618 || code
< XFASTINT (AREF (subset_info
, 1))
1619 || code
> XFASTINT (AREF (subset_info
, 2)))
1620 return CHARSET_INVALID_CODE (charset
);
1621 code
+= XINT (AREF (subset_info
, 3));
1625 if (method
== CHARSET_METHOD_SUPERSET
)
1627 Lisp_Object parents
;
1629 parents
= CHARSET_SUPERSET (charset
);
1630 for (; CONSP (parents
); parents
= XCDR (parents
))
1632 int id
= XINT (XCAR (XCAR (parents
)));
1633 int code_offset
= XINT (XCDR (XCAR (parents
)));
1634 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1636 code
= ENCODE_CHAR (this_charset
, c
);
1637 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1638 return code
+ code_offset
;
1640 return CHARSET_INVALID_CODE (charset
);
1643 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1644 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1645 return CHARSET_INVALID_CODE (charset
);
1647 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1649 load_charset (charset
);
1650 method
= CHARSET_METHOD (charset
);
1653 if (method
== CHARSET_METHOD_MAP
)
1655 Lisp_Object encoder
;
1658 encoder
= CHARSET_ENCODER (charset
);
1659 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1660 return CHARSET_INVALID_CODE (charset
);
1661 val
= CHAR_TABLE_REF (encoder
, c
);
1663 return CHARSET_INVALID_CODE (charset
);
1665 if (! CHARSET_COMPACT_CODES_P (charset
))
1666 code
= INDEX_TO_CODE_POINT (charset
, code
);
1668 else /* method == CHARSET_METHOD_OFFSET */
1670 code
= c
- CHARSET_CODE_OFFSET (charset
);
1671 code
= INDEX_TO_CODE_POINT (charset
, code
);
1678 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1679 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1680 Return nil if CODE-POINT is not valid in CHARSET.
1682 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1684 Optional argument RESTRICTION specifies a way to map the pair of CCS
1685 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1686 (charset
, code_point
, restriction
)
1687 Lisp_Object charset
, code_point
, restriction
;
1691 struct charset
*charsetp
;
1693 CHECK_CHARSET_GET_ID (charset
, id
);
1694 if (CONSP (code_point
))
1696 CHECK_NATNUM_CAR (code_point
);
1697 CHECK_NATNUM_CDR (code_point
);
1698 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1702 CHECK_NATNUM (code_point
);
1703 code
= XINT (code_point
);
1705 charsetp
= CHARSET_FROM_ID (id
);
1706 c
= DECODE_CHAR (charsetp
, code
);
1707 return (c
>= 0 ? make_number (c
) : Qnil
);
1711 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1712 doc
: /* Encode the character CH into a code-point of CHARSET.
1713 Return nil if CHARSET doesn't include CH.
1715 Optional argument RESTRICTION specifies a way to map CHAR to a
1716 code-point in CCS. Currently not supported and just ignored. */)
1717 (ch
, charset
, restriction
)
1718 Lisp_Object ch
, charset
, restriction
;
1722 struct charset
*charsetp
;
1724 CHECK_CHARSET_GET_ID (charset
, id
);
1726 charsetp
= CHARSET_FROM_ID (id
);
1727 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1728 if (code
== CHARSET_INVALID_CODE (charsetp
))
1730 if (code
> 0x7FFFFFF)
1731 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1732 return make_number (code
);
1736 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1738 /* Return a character of CHARSET whose position codes are CODEn.
1740 CODE1 through CODE4 are optional, but if you don't supply sufficient
1741 position codes, it is assumed that the minimum code in each dimension
1743 (charset
, code1
, code2
, code3
, code4
)
1744 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1747 struct charset
*charsetp
;
1751 CHECK_CHARSET_GET_ID (charset
, id
);
1752 charsetp
= CHARSET_FROM_ID (id
);
1754 dimension
= CHARSET_DIMENSION (charsetp
);
1756 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1757 ? 0 : CHARSET_MIN_CODE (charsetp
));
1760 CHECK_NATNUM (code1
);
1761 if (XFASTINT (code1
) >= 0x100)
1762 args_out_of_range (make_number (0xFF), code1
);
1763 code
= XFASTINT (code1
);
1769 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1772 CHECK_NATNUM (code2
);
1773 if (XFASTINT (code2
) >= 0x100)
1774 args_out_of_range (make_number (0xFF), code2
);
1775 code
|= XFASTINT (code2
);
1782 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1785 CHECK_NATNUM (code3
);
1786 if (XFASTINT (code3
) >= 0x100)
1787 args_out_of_range (make_number (0xFF), code3
);
1788 code
|= XFASTINT (code3
);
1795 code
|= charsetp
->code_space
[0];
1798 CHECK_NATNUM (code4
);
1799 if (XFASTINT (code4
) >= 0x100)
1800 args_out_of_range (make_number (0xFF), code4
);
1801 code
|= XFASTINT (code4
);
1808 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1810 c
= DECODE_CHAR (charsetp
, code
);
1812 error ("Invalid code(s)");
1813 return make_number (c
);
1817 /* Return the first charset in CHARSET_LIST that contains C.
1818 CHARSET_LIST is a list of charset IDs. If it is nil, use
1819 Vcharset_ordered_list. */
1822 char_charset (c
, charset_list
, code_return
)
1824 Lisp_Object charset_list
;
1825 unsigned *code_return
;
1827 if (NILP (charset_list
))
1828 charset_list
= Vcharset_ordered_list
;
1830 while (CONSP (charset_list
))
1832 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1833 unsigned code
= ENCODE_CHAR (charset
, c
);
1835 if (code
!= CHARSET_INVALID_CODE (charset
))
1838 *code_return
= code
;
1841 charset_list
= XCDR (charset_list
);
1847 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
1849 /*Return list of charset and one to four position-codes of CHAR.
1850 The charset is decided by the current priority order of charsets.
1851 A position-code is a byte value of each dimension of the code-point of
1852 CHAR in the charset. */)
1856 struct charset
*charset
;
1861 CHECK_CHARACTER (ch
);
1863 charset
= CHAR_CHARSET (c
);
1866 code
= ENCODE_CHAR (charset
, c
);
1867 if (code
== CHARSET_INVALID_CODE (charset
))
1869 dimension
= CHARSET_DIMENSION (charset
);
1870 for (val
= Qnil
; dimension
> 0; dimension
--)
1872 val
= Fcons (make_number (code
& 0xFF), val
);
1875 return Fcons (CHARSET_NAME (charset
), val
);
1879 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
1880 doc
: /* Return the charset of highest priority that contains CH. */)
1884 struct charset
*charset
;
1886 CHECK_CHARACTER (ch
);
1887 charset
= CHAR_CHARSET (XINT (ch
));
1888 return (CHARSET_NAME (charset
));
1892 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
1894 Return charset of a character in the current buffer at position POS.
1895 If POS is nil, it defauls to the current point.
1896 If POS is out of range, the value is nil. */)
1901 struct charset
*charset
;
1903 ch
= Fchar_after (pos
);
1904 if (! INTEGERP (ch
))
1906 charset
= CHAR_CHARSET (XINT (ch
));
1907 return (CHARSET_NAME (charset
));
1911 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
1913 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1915 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1916 by their DIMENSION, CHARS, and FINAL-CHAR,
1917 where as Emacs distinguishes them by charset symbol.
1918 See the documentation of the function `charset-info' for the meanings of
1919 DIMENSION, CHARS, and FINAL-CHAR. */)
1920 (dimension
, chars
, final_char
)
1921 Lisp_Object dimension
, chars
, final_char
;
1926 check_iso_charset_parameter (dimension
, chars
, final_char
);
1927 chars_flag
= XFASTINT (chars
) == 96;
1928 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
1929 XFASTINT (final_char
));
1930 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
1934 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
1937 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1941 struct charset
*charset
;
1944 for (i
= 0; i
< charset_table_used
; i
++)
1946 charset
= CHARSET_FROM_ID (i
);
1947 attrs
= CHARSET_ATTRIBUTES (charset
);
1949 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
1951 CHARSET_ATTR_DECODER (attrs
) = Qnil
;
1952 CHARSET_ATTR_ENCODER (attrs
) = Qnil
;
1953 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP_DEFERRED
;
1956 if (CHARSET_UNIFIED_P (charset
))
1957 CHARSET_ATTR_DEUNIFIER (attrs
) = Qnil
;
1960 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
1962 Foptimize_char_table (Vchar_unified_charset_table
);
1963 Vchar_unify_table
= Vchar_unified_charset_table
;
1964 Vchar_unified_charset_table
= Qnil
;
1970 DEFUN ("charset-priority-list", Fcharset_priority_list
,
1971 Scharset_priority_list
, 0, 1, 0,
1972 doc
: /* Return the list of charsets ordered by priority.
1973 HIGHESTP non-nil means just return the highest priority one. */)
1975 Lisp_Object highestp
;
1977 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
1979 if (!NILP (highestp
))
1980 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
1982 while (!NILP (list
))
1984 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
1987 return Fnreverse (val
);
1990 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
1992 doc
: /* Assign higher priority to the charsets given as arguments.
1993 usage: (set-charset-priority &rest charsets) */)
1998 Lisp_Object new_head
, old_list
, arglist
[2];
1999 Lisp_Object list_2022
, list_emacs_mule
;
2002 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
2004 for (i
= 0; i
< nargs
; i
++)
2006 CHECK_CHARSET_GET_ID (args
[i
], id
);
2007 if (! NILP (Fmemq (make_number (id
), old_list
)))
2009 old_list
= Fdelq (make_number (id
), old_list
);
2010 new_head
= Fcons (make_number (id
), new_head
);
2013 arglist
[0] = Fnreverse (new_head
);
2014 arglist
[1] = old_list
;
2015 Vcharset_ordered_list
= Fnconc (2, arglist
);
2016 charset_ordered_list_tick
++;
2018 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2019 CONSP (old_list
); old_list
= XCDR (old_list
))
2021 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2022 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2023 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2024 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2026 Viso_2022_charset_list
= Fnreverse (list_2022
);
2027 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2032 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2034 doc
: /* Internal use only.
2035 Return charset identification number of CHARSET. */)
2037 Lisp_Object charset
;
2041 CHECK_CHARSET_GET_ID (charset
, id
);
2042 return make_number (id
);
2050 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory
),
2056 init_charset_once ()
2060 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2061 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2062 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2063 iso_charset_table
[i
][j
][k
] = -1;
2065 for (i
= 0; i
< 256; i
++)
2066 emacs_mule_charset
[i
] = NULL
;
2068 charset_jisx0201_roman
= -1;
2069 charset_jisx0208_1978
= -1;
2070 charset_jisx0208
= -1;
2072 for (i
= 0; i
< 128; i
++)
2073 unibyte_to_multibyte_table
[i
] = i
;
2074 for (; i
< 256; i
++)
2075 unibyte_to_multibyte_table
[i
] = BYTE8_TO_CHAR (i
);
2083 DEFSYM (Qcharsetp
, "charsetp");
2085 DEFSYM (Qascii
, "ascii");
2086 DEFSYM (Qunicode
, "unicode");
2087 DEFSYM (Qeight_bit
, "eight-bit");
2088 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2093 staticpro (&Vcharset_ordered_list
);
2094 Vcharset_ordered_list
= Qnil
;
2096 staticpro (&Viso_2022_charset_list
);
2097 Viso_2022_charset_list
= Qnil
;
2099 staticpro (&Vemacs_mule_charset_list
);
2100 Vemacs_mule_charset_list
= Qnil
;
2102 staticpro (&Vcharset_hash_table
);
2104 Lisp_Object args
[2];
2107 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2110 charset_table_size
= 128;
2111 charset_table
= ((struct charset
*)
2112 xmalloc (sizeof (struct charset
) * charset_table_size
));
2113 charset_table_used
= 0;
2115 staticpro (&Vchar_unified_charset_table
);
2116 Vchar_unified_charset_table
= Fmake_char_table (Qnil
, make_number (-1));
2118 defsubr (&Scharsetp
);
2119 defsubr (&Smap_charset_chars
);
2120 defsubr (&Sdefine_charset_internal
);
2121 defsubr (&Sdefine_charset_alias
);
2122 defsubr (&Sunibyte_charset
);
2123 defsubr (&Sset_unibyte_charset
);
2124 defsubr (&Scharset_plist
);
2125 defsubr (&Sset_charset_plist
);
2126 defsubr (&Sunify_charset
);
2127 defsubr (&Sget_unused_iso_final_char
);
2128 defsubr (&Sdeclare_equiv_charset
);
2129 defsubr (&Sfind_charset_region
);
2130 defsubr (&Sfind_charset_string
);
2131 defsubr (&Sdecode_char
);
2132 defsubr (&Sencode_char
);
2133 defsubr (&Ssplit_char
);
2134 defsubr (&Smake_char
);
2135 defsubr (&Schar_charset
);
2136 defsubr (&Scharset_after
);
2137 defsubr (&Siso_charset
);
2138 defsubr (&Sclear_charset_maps
);
2139 defsubr (&Scharset_priority_list
);
2140 defsubr (&Sset_charset_priority
);
2141 defsubr (&Scharset_id_internal
);
2143 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2144 doc
: /* *Lisp of directories to search for charset map files. */);
2145 Vcharset_map_path
= Qnil
;
2147 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2148 doc
: /* List of all charsets ever defined. */);
2149 Vcharset_list
= Qnil
;
2152 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2153 0, 127, 'B', -1, 0, 1, 0, 0);
2155 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2156 0, 255, -1, -1, -1, 1, 0, 0);
2158 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2159 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2161 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2162 128, 255, -1, 0, -1, 0, 0,
2163 MAX_5_BYTE_CHAR
+ 1);
2168 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2169 (do not change this comment) */