1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
7 Copyright (C) 2003, 2004
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
11 This file is part of GNU Emacs.
13 GNU Emacs is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2, or (at your option)
18 GNU Emacs is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
23 You should have received a copy of the GNU General Public License
24 along with GNU Emacs; see the file COPYING. If not, write to
25 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 Boston, MA 02110-1301, USA. */
33 #include <sys/types.h>
35 #include "character.h"
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
56 /* List of all charsets. This variable is used only from Emacs
58 Lisp_Object Vcharset_list
;
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table
;
64 /* Table of struct charset. */
65 struct charset
*charset_table
;
67 static int charset_table_size
;
68 static int charset_table_used
;
70 Lisp_Object Qcharsetp
;
72 /* Special charset symbols. */
74 Lisp_Object Qeight_bit
;
75 Lisp_Object Qiso_8859_1
;
78 /* The corresponding charsets. */
80 int charset_eight_bit
;
81 int charset_iso_8859_1
;
84 /* The other special charsets. */
85 int charset_jisx0201_roman
;
86 int charset_jisx0208_1978
;
89 /* Value of charset attribute `charset-iso-plane'. */
92 /* Charset of unibyte characters. */
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list
;
98 /* Incremented everytime we change Vcharset_ordered_list. This is
99 unsigned short so that it fits in Lisp_Int and never matches
101 unsigned short charset_ordered_list_tick
;
103 /* List of iso-2022 charsets. */
104 Lisp_Object Viso_2022_charset_list
;
106 /* List of emacs-mule charsets. */
107 Lisp_Object Vemacs_mule_charset_list
;
109 struct charset
*emacs_mule_charset
[256];
111 /* Mapping table from ISO2022's charset (specified by DIMENSION,
112 CHARS, and FINAL-CHAR) to Emacs' charset. */
113 int iso_charset_table
[ISO_MAX_DIMENSION
][ISO_MAX_CHARS
][ISO_MAX_FINAL
];
115 Lisp_Object Vcharset_map_path
;
117 Lisp_Object Vchar_unified_charset_table
;
119 /* Defined in chartab.c */
121 map_char_table_for_charset
P_ ((void (*c_function
) (Lisp_Object
, Lisp_Object
),
122 Lisp_Object function
, Lisp_Object table
,
123 Lisp_Object arg
, struct charset
*charset
,
124 unsigned from
, unsigned to
));
126 #define CODE_POINT_TO_INDEX(charset, code) \
127 ((charset)->code_linear_p \
128 ? (code) - (charset)->min_code \
129 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
130 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
131 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
132 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
133 ? (((((code) >> 24) - (charset)->code_space[12]) \
134 * (charset)->code_space[11]) \
135 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
136 * (charset)->code_space[7]) \
137 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
138 * (charset)->code_space[3]) \
139 + (((code) & 0xFF) - (charset)->code_space[0]) \
140 - ((charset)->char_index_offset)) \
144 /* Convert the character index IDX to code-point CODE for CHARSET.
145 It is assumed that IDX is in a valid range. */
147 #define INDEX_TO_CODE_POINT(charset, idx) \
148 ((charset)->code_linear_p \
149 ? (idx) + (charset)->min_code \
150 : (idx += (charset)->char_index_offset, \
151 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
152 | (((charset)->code_space[4] \
153 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
155 | (((charset)->code_space[8] \
156 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
158 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
164 /* Set to 1 to warn that a charset map is loaded and thus a buffer
165 text and a string data may be relocated. */
166 int charset_map_loaded
;
168 struct charset_map_entries
174 struct charset_map_entries
*next
;
177 /* Load the mapping information for CHARSET from ENTRIES.
179 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
181 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
182 CHARSET->decoder, and CHARSET->encoder.
184 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
185 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
189 load_charset_map (charset
, entries
, n_entries
, control_flag
)
190 struct charset
*charset
;
191 struct charset_map_entries
*entries
;
195 Lisp_Object vec
, table
;
196 unsigned max_code
= CHARSET_MAX_CODE (charset
);
197 int ascii_compatible_p
= charset
->ascii_compatible_p
;
198 int min_char
, max_char
, nonascii_min_char
;
200 unsigned char *fast_map
= charset
->fast_map
;
205 if (control_flag
> 0)
207 int n
= CODE_POINT_TO_INDEX (charset
, max_code
) + 1;
209 table
= Fmake_char_table (Qnil
, Qnil
);
210 if (control_flag
== 1)
211 vec
= Fmake_vector (make_number (n
), make_number (-1));
212 else if (! CHAR_TABLE_P (Vchar_unify_table
))
213 Vchar_unify_table
= Fmake_char_table (Qnil
, Qnil
);
215 charset_map_loaded
= 1;
218 min_char
= max_char
= entries
->entry
[0].c
;
219 nonascii_min_char
= MAX_CHAR
;
220 for (i
= 0; i
< n_entries
; i
++)
223 int from_index
, to_index
;
225 int idx
= i
% 0x10000;
227 if (i
> 0 && idx
== 0)
228 entries
= entries
->next
;
229 from
= entries
->entry
[idx
].from
;
230 to
= entries
->entry
[idx
].to
;
231 from_c
= entries
->entry
[idx
].c
;
232 from_index
= CODE_POINT_TO_INDEX (charset
, from
);
235 to_index
= from_index
;
240 to_index
= CODE_POINT_TO_INDEX (charset
, to
);
241 to_c
= from_c
+ (to_index
- from_index
);
243 if (from_index
< 0 || to_index
< 0)
246 if (control_flag
< 2)
252 else if (from_c
< min_char
)
254 if (ascii_compatible_p
)
256 if (! ASCII_BYTE_P (from_c
))
258 if (from_c
< nonascii_min_char
)
259 nonascii_min_char
= from_c
;
261 else if (! ASCII_BYTE_P (to_c
))
263 nonascii_min_char
= 0x80;
267 for (c
= from_c
; c
<= to_c
; c
++)
268 CHARSET_FAST_MAP_SET (c
, fast_map
);
270 if (control_flag
== 1)
272 unsigned code
= from
;
274 if (CHARSET_COMPACT_CODES_P (charset
))
277 ASET (vec
, from_index
, make_number (from_c
));
278 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
279 CHAR_TABLE_SET (table
, from_c
, make_number (code
));
280 if (from_index
== to_index
)
282 from_index
++, from_c
++;
283 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
286 for (; from_index
<= to_index
; from_index
++, from_c
++)
288 ASET (vec
, from_index
, make_number (from_c
));
289 if (NILP (CHAR_TABLE_REF (table
, from_c
)))
290 CHAR_TABLE_SET (table
, from_c
, make_number (from_index
));
296 unsigned code
= from
;
300 int c1
= DECODE_CHAR (charset
, code
);
304 CHAR_TABLE_SET (table
, from_c
, make_number (c1
));
305 CHAR_TABLE_SET (Vchar_unify_table
, c1
, make_number (from_c
));
306 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
307 CHAR_TABLE_SET (Vchar_unified_charset_table
, c1
,
308 CHARSET_NAME (charset
));
310 if (from_index
== to_index
)
312 from_index
++, from_c
++;
313 code
= INDEX_TO_CODE_POINT (charset
, from_index
);
318 if (control_flag
< 2)
320 CHARSET_MIN_CHAR (charset
) = (ascii_compatible_p
321 ? nonascii_min_char
: min_char
);
322 CHARSET_MAX_CHAR (charset
) = max_char
;
323 if (control_flag
== 1)
325 CHARSET_DECODER (charset
) = vec
;
326 CHARSET_ENCODER (charset
) = table
;
330 CHARSET_DEUNIFIER (charset
) = table
;
334 /* Read a hexadecimal number (preceded by "0x") from the file FP while
335 paying attention to comment charcter '#'. */
337 static INLINE
unsigned
345 while ((c
= getc (fp
)) != EOF
)
349 while ((c
= getc (fp
)) != EOF
&& c
!= '\n');
353 if ((c
= getc (fp
)) == EOF
|| c
== 'x')
365 while ((c
= getc (fp
)) != EOF
&& isxdigit (c
))
367 | (c
<= '9' ? c
- '0' : c
<= 'F' ? c
- 'A' + 10 : c
- 'a' + 10));
369 while ((c
= getc (fp
)) != EOF
&& isdigit (c
))
370 n
= (n
* 10) + c
- '0';
377 /* Return a mapping vector for CHARSET loaded from MAPFILE.
378 Each line of MAPFILE has this form
380 where 0xAAAA is a code-point and 0xCCCC is the corresponding
381 character code, or this form
383 where 0xAAAA and 0xBBBB are code-points specifying a range, and
384 0xCCCC is the first character code of the range.
386 The returned vector has this form:
387 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
388 where CODE1 is a code-point or a cons of code-points specifying a
391 extern void add_to_log
P_ ((char *, Lisp_Object
, Lisp_Object
));
394 load_charset_map_from_file (charset
, mapfile
, control_flag
)
395 struct charset
*charset
;
399 unsigned min_code
= CHARSET_MIN_CODE (charset
);
400 unsigned max_code
= CHARSET_MAX_CODE (charset
);
404 Lisp_Object suffixes
;
405 struct charset_map_entries
*head
, *entries
;
408 suffixes
= Fcons (build_string (".map"),
409 Fcons (build_string (".TXT"), Qnil
));
411 fd
= openp (Vcharset_map_path
, mapfile
, suffixes
, NULL
, Qnil
);
413 || ! (fp
= fdopen (fd
, "r")))
415 add_to_log ("Failure in loading charset map: %S", mapfile
, Qnil
);
419 head
= entries
= ((struct charset_map_entries
*)
420 alloca (sizeof (struct charset_map_entries
)));
429 from
= read_hex (fp
, &eof
);
432 if (getc (fp
) == '-')
433 to
= read_hex (fp
, &eof
);
436 c
= (int) read_hex (fp
, &eof
);
438 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
441 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
443 entries
->next
= ((struct charset_map_entries
*)
444 alloca (sizeof (struct charset_map_entries
)));
445 entries
= entries
->next
;
447 idx
= n_entries
% 0x10000;
448 entries
->entry
[idx
].from
= from
;
449 entries
->entry
[idx
].to
= to
;
450 entries
->entry
[idx
].c
= c
;
456 load_charset_map (charset
, head
, n_entries
, control_flag
);
460 load_charset_map_from_vector (charset
, vec
, control_flag
)
461 struct charset
*charset
;
465 unsigned min_code
= CHARSET_MIN_CODE (charset
);
466 unsigned max_code
= CHARSET_MAX_CODE (charset
);
467 struct charset_map_entries
*head
, *entries
;
469 int len
= ASIZE (vec
);
474 add_to_log ("Failure in loading charset map: %V", vec
, Qnil
);
478 head
= entries
= ((struct charset_map_entries
*)
479 alloca (sizeof (struct charset_map_entries
)));
481 for (i
= 0; i
< len
; i
+= 2)
483 Lisp_Object val
, val2
;
495 from
= XFASTINT (val
);
496 to
= XFASTINT (val2
);
501 from
= to
= XFASTINT (val
);
503 val
= AREF (vec
, i
+ 1);
507 if (from
< min_code
|| to
> max_code
|| from
> to
|| c
> MAX_CHAR
)
510 if (n_entries
> 0 && (n_entries
% 0x10000) == 0)
512 entries
->next
= ((struct charset_map_entries
*)
513 alloca (sizeof (struct charset_map_entries
)));
514 entries
= entries
->next
;
516 idx
= n_entries
% 0x10000;
517 entries
->entry
[idx
].from
= from
;
518 entries
->entry
[idx
].to
= to
;
519 entries
->entry
[idx
].c
= c
;
523 load_charset_map (charset
, head
, n_entries
, control_flag
);
527 load_charset (charset
)
528 struct charset
*charset
;
530 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
534 map
= CHARSET_MAP (charset
);
536 load_charset_map_from_file (charset
, map
, 1);
538 load_charset_map_from_vector (charset
, map
, 1);
539 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP
;
544 DEFUN ("charsetp", Fcharsetp
, Scharsetp
, 1, 1, 0,
545 doc
: /* Return non-nil if and only if OBJECT is a charset.*/)
549 return (CHARSETP (object
) ? Qt
: Qnil
);
554 map_charset_chars (c_function
, function
, arg
,
556 void (*c_function
) P_ ((Lisp_Object
, Lisp_Object
));
557 Lisp_Object function
, arg
;
558 struct charset
*charset
;
564 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP_DEFERRED
)
565 load_charset (charset
);
567 partial
= (from
> CHARSET_MIN_CODE (charset
)
568 || to
< CHARSET_MAX_CODE (charset
));
570 if (CHARSET_UNIFIED_P (charset
)
571 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset
)))
573 map_char_table_for_charset (c_function
, function
,
574 CHARSET_DEUNIFIER (charset
), arg
,
575 partial
? charset
: NULL
, from
, to
);
578 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_OFFSET
)
580 int from_idx
= CODE_POINT_TO_INDEX (charset
, from
);
581 int to_idx
= CODE_POINT_TO_INDEX (charset
, to
);
582 int from_c
= from_idx
+ CHARSET_CODE_OFFSET (charset
);
583 int to_c
= to_idx
+ CHARSET_CODE_OFFSET (charset
);
585 range
= Fcons (make_number (from_c
), make_number (to_c
));
587 (*c_function
) (arg
, range
);
589 call2 (function
, range
, arg
);
591 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
593 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
595 map_char_table_for_charset (c_function
, function
,
596 CHARSET_ENCODER (charset
), arg
,
597 partial
? charset
: NULL
, from
, to
);
599 else if (CHARSET_METHOD (charset
) == CHARSET_METHOD_SUBSET
)
601 Lisp_Object subset_info
;
604 subset_info
= CHARSET_SUBSET (charset
);
605 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
606 offset
= XINT (AREF (subset_info
, 3));
608 if (from
< XFASTINT (AREF (subset_info
, 1)))
609 from
= XFASTINT (AREF (subset_info
, 1));
611 if (to
> XFASTINT (AREF (subset_info
, 2)))
612 to
= XFASTINT (AREF (subset_info
, 2));
613 map_charset_chars (c_function
, function
, arg
, charset
, from
, to
);
615 else /* i.e. CHARSET_METHOD_SUPERSET */
619 for (parents
= CHARSET_SUPERSET (charset
); CONSP (parents
);
620 parents
= XCDR (parents
))
623 unsigned this_from
, this_to
;
625 charset
= CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents
))));
626 offset
= XINT (XCDR (XCAR (parents
)));
627 this_from
= from
- offset
;
628 this_to
= to
- offset
;
629 if (this_from
< CHARSET_MIN_CODE (charset
))
630 this_from
= CHARSET_MIN_CODE (charset
);
631 if (this_to
> CHARSET_MAX_CODE (charset
))
632 this_to
= CHARSET_MAX_CODE (charset
);
633 map_charset_chars (c_function
, function
, arg
, charset
,
639 DEFUN ("map-charset-chars", Fmap_charset_chars
, Smap_charset_chars
, 2, 5, 0,
640 doc
: /* Call FUNCTION for all characters in CHARSET.
641 FUNCTION is called with an argument RANGE and the optional 3rd
644 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
645 characters contained in CHARSET.
647 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
648 range of code points of target characters. */)
649 (function
, charset
, arg
, from_code
, to_code
)
650 Lisp_Object function
, charset
, arg
, from_code
, to_code
;
655 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
656 if (NILP (from_code
))
657 from
= CHARSET_MIN_CODE (cs
);
660 CHECK_NATNUM (from_code
);
661 from
= XINT (from_code
);
662 if (from
< CHARSET_MIN_CODE (cs
))
663 from
= CHARSET_MIN_CODE (cs
);
666 to
= CHARSET_MAX_CODE (cs
);
669 CHECK_NATNUM (to_code
);
671 if (to
> CHARSET_MAX_CODE (cs
))
672 to
= CHARSET_MAX_CODE (cs
);
674 map_charset_chars (NULL
, function
, arg
, cs
, from
, to
);
679 /* Define a charset according to the arguments. The Nth argument is
680 the Nth attribute of the charset (the last attribute `charset-id'
681 is not included). See the docstring of `define-charset' for the
684 DEFUN ("define-charset-internal", Fdefine_charset_internal
,
685 Sdefine_charset_internal
, charset_arg_max
, MANY
, 0,
686 doc
: /* For internal use only.
687 usage: (define-charset-internal ...) */)
692 /* Charset attr vector. */
696 struct Lisp_Hash_Table
*hash_table
= XHASH_TABLE (Vcharset_hash_table
);
698 struct charset charset
;
701 int new_definition_p
;
704 if (nargs
!= charset_arg_max
)
705 return Fsignal (Qwrong_number_of_arguments
,
706 Fcons (intern ("define-charset-internal"),
707 make_number (nargs
)));
709 attrs
= Fmake_vector (make_number (charset_attr_max
), Qnil
);
711 CHECK_SYMBOL (args
[charset_arg_name
]);
712 ASET (attrs
, charset_name
, args
[charset_arg_name
]);
714 val
= args
[charset_arg_code_space
];
715 for (i
= 0, dimension
= 0, nchars
= 1; i
< 4; i
++)
717 int min_byte
, max_byte
;
719 min_byte
= XINT (Faref (val
, make_number (i
* 2)));
720 max_byte
= XINT (Faref (val
, make_number (i
* 2 + 1)));
721 if (min_byte
< 0 || min_byte
> max_byte
|| max_byte
>= 256)
722 error ("Invalid :code-space value");
723 charset
.code_space
[i
* 4] = min_byte
;
724 charset
.code_space
[i
* 4 + 1] = max_byte
;
725 charset
.code_space
[i
* 4 + 2] = max_byte
- min_byte
+ 1;
726 nchars
*= charset
.code_space
[i
* 4 + 2];
727 charset
.code_space
[i
* 4 + 3] = nchars
;
732 val
= args
[charset_arg_dimension
];
734 charset
.dimension
= dimension
;
738 charset
.dimension
= XINT (val
);
739 if (charset
.dimension
< 1 || charset
.dimension
> 4)
740 args_out_of_range_3 (val
, make_number (1), make_number (4));
743 charset
.code_linear_p
744 = (charset
.dimension
== 1
745 || (charset
.code_space
[2] == 256
746 && (charset
.dimension
== 2
747 || (charset
.code_space
[6] == 256
748 && (charset
.dimension
== 3
749 || charset
.code_space
[10] == 256)))));
751 if (! charset
.code_linear_p
)
753 charset
.code_space_mask
= (unsigned char *) xmalloc (256);
754 bzero (charset
.code_space_mask
, 256);
755 for (i
= 0; i
< 4; i
++)
756 for (j
= charset
.code_space
[i
* 4]; j
<= charset
.code_space
[i
* 4 + 1];
758 charset
.code_space_mask
[j
] |= (1 << i
);
761 charset
.iso_chars_96
= charset
.code_space
[2] == 96;
763 charset
.min_code
= (charset
.code_space
[0]
764 | (charset
.code_space
[4] << 8)
765 | (charset
.code_space
[8] << 16)
766 | (charset
.code_space
[12] << 24));
767 charset
.max_code
= (charset
.code_space
[1]
768 | (charset
.code_space
[5] << 8)
769 | (charset
.code_space
[9] << 16)
770 | (charset
.code_space
[13] << 24));
771 charset
.char_index_offset
= 0;
773 val
= args
[charset_arg_min_code
];
783 CHECK_NUMBER_CAR (val
);
784 CHECK_NUMBER_CDR (val
);
785 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
787 if (code
< charset
.min_code
788 || code
> charset
.max_code
)
789 args_out_of_range_3 (make_number (charset
.min_code
),
790 make_number (charset
.max_code
), val
);
791 charset
.char_index_offset
= CODE_POINT_TO_INDEX (&charset
, code
);
792 charset
.min_code
= code
;
795 val
= args
[charset_arg_max_code
];
805 CHECK_NUMBER_CAR (val
);
806 CHECK_NUMBER_CDR (val
);
807 code
= (XINT (XCAR (val
)) << 16) | (XINT (XCDR (val
)));
809 if (code
< charset
.min_code
810 || code
> charset
.max_code
)
811 args_out_of_range_3 (make_number (charset
.min_code
),
812 make_number (charset
.max_code
), val
);
813 charset
.max_code
= code
;
816 charset
.compact_codes_p
= charset
.max_code
< 0x1000000;
818 val
= args
[charset_arg_invalid_code
];
821 if (charset
.min_code
> 0)
822 charset
.invalid_code
= 0;
825 XSETINT (val
, charset
.max_code
+ 1);
826 if (XINT (val
) == charset
.max_code
+ 1)
827 charset
.invalid_code
= charset
.max_code
+ 1;
829 error ("Attribute :invalid-code must be specified");
835 charset
.invalid_code
= XFASTINT (val
);
838 val
= args
[charset_arg_iso_final
];
840 charset
.iso_final
= -1;
844 if (XINT (val
) < '0' || XINT (val
) > 127)
845 error ("Invalid iso-final-char: %d", XINT (val
));
846 charset
.iso_final
= XINT (val
);
849 val
= args
[charset_arg_iso_revision
];
851 charset
.iso_revision
= -1;
856 args_out_of_range (make_number (63), val
);
857 charset
.iso_revision
= XINT (val
);
860 val
= args
[charset_arg_emacs_mule_id
];
862 charset
.emacs_mule_id
= -1;
866 if ((XINT (val
) > 0 && XINT (val
) <= 128) || XINT (val
) >= 256)
867 error ("Invalid emacs-mule-id: %d", XINT (val
));
868 charset
.emacs_mule_id
= XINT (val
);
871 charset
.ascii_compatible_p
= ! NILP (args
[charset_arg_ascii_compatible_p
]);
873 charset
.supplementary_p
= ! NILP (args
[charset_arg_supplementary_p
]);
875 charset
.unified_p
= 0;
877 bzero (charset
.fast_map
, sizeof (charset
.fast_map
));
879 if (! NILP (args
[charset_arg_code_offset
]))
881 val
= args
[charset_arg_code_offset
];
884 charset
.method
= CHARSET_METHOD_OFFSET
;
885 charset
.code_offset
= XINT (val
);
887 i
= CODE_POINT_TO_INDEX (&charset
, charset
.min_code
);
888 charset
.min_char
= i
+ charset
.code_offset
;
889 i
= CODE_POINT_TO_INDEX (&charset
, charset
.max_code
);
890 charset
.max_char
= i
+ charset
.code_offset
;
891 if (charset
.max_char
> MAX_CHAR
)
892 error ("Unsupported max char: %d", charset
.max_char
);
894 i
= (charset
.min_char
>> 7) << 7;
895 for (; i
< 0x10000 && i
<= charset
.max_char
; i
+= 128)
896 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
898 for (; i
<= charset
.max_char
; i
+= 0x1000)
899 CHARSET_FAST_MAP_SET (i
, charset
.fast_map
);
901 else if (! NILP (args
[charset_arg_map
]))
903 val
= args
[charset_arg_map
];
904 ASET (attrs
, charset_map
, val
);
906 load_charset_map_from_file (&charset
, val
, 0);
908 load_charset_map_from_vector (&charset
, val
, 0);
909 charset
.method
= CHARSET_METHOD_MAP_DEFERRED
;
911 else if (! NILP (args
[charset_arg_subset
]))
914 Lisp_Object parent_min_code
, parent_max_code
, parent_code_offset
;
915 struct charset
*parent_charset
;
917 val
= args
[charset_arg_subset
];
919 CHECK_CHARSET_GET_CHARSET (parent
, parent_charset
);
920 parent_min_code
= Fnth (make_number (1), val
);
921 CHECK_NATNUM (parent_min_code
);
922 parent_max_code
= Fnth (make_number (2), val
);
923 CHECK_NATNUM (parent_max_code
);
924 parent_code_offset
= Fnth (make_number (3), val
);
925 CHECK_NUMBER (parent_code_offset
);
926 val
= Fmake_vector (make_number (4), Qnil
);
927 ASET (val
, 0, make_number (parent_charset
->id
));
928 ASET (val
, 1, parent_min_code
);
929 ASET (val
, 2, parent_max_code
);
930 ASET (val
, 3, parent_code_offset
);
931 ASET (attrs
, charset_subset
, val
);
933 charset
.method
= CHARSET_METHOD_SUBSET
;
934 /* Here, we just copy the parent's fast_map. It's not accurate,
935 but at least it works for quickly detecting which character
936 DOESN'T belong to this charset. */
937 for (i
= 0; i
< 190; i
++)
938 charset
.fast_map
[i
] = parent_charset
->fast_map
[i
];
940 /* We also copy these for parents. */
941 charset
.min_char
= parent_charset
->min_char
;
942 charset
.max_char
= parent_charset
->max_char
;
944 else if (! NILP (args
[charset_arg_superset
]))
946 val
= args
[charset_arg_superset
];
947 charset
.method
= CHARSET_METHOD_SUPERSET
;
948 val
= Fcopy_sequence (val
);
949 ASET (attrs
, charset_superset
, val
);
951 charset
.min_char
= MAX_CHAR
;
952 charset
.max_char
= 0;
953 for (; ! NILP (val
); val
= Fcdr (val
))
955 Lisp_Object elt
, car_part
, cdr_part
;
957 struct charset
*this_charset
;
962 car_part
= XCAR (elt
);
963 cdr_part
= XCDR (elt
);
964 CHECK_CHARSET_GET_ID (car_part
, this_id
);
965 CHECK_NUMBER (cdr_part
);
966 offset
= XINT (cdr_part
);
970 CHECK_CHARSET_GET_ID (elt
, this_id
);
973 XSETCAR (val
, Fcons (make_number (this_id
), make_number (offset
)));
975 this_charset
= CHARSET_FROM_ID (this_id
);
976 if (charset
.min_char
> this_charset
->min_char
)
977 charset
.min_char
= this_charset
->min_char
;
978 if (charset
.max_char
< this_charset
->max_char
)
979 charset
.max_char
= this_charset
->max_char
;
980 for (i
= 0; i
< 190; i
++)
981 charset
.fast_map
[i
] |= this_charset
->fast_map
[i
];
985 error ("None of :code-offset, :map, :parents are specified");
987 val
= args
[charset_arg_unify_map
];
988 if (! NILP (val
) && !STRINGP (val
))
990 ASET (attrs
, charset_unify_map
, val
);
992 CHECK_LIST (args
[charset_arg_plist
]);
993 ASET (attrs
, charset_plist
, args
[charset_arg_plist
]);
995 charset
.hash_index
= hash_lookup (hash_table
, args
[charset_arg_name
],
997 if (charset
.hash_index
>= 0)
999 new_definition_p
= 0;
1000 id
= XFASTINT (CHARSET_SYMBOL_ID (args
[charset_arg_name
]));
1001 HASH_VALUE (hash_table
, charset
.hash_index
) = attrs
;
1005 charset
.hash_index
= hash_put (hash_table
, args
[charset_arg_name
], attrs
,
1007 if (charset_table_used
== charset_table_size
)
1009 struct charset
*new_table
1010 = (struct charset
*) xmalloc (sizeof (struct charset
)
1011 * (charset_table_size
+ 16));
1012 bcopy (charset_table
, new_table
,
1013 sizeof (struct charset
) * charset_table_size
);
1014 charset_table_size
+= 16;
1015 charset_table
= new_table
;
1017 id
= charset_table_used
++;
1018 new_definition_p
= 1;
1021 ASET (attrs
, charset_id
, make_number (id
));
1023 charset_table
[id
] = charset
;
1025 if (charset
.iso_final
>= 0)
1027 ISO_CHARSET_TABLE (charset
.dimension
, charset
.iso_chars_96
,
1028 charset
.iso_final
) = id
;
1029 if (new_definition_p
)
1030 Viso_2022_charset_list
= nconc2 (Viso_2022_charset_list
,
1031 Fcons (make_number (id
), Qnil
));
1032 if (ISO_CHARSET_TABLE (1, 0, 'J') == id
)
1033 charset_jisx0201_roman
= id
;
1034 else if (ISO_CHARSET_TABLE (2, 0, '@') == id
)
1035 charset_jisx0208_1978
= id
;
1036 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id
)
1037 charset_jisx0208
= id
;
1040 if (charset
.emacs_mule_id
>= 0)
1042 emacs_mule_charset
[charset
.emacs_mule_id
] = CHARSET_FROM_ID (id
);
1043 if (charset
.emacs_mule_id
< 0xA0)
1044 emacs_mule_bytes
[charset
.emacs_mule_id
] = charset
.dimension
+ 1;
1045 if (new_definition_p
)
1046 Vemacs_mule_charset_list
= nconc2 (Vemacs_mule_charset_list
,
1047 Fcons (make_number (id
), Qnil
));
1050 if (new_definition_p
)
1052 Vcharset_list
= Fcons (args
[charset_arg_name
], Vcharset_list
);
1053 Vcharset_ordered_list
= nconc2 (Vcharset_ordered_list
,
1054 Fcons (make_number (id
), Qnil
));
1055 charset_ordered_list_tick
++;
1062 /* Same as Fdefine_charset_internal but arguments are more convenient
1063 to call from C (typically in syms_of_charset). This can define a
1064 charset of `offset' method only. Return the ID of the new
1068 define_charset_internal (name
, dimension
, code_space
, min_code
, max_code
,
1069 iso_final
, iso_revision
, emacs_mule_id
,
1070 ascii_compatible
, supplementary
,
1074 unsigned char *code_space
;
1075 unsigned min_code
, max_code
;
1076 int iso_final
, iso_revision
, emacs_mule_id
;
1077 int ascii_compatible
, supplementary
;
1080 Lisp_Object args
[charset_arg_max
];
1081 Lisp_Object plist
[14];
1085 args
[charset_arg_name
] = name
;
1086 args
[charset_arg_dimension
] = make_number (dimension
);
1087 val
= Fmake_vector (make_number (8), make_number (0));
1088 for (i
= 0; i
< 8; i
++)
1089 ASET (val
, i
, make_number (code_space
[i
]));
1090 args
[charset_arg_code_space
] = val
;
1091 args
[charset_arg_min_code
] = make_number (min_code
);
1092 args
[charset_arg_max_code
] = make_number (max_code
);
1093 args
[charset_arg_iso_final
]
1094 = (iso_final
< 0 ? Qnil
: make_number (iso_final
));
1095 args
[charset_arg_iso_revision
] = make_number (iso_revision
);
1096 args
[charset_arg_emacs_mule_id
]
1097 = (emacs_mule_id
< 0 ? Qnil
: make_number (emacs_mule_id
));
1098 args
[charset_arg_ascii_compatible_p
] = ascii_compatible
? Qt
: Qnil
;
1099 args
[charset_arg_supplementary_p
] = supplementary
? Qt
: Qnil
;
1100 args
[charset_arg_invalid_code
] = Qnil
;
1101 args
[charset_arg_code_offset
] = make_number (code_offset
);
1102 args
[charset_arg_map
] = Qnil
;
1103 args
[charset_arg_subset
] = Qnil
;
1104 args
[charset_arg_superset
] = Qnil
;
1105 args
[charset_arg_unify_map
] = Qnil
;
1107 plist
[0] = intern (":name");
1108 plist
[1] = args
[charset_arg_name
];
1109 plist
[2] = intern (":dimension");
1110 plist
[3] = args
[charset_arg_dimension
];
1111 plist
[4] = intern (":code-space");
1112 plist
[5] = args
[charset_arg_code_space
];
1113 plist
[6] = intern (":iso-final-char");
1114 plist
[7] = args
[charset_arg_iso_final
];
1115 plist
[8] = intern (":emacs-mule-id");
1116 plist
[9] = args
[charset_arg_emacs_mule_id
];
1117 plist
[10] = intern (":ascii-compatible-p");
1118 plist
[11] = args
[charset_arg_ascii_compatible_p
];
1119 plist
[12] = intern (":code-offset");
1120 plist
[13] = args
[charset_arg_code_offset
];
1122 args
[charset_arg_plist
] = Flist (14, plist
);
1123 Fdefine_charset_internal (charset_arg_max
, args
);
1125 return XINT (CHARSET_SYMBOL_ID (name
));
1129 DEFUN ("define-charset-alias", Fdefine_charset_alias
,
1130 Sdefine_charset_alias
, 2, 2, 0,
1131 doc
: /* Define ALIAS as an alias for charset CHARSET. */)
1133 Lisp_Object alias
, charset
;
1137 CHECK_CHARSET_GET_ATTR (charset
, attr
);
1138 Fputhash (alias
, attr
, Vcharset_hash_table
);
1139 Vcharset_list
= Fcons (alias
, Vcharset_list
);
1144 DEFUN ("unibyte-charset", Funibyte_charset
, Sunibyte_charset
, 0, 0, 0,
1145 doc
: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
1148 return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte
));
1152 DEFUN ("set-unibyte-charset", Fset_unibyte_charset
, Sset_unibyte_charset
,
1154 doc
: /* Set the unibyte charset to CHARSET.
1155 This determines how unibyte/multibyte conversion is done. See also
1156 function `unibyte-charset'. */)
1158 Lisp_Object charset
;
1163 CHECK_CHARSET_GET_CHARSET (charset
, cs
);
1164 if (! cs
->ascii_compatible_p
1165 || cs
->dimension
!= 1)
1166 error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset
)));
1167 charset_unibyte
= cs
->id
;
1168 memset (unibyte_has_multibyte_table
, 1, 128);
1169 for (i
= 128; i
< 256; i
++)
1171 c
= DECODE_CHAR (cs
, i
);
1172 unibyte_to_multibyte_table
[i
] = (c
< 0 ? BYTE8_TO_CHAR (i
) : c
);
1173 unibyte_has_multibyte_table
[i
] = c
>= 0;
1180 DEFUN ("charset-plist", Fcharset_plist
, Scharset_plist
, 1, 1, 0,
1181 doc
: /* Return the property list of CHARSET. */)
1183 Lisp_Object charset
;
1187 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1188 return CHARSET_ATTR_PLIST (attrs
);
1192 DEFUN ("set-charset-plist", Fset_charset_plist
, Sset_charset_plist
, 2, 2, 0,
1193 doc
: /* Set CHARSET's property list to PLIST. */)
1195 Lisp_Object charset
, plist
;
1199 CHECK_CHARSET_GET_ATTR (charset
, attrs
);
1200 CHARSET_ATTR_PLIST (attrs
) = plist
;
1205 DEFUN ("unify-charset", Funify_charset
, Sunify_charset
, 1, 3, 0,
1206 doc
: /* Unify characters of CHARSET with Unicode.
1207 This means reading the relevant file and installing the table defined
1208 by CHARSET's `:unify-map' property.
1210 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1211 the same meaning as the `:unify-map' attribute in the function
1212 `define-charset' (which see).
1214 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1215 (charset
, unify_map
, deunify
)
1216 Lisp_Object charset
, unify_map
, deunify
;
1221 CHECK_CHARSET_GET_ID (charset
, id
);
1222 cs
= CHARSET_FROM_ID (id
);
1223 if (CHARSET_METHOD (cs
) == CHARSET_METHOD_MAP_DEFERRED
)
1226 ? CHARSET_UNIFIED_P (cs
) && ! NILP (CHARSET_DEUNIFIER (cs
))
1227 : ! CHARSET_UNIFIED_P (cs
))
1230 CHARSET_UNIFIED_P (cs
) = 0;
1233 if (CHARSET_METHOD (cs
) != CHARSET_METHOD_OFFSET
)
1234 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset
)));
1235 if (NILP (unify_map
))
1236 unify_map
= CHARSET_UNIFY_MAP (cs
);
1237 if (STRINGP (unify_map
))
1238 load_charset_map_from_file (cs
, unify_map
, 2);
1239 else if (VECTORP (unify_map
))
1240 load_charset_map_from_vector (cs
, unify_map
, 2);
1241 else if (NILP (unify_map
))
1242 error ("No unify-map for charset");
1244 error ("Bad unify-map arg");
1245 CHARSET_UNIFIED_P (cs
) = 1;
1247 else if (CHAR_TABLE_P (Vchar_unify_table
))
1249 int min_code
= CHARSET_MIN_CODE (cs
);
1250 int max_code
= CHARSET_MAX_CODE (cs
);
1251 int min_char
= DECODE_CHAR (cs
, min_code
);
1252 int max_char
= DECODE_CHAR (cs
, max_code
);
1254 char_table_set_range (Vchar_unify_table
, min_char
, max_char
, Qnil
);
1260 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
1261 Sget_unused_iso_final_char
, 2, 2, 0,
1263 Return an unused ISO final char for a charset of DIMENISION and CHARS.
1264 DIMENSION is the number of bytes to represent a character: 1 or 2.
1265 CHARS is the number of characters in a dimension: 94 or 96.
1267 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1268 If there's no unused final char for the specified kind of charset,
1271 Lisp_Object dimension
, chars
;
1275 CHECK_NUMBER (dimension
);
1276 CHECK_NUMBER (chars
);
1277 if (XINT (dimension
) != 1 && XINT (dimension
) != 2 && XINT (dimension
) != 3)
1278 args_out_of_range_3 (dimension
, make_number (1), make_number (3));
1279 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1280 args_out_of_range_3 (chars
, make_number (94), make_number (96));
1281 for (final_char
= '0'; final_char
<= '?'; final_char
++)
1282 if (ISO_CHARSET_TABLE (XINT (dimension
), XINT (chars
), final_char
) < 0)
1284 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
1288 check_iso_charset_parameter (dimension
, chars
, final_char
)
1289 Lisp_Object dimension
, chars
, final_char
;
1291 CHECK_NATNUM (dimension
);
1292 CHECK_NATNUM (chars
);
1293 CHECK_NATNUM (final_char
);
1295 if (XINT (dimension
) > 3)
1296 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension
));
1297 if (XINT (chars
) != 94 && XINT (chars
) != 96)
1298 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
1299 if (XINT (final_char
) < '0' || XINT (final_char
) > '~')
1300 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
1304 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
1306 doc
: /* Declare an equivalent charset for ISO-2022 decoding.
1308 On decoding by an ISO-2022 base coding system, when a charset
1309 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1310 if CHARSET is designated instead. */)
1311 (dimension
, chars
, final_char
, charset
)
1312 Lisp_Object dimension
, chars
, final_char
, charset
;
1317 CHECK_CHARSET_GET_ID (charset
, id
);
1318 check_iso_charset_parameter (dimension
, chars
, final_char
);
1319 chars_flag
= XINT (chars
) == 96;
1320 ISO_CHARSET_TABLE (XINT (dimension
), chars_flag
, XINT (final_char
)) = id
;
1325 /* Return information about charsets in the text at PTR of NBYTES
1326 bytes, which are NCHARS characters. The value is:
1328 0: Each character is represented by one byte. This is always
1329 true for a unibyte string. For a multibyte string, true if
1330 it contains only ASCII characters.
1332 1: No charsets other than ascii, control-1, and latin-1 are
1339 string_xstring_p (string
)
1342 const unsigned char *p
= SDATA (string
);
1343 const unsigned char *endp
= p
+ SBYTES (string
);
1345 if (SCHARS (string
) == SBYTES (string
))
1350 int c
= STRING_CHAR_ADVANCE (p
);
1359 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1361 CHARSETS is a vector. If Nth element is non-nil, it means the
1362 charset whose id is N is already found.
1364 It may lookup a translation table TABLE if supplied. */
1367 find_charsets_in_text (ptr
, nchars
, nbytes
, charsets
, table
, multibyte
)
1368 const unsigned char *ptr
;
1369 EMACS_INT nchars
, nbytes
;
1370 Lisp_Object charsets
, table
;
1373 const unsigned char *pend
= ptr
+ nbytes
;
1375 if (nchars
== nbytes
)
1378 ASET (charsets
, charset_ascii
, Qt
);
1385 c
= translate_char (table
, c
);
1386 if (ASCII_BYTE_P (c
))
1387 ASET (charsets
, charset_ascii
, Qt
);
1389 ASET (charsets
, charset_eight_bit
, Qt
);
1396 int c
= STRING_CHAR_ADVANCE (ptr
);
1397 struct charset
*charset
;
1400 c
= translate_char (table
, c
);
1401 charset
= CHAR_CHARSET (c
);
1402 ASET (charsets
, CHARSET_ID (charset
), Qt
);
1407 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
1409 doc
: /* Return a list of charsets in the region between BEG and END.
1410 BEG and END are buffer positions.
1411 Optional arg TABLE if non-nil is a translation table to look up.
1413 If the current buffer is unibyte, the returned list may contain
1414 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1416 Lisp_Object beg
, end
, table
;
1418 Lisp_Object charsets
;
1419 EMACS_INT from
, from_byte
, to
, stop
, stop_byte
;
1422 int multibyte
= ! NILP (current_buffer
->enable_multibyte_characters
);
1424 validate_region (&beg
, &end
);
1425 from
= XFASTINT (beg
);
1426 stop
= to
= XFASTINT (end
);
1428 if (from
< GPT
&& GPT
< to
)
1431 stop_byte
= GPT_BYTE
;
1434 stop_byte
= CHAR_TO_BYTE (stop
);
1436 from_byte
= CHAR_TO_BYTE (from
);
1438 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1441 find_charsets_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
1442 stop_byte
- from_byte
, charsets
, table
,
1446 from
= stop
, from_byte
= stop_byte
;
1447 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
1454 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1455 if (!NILP (AREF (charsets
, i
)))
1456 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1460 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1462 doc
: /* Return a list of charsets in STR.
1463 Optional arg TABLE if non-nil is a translation table to look up.
1465 If STR is unibyte, the returned list may contain
1466 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1468 Lisp_Object str
, table
;
1470 Lisp_Object charsets
;
1476 charsets
= Fmake_vector (make_number (charset_table_used
), Qnil
);
1477 find_charsets_in_text (SDATA (str
), SCHARS (str
), SBYTES (str
),
1479 STRING_MULTIBYTE (str
));
1481 for (i
= charset_table_used
- 1; i
>= 0; i
--)
1482 if (!NILP (AREF (charsets
, i
)))
1483 val
= Fcons (CHARSET_NAME (charset_table
+ i
), val
);
1489 /* Return a character correponding to the code-point CODE of
1493 decode_char (charset
, code
)
1494 struct charset
*charset
;
1498 enum charset_method method
= CHARSET_METHOD (charset
);
1500 if (code
< CHARSET_MIN_CODE (charset
) || code
> CHARSET_MAX_CODE (charset
))
1503 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1505 load_charset (charset
);
1506 method
= CHARSET_METHOD (charset
);
1509 if (method
== CHARSET_METHOD_SUBSET
)
1511 Lisp_Object subset_info
;
1513 subset_info
= CHARSET_SUBSET (charset
);
1514 charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1515 code
-= XINT (AREF (subset_info
, 3));
1516 if (code
< XFASTINT (AREF (subset_info
, 1))
1517 || code
> XFASTINT (AREF (subset_info
, 2)))
1520 c
= DECODE_CHAR (charset
, code
);
1522 else if (method
== CHARSET_METHOD_SUPERSET
)
1524 Lisp_Object parents
;
1526 parents
= CHARSET_SUPERSET (charset
);
1528 for (; CONSP (parents
); parents
= XCDR (parents
))
1530 int id
= XINT (XCAR (XCAR (parents
)));
1531 int code_offset
= XINT (XCDR (XCAR (parents
)));
1532 unsigned this_code
= code
- code_offset
;
1534 charset
= CHARSET_FROM_ID (id
);
1535 if ((c
= DECODE_CHAR (charset
, this_code
)) >= 0)
1541 char_index
= CODE_POINT_TO_INDEX (charset
, code
);
1545 if (method
== CHARSET_METHOD_MAP
)
1547 Lisp_Object decoder
;
1549 decoder
= CHARSET_DECODER (charset
);
1550 if (! VECTORP (decoder
))
1552 c
= XINT (AREF (decoder
, char_index
));
1556 c
= char_index
+ CHARSET_CODE_OFFSET (charset
);
1560 if (CHARSET_UNIFIED_P (charset
)
1563 MAYBE_UNIFY_CHAR (c
);
1569 /* Variable used temporarily by the macro ENCODE_CHAR. */
1570 Lisp_Object charset_work
;
1572 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1573 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1574 use CHARSET's strict_max_char instead of max_char. */
1577 encode_char (charset
, c
)
1578 struct charset
*charset
;
1582 enum charset_method method
= CHARSET_METHOD (charset
);
1584 if (CHARSET_UNIFIED_P (charset
))
1586 Lisp_Object deunifier
, deunified
;
1588 deunifier
= CHARSET_DEUNIFIER (charset
);
1589 if (! CHAR_TABLE_P (deunifier
))
1591 Funify_charset (CHARSET_NAME (charset
), Qnil
, Qnil
);
1592 deunifier
= CHARSET_DEUNIFIER (charset
);
1594 deunified
= CHAR_TABLE_REF (deunifier
, c
);
1595 if (! NILP (deunified
))
1596 c
= XINT (deunified
);
1599 if (method
== CHARSET_METHOD_SUBSET
)
1601 Lisp_Object subset_info
;
1602 struct charset
*this_charset
;
1604 subset_info
= CHARSET_SUBSET (charset
);
1605 this_charset
= CHARSET_FROM_ID (XFASTINT (AREF (subset_info
, 0)));
1606 code
= ENCODE_CHAR (this_charset
, c
);
1607 if (code
== CHARSET_INVALID_CODE (this_charset
)
1608 || code
< XFASTINT (AREF (subset_info
, 1))
1609 || code
> XFASTINT (AREF (subset_info
, 2)))
1610 return CHARSET_INVALID_CODE (charset
);
1611 code
+= XINT (AREF (subset_info
, 3));
1615 if (method
== CHARSET_METHOD_SUPERSET
)
1617 Lisp_Object parents
;
1619 parents
= CHARSET_SUPERSET (charset
);
1620 for (; CONSP (parents
); parents
= XCDR (parents
))
1622 int id
= XINT (XCAR (XCAR (parents
)));
1623 int code_offset
= XINT (XCDR (XCAR (parents
)));
1624 struct charset
*this_charset
= CHARSET_FROM_ID (id
);
1626 code
= ENCODE_CHAR (this_charset
, c
);
1627 if (code
!= CHARSET_INVALID_CODE (this_charset
))
1628 return code
+ code_offset
;
1630 return CHARSET_INVALID_CODE (charset
);
1633 if (! CHARSET_FAST_MAP_REF ((c
), charset
->fast_map
)
1634 || c
< CHARSET_MIN_CHAR (charset
) || c
> CHARSET_MAX_CHAR (charset
))
1635 return CHARSET_INVALID_CODE (charset
);
1637 if (method
== CHARSET_METHOD_MAP_DEFERRED
)
1639 load_charset (charset
);
1640 method
= CHARSET_METHOD (charset
);
1643 if (method
== CHARSET_METHOD_MAP
)
1645 Lisp_Object encoder
;
1648 encoder
= CHARSET_ENCODER (charset
);
1649 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset
)))
1650 return CHARSET_INVALID_CODE (charset
);
1651 val
= CHAR_TABLE_REF (encoder
, c
);
1653 return CHARSET_INVALID_CODE (charset
);
1655 if (! CHARSET_COMPACT_CODES_P (charset
))
1656 code
= INDEX_TO_CODE_POINT (charset
, code
);
1658 else /* method == CHARSET_METHOD_OFFSET */
1660 code
= c
- CHARSET_CODE_OFFSET (charset
);
1661 code
= INDEX_TO_CODE_POINT (charset
, code
);
1668 DEFUN ("decode-char", Fdecode_char
, Sdecode_char
, 2, 3, 0,
1669 doc
: /* Decode the pair of CHARSET and CODE-POINT into a character.
1670 Return nil if CODE-POINT is not valid in CHARSET.
1672 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1674 Optional argument RESTRICTION specifies a way to map the pair of CCS
1675 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1676 (charset
, code_point
, restriction
)
1677 Lisp_Object charset
, code_point
, restriction
;
1681 struct charset
*charsetp
;
1683 CHECK_CHARSET_GET_ID (charset
, id
);
1684 if (CONSP (code_point
))
1686 CHECK_NATNUM_CAR (code_point
);
1687 CHECK_NATNUM_CDR (code_point
);
1688 code
= (XINT (XCAR (code_point
)) << 16) | (XINT (XCDR (code_point
)));
1692 CHECK_NATNUM (code_point
);
1693 code
= XINT (code_point
);
1695 charsetp
= CHARSET_FROM_ID (id
);
1696 c
= DECODE_CHAR (charsetp
, code
);
1697 return (c
>= 0 ? make_number (c
) : Qnil
);
1701 DEFUN ("encode-char", Fencode_char
, Sencode_char
, 2, 3, 0,
1702 doc
: /* Encode the character CH into a code-point of CHARSET.
1703 Return nil if CHARSET doesn't include CH.
1705 Optional argument RESTRICTION specifies a way to map CHAR to a
1706 code-point in CCS. Currently not supported and just ignored. */)
1707 (ch
, charset
, restriction
)
1708 Lisp_Object ch
, charset
, restriction
;
1712 struct charset
*charsetp
;
1714 CHECK_CHARSET_GET_ID (charset
, id
);
1716 charsetp
= CHARSET_FROM_ID (id
);
1717 code
= ENCODE_CHAR (charsetp
, XINT (ch
));
1718 if (code
== CHARSET_INVALID_CODE (charsetp
))
1720 if (code
> 0x7FFFFFF)
1721 return Fcons (make_number (code
>> 16), make_number (code
& 0xFFFF));
1722 return make_number (code
);
1726 DEFUN ("make-char", Fmake_char
, Smake_char
, 1, 5, 0,
1728 /* Return a character of CHARSET whose position codes are CODEn.
1730 CODE1 through CODE4 are optional, but if you don't supply sufficient
1731 position codes, it is assumed that the minimum code in each dimension
1733 (charset
, code1
, code2
, code3
, code4
)
1734 Lisp_Object charset
, code1
, code2
, code3
, code4
;
1737 struct charset
*charsetp
;
1741 CHECK_CHARSET_GET_ID (charset
, id
);
1742 charsetp
= CHARSET_FROM_ID (id
);
1744 dimension
= CHARSET_DIMENSION (charsetp
);
1746 code
= (CHARSET_ASCII_COMPATIBLE_P (charsetp
)
1747 ? 0 : CHARSET_MIN_CODE (charsetp
));
1750 CHECK_NATNUM (code1
);
1751 if (XFASTINT (code1
) >= 0x100)
1752 args_out_of_range (make_number (0xFF), code1
);
1753 code
= XFASTINT (code1
);
1759 code
|= charsetp
->code_space
[(dimension
- 2) * 4];
1762 CHECK_NATNUM (code2
);
1763 if (XFASTINT (code2
) >= 0x100)
1764 args_out_of_range (make_number (0xFF), code2
);
1765 code
|= XFASTINT (code2
);
1772 code
|= charsetp
->code_space
[(dimension
- 3) * 4];
1775 CHECK_NATNUM (code3
);
1776 if (XFASTINT (code3
) >= 0x100)
1777 args_out_of_range (make_number (0xFF), code3
);
1778 code
|= XFASTINT (code3
);
1785 code
|= charsetp
->code_space
[0];
1788 CHECK_NATNUM (code4
);
1789 if (XFASTINT (code4
) >= 0x100)
1790 args_out_of_range (make_number (0xFF), code4
);
1791 code
|= XFASTINT (code4
);
1798 if (CHARSET_ISO_FINAL (charsetp
) >= 0)
1800 c
= DECODE_CHAR (charsetp
, code
);
1802 error ("Invalid code(s)");
1803 return make_number (c
);
1807 /* Return the first charset in CHARSET_LIST that contains C.
1808 CHARSET_LIST is a list of charset IDs. If it is nil, use
1809 Vcharset_ordered_list. */
1812 char_charset (c
, charset_list
, code_return
)
1814 Lisp_Object charset_list
;
1815 unsigned *code_return
;
1817 if (NILP (charset_list
))
1818 charset_list
= Vcharset_ordered_list
;
1820 while (CONSP (charset_list
))
1822 struct charset
*charset
= CHARSET_FROM_ID (XINT (XCAR (charset_list
)));
1823 unsigned code
= ENCODE_CHAR (charset
, c
);
1825 if (code
!= CHARSET_INVALID_CODE (charset
))
1828 *code_return
= code
;
1831 charset_list
= XCDR (charset_list
);
1837 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
1839 /*Return list of charset and one to four position-codes of CHAR.
1840 The charset is decided by the current priority order of charsets.
1841 A position-code is a byte value of each dimension of the code-point of
1842 CHAR in the charset. */)
1846 struct charset
*charset
;
1851 CHECK_CHARACTER (ch
);
1853 charset
= CHAR_CHARSET (c
);
1856 code
= ENCODE_CHAR (charset
, c
);
1857 if (code
== CHARSET_INVALID_CODE (charset
))
1859 dimension
= CHARSET_DIMENSION (charset
);
1860 for (val
= Qnil
; dimension
> 0; dimension
--)
1862 val
= Fcons (make_number (code
& 0xFF), val
);
1865 return Fcons (CHARSET_NAME (charset
), val
);
1869 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
1870 doc
: /* Return the charset of highest priority that contains CH. */)
1874 struct charset
*charset
;
1876 CHECK_CHARACTER (ch
);
1877 charset
= CHAR_CHARSET (XINT (ch
));
1878 return (CHARSET_NAME (charset
));
1882 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
1884 Return charset of a character in the current buffer at position POS.
1885 If POS is nil, it defauls to the current point.
1886 If POS is out of range, the value is nil. */)
1891 struct charset
*charset
;
1893 ch
= Fchar_after (pos
);
1894 if (! INTEGERP (ch
))
1896 charset
= CHAR_CHARSET (XINT (ch
));
1897 return (CHARSET_NAME (charset
));
1901 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
1903 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1905 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1906 by their DIMENSION, CHARS, and FINAL-CHAR,
1907 where as Emacs distinguishes them by charset symbol.
1908 See the documentation of the function `charset-info' for the meanings of
1909 DIMENSION, CHARS, and FINAL-CHAR. */)
1910 (dimension
, chars
, final_char
)
1911 Lisp_Object dimension
, chars
, final_char
;
1916 check_iso_charset_parameter (dimension
, chars
, final_char
);
1917 chars_flag
= XFASTINT (chars
) == 96;
1918 id
= ISO_CHARSET_TABLE (XFASTINT (dimension
), chars_flag
,
1919 XFASTINT (final_char
));
1920 return (id
>= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id
)) : Qnil
);
1924 DEFUN ("clear-charset-maps", Fclear_charset_maps
, Sclear_charset_maps
,
1927 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1931 struct charset
*charset
;
1934 for (i
= 0; i
< charset_table_used
; i
++)
1936 charset
= CHARSET_FROM_ID (i
);
1937 attrs
= CHARSET_ATTRIBUTES (charset
);
1939 if (CHARSET_METHOD (charset
) == CHARSET_METHOD_MAP
)
1941 CHARSET_ATTR_DECODER (attrs
) = Qnil
;
1942 CHARSET_ATTR_ENCODER (attrs
) = Qnil
;
1943 CHARSET_METHOD (charset
) = CHARSET_METHOD_MAP_DEFERRED
;
1946 if (CHARSET_UNIFIED_P (charset
))
1947 CHARSET_ATTR_DEUNIFIER (attrs
) = Qnil
;
1950 if (CHAR_TABLE_P (Vchar_unified_charset_table
))
1952 Foptimize_char_table (Vchar_unified_charset_table
);
1953 Vchar_unify_table
= Vchar_unified_charset_table
;
1954 Vchar_unified_charset_table
= Qnil
;
1960 DEFUN ("charset-priority-list", Fcharset_priority_list
,
1961 Scharset_priority_list
, 0, 1, 0,
1962 doc
: /* Return the list of charsets ordered by priority.
1963 HIGHESTP non-nil means just return the highest priority one. */)
1965 Lisp_Object highestp
;
1967 Lisp_Object val
= Qnil
, list
= Vcharset_ordered_list
;
1969 if (!NILP (highestp
))
1970 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list
))));
1972 while (!NILP (list
))
1974 val
= Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list
)))), val
);
1977 return Fnreverse (val
);
1980 DEFUN ("set-charset-priority", Fset_charset_priority
, Sset_charset_priority
,
1982 doc
: /* Assign higher priority to the charsets given as arguments.
1983 usage: (set-charset-priority &rest charsets) */)
1988 Lisp_Object new_head
, old_list
, arglist
[2];
1989 Lisp_Object list_2022
, list_emacs_mule
;
1992 old_list
= Fcopy_sequence (Vcharset_ordered_list
);
1994 for (i
= 0; i
< nargs
; i
++)
1996 CHECK_CHARSET_GET_ID (args
[i
], id
);
1997 if (! NILP (Fmemq (make_number (id
), old_list
)))
1999 old_list
= Fdelq (make_number (id
), old_list
);
2000 new_head
= Fcons (make_number (id
), new_head
);
2003 arglist
[0] = Fnreverse (new_head
);
2004 arglist
[1] = old_list
;
2005 Vcharset_ordered_list
= Fnconc (2, arglist
);
2006 charset_ordered_list_tick
++;
2008 for (old_list
= Vcharset_ordered_list
, list_2022
= list_emacs_mule
= Qnil
;
2009 CONSP (old_list
); old_list
= XCDR (old_list
))
2011 if (! NILP (Fmemq (XCAR (old_list
), Viso_2022_charset_list
)))
2012 list_2022
= Fcons (XCAR (old_list
), list_2022
);
2013 if (! NILP (Fmemq (XCAR (old_list
), Vemacs_mule_charset_list
)))
2014 list_emacs_mule
= Fcons (XCAR (old_list
), list_emacs_mule
);
2016 Viso_2022_charset_list
= Fnreverse (list_2022
);
2017 Vemacs_mule_charset_list
= Fnreverse (list_emacs_mule
);
2022 DEFUN ("charset-id-internal", Fcharset_id_internal
, Scharset_id_internal
,
2024 doc
: /* Internal use only.
2025 Return charset identification number of CHARSET. */)
2027 Lisp_Object charset
;
2031 CHECK_CHARSET_GET_ID (charset
, id
);
2032 return make_number (id
);
2040 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory
),
2046 init_charset_once ()
2050 for (i
= 0; i
< ISO_MAX_DIMENSION
; i
++)
2051 for (j
= 0; j
< ISO_MAX_CHARS
; j
++)
2052 for (k
= 0; k
< ISO_MAX_FINAL
; k
++)
2053 iso_charset_table
[i
][j
][k
] = -1;
2055 for (i
= 0; i
< 256; i
++)
2056 emacs_mule_charset
[i
] = NULL
;
2058 charset_jisx0201_roman
= -1;
2059 charset_jisx0208_1978
= -1;
2060 charset_jisx0208
= -1;
2062 for (i
= 0; i
< 128; i
++)
2063 unibyte_to_multibyte_table
[i
] = i
;
2064 for (; i
< 256; i
++)
2065 unibyte_to_multibyte_table
[i
] = BYTE8_TO_CHAR (i
);
2073 DEFSYM (Qcharsetp
, "charsetp");
2075 DEFSYM (Qascii
, "ascii");
2076 DEFSYM (Qunicode
, "unicode");
2077 DEFSYM (Qeight_bit
, "eight-bit");
2078 DEFSYM (Qiso_8859_1
, "iso-8859-1");
2083 staticpro (&Vcharset_ordered_list
);
2084 Vcharset_ordered_list
= Qnil
;
2086 staticpro (&Viso_2022_charset_list
);
2087 Viso_2022_charset_list
= Qnil
;
2089 staticpro (&Vemacs_mule_charset_list
);
2090 Vemacs_mule_charset_list
= Qnil
;
2092 staticpro (&Vcharset_hash_table
);
2094 Lisp_Object args
[2];
2097 Vcharset_hash_table
= Fmake_hash_table (2, args
);
2100 charset_table_size
= 128;
2101 charset_table
= ((struct charset
*)
2102 xmalloc (sizeof (struct charset
) * charset_table_size
));
2103 charset_table_used
= 0;
2105 staticpro (&Vchar_unified_charset_table
);
2106 Vchar_unified_charset_table
= Fmake_char_table (Qnil
, make_number (-1));
2108 defsubr (&Scharsetp
);
2109 defsubr (&Smap_charset_chars
);
2110 defsubr (&Sdefine_charset_internal
);
2111 defsubr (&Sdefine_charset_alias
);
2112 defsubr (&Sunibyte_charset
);
2113 defsubr (&Sset_unibyte_charset
);
2114 defsubr (&Scharset_plist
);
2115 defsubr (&Sset_charset_plist
);
2116 defsubr (&Sunify_charset
);
2117 defsubr (&Sget_unused_iso_final_char
);
2118 defsubr (&Sdeclare_equiv_charset
);
2119 defsubr (&Sfind_charset_region
);
2120 defsubr (&Sfind_charset_string
);
2121 defsubr (&Sdecode_char
);
2122 defsubr (&Sencode_char
);
2123 defsubr (&Ssplit_char
);
2124 defsubr (&Smake_char
);
2125 defsubr (&Schar_charset
);
2126 defsubr (&Scharset_after
);
2127 defsubr (&Siso_charset
);
2128 defsubr (&Sclear_charset_maps
);
2129 defsubr (&Scharset_priority_list
);
2130 defsubr (&Sset_charset_priority
);
2131 defsubr (&Scharset_id_internal
);
2133 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path
,
2134 doc
: /* *Lisp of directories to search for charset map files. */);
2135 Vcharset_map_path
= Qnil
;
2137 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2138 doc
: /* List of all charsets ever defined. */);
2139 Vcharset_list
= Qnil
;
2142 = define_charset_internal (Qascii
, 1, "\x00\x7F\x00\x00\x00\x00",
2143 0, 127, 'B', -1, 0, 1, 0, 0);
2145 = define_charset_internal (Qiso_8859_1
, 1, "\x00\xFF\x00\x00\x00\x00",
2146 0, 255, -1, -1, -1, 1, 0, 0);
2148 = define_charset_internal (Qunicode
, 3, "\x00\xFF\x00\xFF\x00\x10",
2149 0, MAX_UNICODE_CHAR
, -1, 0, -1, 1, 0, 0);
2151 = define_charset_internal (Qeight_bit
, 1, "\x80\xFF\x00\x00\x00\x00",
2152 128, 255, -1, 0, -1, 0, 0,
2153 MAX_5_BYTE_CHAR
+ 1);
2158 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2159 (do not change this comment) */