]> code.delx.au - gnu-emacs/blob - src/charset.c
* dispextern.h, xfaces.c (xstrcasecmp): Renamed from xstricmp.
[gnu-emacs] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
40
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
48
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
53
54 */
55
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
59
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
63
64 /* Table of struct charset. */
65 struct charset *charset_table;
66
67 static int charset_table_size;
68 static int charset_table_used;
69
70 Lisp_Object Qcharsetp;
71
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77
78 /* The corresponding charsets. */
79 int charset_ascii;
80 int charset_eight_bit;
81 int charset_iso_8859_1;
82 int charset_unicode;
83
84 /* The other special charsets. */
85 int charset_jisx0201_roman;
86 int charset_jisx0208_1978;
87 int charset_jisx0208;
88
89 /* Value of charset attribute `charset-iso-plane'. */
90 Lisp_Object Qgl, Qgr;
91
92 /* Charset of unibyte characters. */
93 int charset_unibyte;
94
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list;
97
98 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
99 charsets. */
100 Lisp_Object Vcharset_non_preferred_head;
101
102 /* Incremented everytime we change Vcharset_ordered_list. This is
103 unsigned short so that it fits in Lisp_Int and never matches
104 -1. */
105 unsigned short charset_ordered_list_tick;
106
107 /* List of iso-2022 charsets. */
108 Lisp_Object Viso_2022_charset_list;
109
110 /* List of emacs-mule charsets. */
111 Lisp_Object Vemacs_mule_charset_list;
112
113 struct charset *emacs_mule_charset[256];
114
115 /* Mapping table from ISO2022's charset (specified by DIMENSION,
116 CHARS, and FINAL-CHAR) to Emacs' charset. */
117 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
118
119 Lisp_Object Vcharset_map_path;
120
121 Lisp_Object Vchar_unified_charset_table;
122
123 Lisp_Object Vcurrent_iso639_language;
124
125 /* Defined in chartab.c */
126 extern void
127 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
128 Lisp_Object function, Lisp_Object table,
129 Lisp_Object arg, struct charset *charset,
130 unsigned from, unsigned to));
131
132 #define CODE_POINT_TO_INDEX(charset, code) \
133 ((charset)->code_linear_p \
134 ? (code) - (charset)->min_code \
135 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
136 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
137 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
138 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
139 ? (((((code) >> 24) - (charset)->code_space[12]) \
140 * (charset)->code_space[11]) \
141 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
142 * (charset)->code_space[7]) \
143 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
144 * (charset)->code_space[3]) \
145 + (((code) & 0xFF) - (charset)->code_space[0]) \
146 - ((charset)->char_index_offset)) \
147 : -1)
148
149
150 /* Convert the character index IDX to code-point CODE for CHARSET.
151 It is assumed that IDX is in a valid range. */
152
153 #define INDEX_TO_CODE_POINT(charset, idx) \
154 ((charset)->code_linear_p \
155 ? (idx) + (charset)->min_code \
156 : (idx += (charset)->char_index_offset, \
157 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
158 | (((charset)->code_space[4] \
159 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
160 << 8) \
161 | (((charset)->code_space[8] \
162 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
163 << 16) \
164 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
165 << 24))))
166
167
168 \f
169
170 /* Set to 1 to warn that a charset map is loaded and thus a buffer
171 text and a string data may be relocated. */
172 int charset_map_loaded;
173
174 struct charset_map_entries
175 {
176 struct {
177 unsigned from, to;
178 int c;
179 } entry[0x10000];
180 struct charset_map_entries *next;
181 };
182
183 /* Load the mapping information for CHARSET from ENTRIES.
184
185 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
186
187 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
188 CHARSET->decoder, and CHARSET->encoder.
189
190 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
191 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
192 setup it too. */
193
194 static void
195 load_charset_map (charset, entries, n_entries, control_flag)
196 struct charset *charset;
197 struct charset_map_entries *entries;
198 int n_entries;
199 int control_flag;
200 {
201 Lisp_Object vec, table;
202 unsigned max_code = CHARSET_MAX_CODE (charset);
203 int ascii_compatible_p = charset->ascii_compatible_p;
204 int min_char, max_char, nonascii_min_char;
205 int i;
206 unsigned char *fast_map = charset->fast_map;
207
208 if (n_entries <= 0)
209 return;
210
211 if (control_flag > 0)
212 {
213 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
214
215 table = Fmake_char_table (Qnil, Qnil);
216 if (control_flag == 1)
217 vec = Fmake_vector (make_number (n), make_number (-1));
218 else if (! CHAR_TABLE_P (Vchar_unify_table))
219 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
220
221 charset_map_loaded = 1;
222 }
223
224 min_char = max_char = entries->entry[0].c;
225 nonascii_min_char = MAX_CHAR;
226 for (i = 0; i < n_entries; i++)
227 {
228 unsigned from, to;
229 int from_index, to_index;
230 int from_c, to_c;
231 int idx = i % 0x10000;
232
233 if (i > 0 && idx == 0)
234 entries = entries->next;
235 from = entries->entry[idx].from;
236 to = entries->entry[idx].to;
237 from_c = entries->entry[idx].c;
238 from_index = CODE_POINT_TO_INDEX (charset, from);
239 if (from == to)
240 {
241 to_index = from_index;
242 to_c = from_c;
243 }
244 else
245 {
246 to_index = CODE_POINT_TO_INDEX (charset, to);
247 to_c = from_c + (to_index - from_index);
248 }
249 if (from_index < 0 || to_index < 0)
250 continue;
251
252 if (control_flag < 2)
253 {
254 int c;
255
256 if (to_c > max_char)
257 max_char = to_c;
258 else if (from_c < min_char)
259 min_char = from_c;
260 if (ascii_compatible_p)
261 {
262 if (! ASCII_BYTE_P (from_c))
263 {
264 if (from_c < nonascii_min_char)
265 nonascii_min_char = from_c;
266 }
267 else if (! ASCII_BYTE_P (to_c))
268 {
269 nonascii_min_char = 0x80;
270 }
271 }
272
273 for (c = from_c; c <= to_c; c++)
274 CHARSET_FAST_MAP_SET (c, fast_map);
275
276 if (control_flag == 1)
277 {
278 unsigned code = from;
279
280 if (CHARSET_COMPACT_CODES_P (charset))
281 while (1)
282 {
283 ASET (vec, from_index, make_number (from_c));
284 if (NILP (CHAR_TABLE_REF (table, from_c)))
285 CHAR_TABLE_SET (table, from_c, make_number (code));
286 if (from_index == to_index)
287 break;
288 from_index++, from_c++;
289 code = INDEX_TO_CODE_POINT (charset, from_index);
290 }
291 else
292 for (; from_index <= to_index; from_index++, from_c++)
293 {
294 ASET (vec, from_index, make_number (from_c));
295 if (NILP (CHAR_TABLE_REF (table, from_c)))
296 CHAR_TABLE_SET (table, from_c, make_number (from_index));
297 }
298 }
299 }
300 else
301 {
302 unsigned code = from;
303
304 while (1)
305 {
306 int c1 = DECODE_CHAR (charset, code);
307
308 if (c1 >= 0)
309 {
310 CHAR_TABLE_SET (table, from_c, make_number (c1));
311 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
312 if (CHAR_TABLE_P (Vchar_unified_charset_table))
313 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
314 CHARSET_NAME (charset));
315 }
316 if (from_index == to_index)
317 break;
318 from_index++, from_c++;
319 code = INDEX_TO_CODE_POINT (charset, from_index);
320 }
321 }
322 }
323
324 if (control_flag < 2)
325 {
326 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
327 ? nonascii_min_char : min_char);
328 CHARSET_MAX_CHAR (charset) = max_char;
329 if (control_flag == 1)
330 {
331 CHARSET_DECODER (charset) = vec;
332 CHARSET_ENCODER (charset) = table;
333 }
334 }
335 else
336 CHARSET_DEUNIFIER (charset) = table;
337 }
338
339
340 /* Read a hexadecimal number (preceded by "0x") from the file FP while
341 paying attention to comment charcter '#'. */
342
343 static INLINE unsigned
344 read_hex (fp, eof)
345 FILE *fp;
346 int *eof;
347 {
348 int c;
349 unsigned n;
350
351 while ((c = getc (fp)) != EOF)
352 {
353 if (c == '#')
354 {
355 while ((c = getc (fp)) != EOF && c != '\n');
356 }
357 else if (c == '0')
358 {
359 if ((c = getc (fp)) == EOF || c == 'x')
360 break;
361 }
362 }
363 if (c == EOF)
364 {
365 *eof = 1;
366 return 0;
367 }
368 *eof = 0;
369 n = 0;
370 if (c == 'x')
371 while ((c = getc (fp)) != EOF && isxdigit (c))
372 n = ((n << 4)
373 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
374 else
375 while ((c = getc (fp)) != EOF && isdigit (c))
376 n = (n * 10) + c - '0';
377 if (c != EOF)
378 ungetc (c, fp);
379 return n;
380 }
381
382
383 /* Return a mapping vector for CHARSET loaded from MAPFILE.
384 Each line of MAPFILE has this form
385 0xAAAA 0xCCCC
386 where 0xAAAA is a code-point and 0xCCCC is the corresponding
387 character code, or this form
388 0xAAAA-0xBBBB 0xCCCC
389 where 0xAAAA and 0xBBBB are code-points specifying a range, and
390 0xCCCC is the first character code of the range.
391
392 The returned vector has this form:
393 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
394 where CODE1 is a code-point or a cons of code-points specifying a
395 range. */
396
397 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
398
399 static void
400 load_charset_map_from_file (charset, mapfile, control_flag)
401 struct charset *charset;
402 Lisp_Object mapfile;
403 int control_flag;
404 {
405 unsigned min_code = CHARSET_MIN_CODE (charset);
406 unsigned max_code = CHARSET_MAX_CODE (charset);
407 int fd;
408 FILE *fp;
409 int eof;
410 Lisp_Object suffixes;
411 struct charset_map_entries *head, *entries;
412 int n_entries;
413
414 suffixes = Fcons (build_string (".map"),
415 Fcons (build_string (".TXT"), Qnil));
416
417 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
418 if (fd < 0
419 || ! (fp = fdopen (fd, "r")))
420 {
421 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
422 return;
423 }
424
425 head = entries = ((struct charset_map_entries *)
426 alloca (sizeof (struct charset_map_entries)));
427 n_entries = 0;
428 eof = 0;
429 while (1)
430 {
431 unsigned from, to;
432 int c;
433 int idx;
434
435 from = read_hex (fp, &eof);
436 if (eof)
437 break;
438 if (getc (fp) == '-')
439 to = read_hex (fp, &eof);
440 else
441 to = from;
442 c = (int) read_hex (fp, &eof);
443
444 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
445 continue;
446
447 if (n_entries > 0 && (n_entries % 0x10000) == 0)
448 {
449 entries->next = ((struct charset_map_entries *)
450 alloca (sizeof (struct charset_map_entries)));
451 entries = entries->next;
452 }
453 idx = n_entries % 0x10000;
454 entries->entry[idx].from = from;
455 entries->entry[idx].to = to;
456 entries->entry[idx].c = c;
457 n_entries++;
458 }
459 fclose (fp);
460 close (fd);
461
462 load_charset_map (charset, head, n_entries, control_flag);
463 }
464
465 static void
466 load_charset_map_from_vector (charset, vec, control_flag)
467 struct charset *charset;
468 Lisp_Object vec;
469 int control_flag;
470 {
471 unsigned min_code = CHARSET_MIN_CODE (charset);
472 unsigned max_code = CHARSET_MAX_CODE (charset);
473 struct charset_map_entries *head, *entries;
474 int n_entries;
475 int len = ASIZE (vec);
476 int i;
477
478 if (len % 2 == 1)
479 {
480 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
481 return;
482 }
483
484 head = entries = ((struct charset_map_entries *)
485 alloca (sizeof (struct charset_map_entries)));
486 n_entries = 0;
487 for (i = 0; i < len; i += 2)
488 {
489 Lisp_Object val, val2;
490 unsigned from, to;
491 int c;
492 int idx;
493
494 val = AREF (vec, i);
495 if (CONSP (val))
496 {
497 val2 = XCDR (val);
498 val = XCAR (val);
499 CHECK_NATNUM (val);
500 CHECK_NATNUM (val2);
501 from = XFASTINT (val);
502 to = XFASTINT (val2);
503 }
504 else
505 {
506 CHECK_NATNUM (val);
507 from = to = XFASTINT (val);
508 }
509 val = AREF (vec, i + 1);
510 CHECK_NATNUM (val);
511 c = XFASTINT (val);
512
513 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
514 continue;
515
516 if (n_entries > 0 && (n_entries % 0x10000) == 0)
517 {
518 entries->next = ((struct charset_map_entries *)
519 alloca (sizeof (struct charset_map_entries)));
520 entries = entries->next;
521 }
522 idx = n_entries % 0x10000;
523 entries->entry[idx].from = from;
524 entries->entry[idx].to = to;
525 entries->entry[idx].c = c;
526 n_entries++;
527 }
528
529 load_charset_map (charset, head, n_entries, control_flag);
530 }
531
532 static void
533 load_charset (charset)
534 struct charset *charset;
535 {
536 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
537 {
538 Lisp_Object map;
539
540 map = CHARSET_MAP (charset);
541 if (STRINGP (map))
542 load_charset_map_from_file (charset, map, 1);
543 else
544 load_charset_map_from_vector (charset, map, 1);
545 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
546 }
547 }
548
549
550 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
551 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
552 (object)
553 Lisp_Object object;
554 {
555 return (CHARSETP (object) ? Qt : Qnil);
556 }
557
558
559 void
560 map_charset_chars (c_function, function, arg,
561 charset, from, to)
562 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
563 Lisp_Object function, arg;
564 struct charset *charset;
565 unsigned from, to;
566 {
567 Lisp_Object range;
568 int partial;
569
570 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
571 load_charset (charset);
572
573 partial = (from > CHARSET_MIN_CODE (charset)
574 || to < CHARSET_MAX_CODE (charset));
575
576 if (CHARSET_UNIFIED_P (charset)
577 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
578 {
579 map_char_table_for_charset (c_function, function,
580 CHARSET_DEUNIFIER (charset), arg,
581 partial ? charset : NULL, from, to);
582 }
583
584 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
585 {
586 int from_idx = CODE_POINT_TO_INDEX (charset, from);
587 int to_idx = CODE_POINT_TO_INDEX (charset, to);
588 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
589 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
590
591 range = Fcons (make_number (from_c), make_number (to_c));
592 if (NILP (function))
593 (*c_function) (arg, range);
594 else
595 call2 (function, range, arg);
596 }
597 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
598 {
599 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
600 return;
601 map_char_table_for_charset (c_function, function,
602 CHARSET_ENCODER (charset), arg,
603 partial ? charset : NULL, from, to);
604 }
605 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
606 {
607 Lisp_Object subset_info;
608 int offset;
609
610 subset_info = CHARSET_SUBSET (charset);
611 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
612 offset = XINT (AREF (subset_info, 3));
613 from -= offset;
614 if (from < XFASTINT (AREF (subset_info, 1)))
615 from = XFASTINT (AREF (subset_info, 1));
616 to -= offset;
617 if (to > XFASTINT (AREF (subset_info, 2)))
618 to = XFASTINT (AREF (subset_info, 2));
619 map_charset_chars (c_function, function, arg, charset, from, to);
620 }
621 else /* i.e. CHARSET_METHOD_SUPERSET */
622 {
623 Lisp_Object parents;
624
625 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
626 parents = XCDR (parents))
627 {
628 int offset;
629 unsigned this_from, this_to;
630
631 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
632 offset = XINT (XCDR (XCAR (parents)));
633 this_from = from - offset;
634 this_to = to - offset;
635 if (this_from < CHARSET_MIN_CODE (charset))
636 this_from = CHARSET_MIN_CODE (charset);
637 if (this_to > CHARSET_MAX_CODE (charset))
638 this_to = CHARSET_MAX_CODE (charset);
639 map_charset_chars (c_function, function, arg, charset,
640 this_from, this_to);
641 }
642 }
643 }
644
645 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
646 doc: /* Call FUNCTION for all characters in CHARSET.
647 FUNCTION is called with an argument RANGE and the optional 3rd
648 argument ARG.
649
650 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
651 characters contained in CHARSET.
652
653 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
654 range of code points of target characters. */)
655 (function, charset, arg, from_code, to_code)
656 Lisp_Object function, charset, arg, from_code, to_code;
657 {
658 struct charset *cs;
659 unsigned from, to;
660
661 CHECK_CHARSET_GET_CHARSET (charset, cs);
662 if (NILP (from_code))
663 from = CHARSET_MIN_CODE (cs);
664 else
665 {
666 CHECK_NATNUM (from_code);
667 from = XINT (from_code);
668 if (from < CHARSET_MIN_CODE (cs))
669 from = CHARSET_MIN_CODE (cs);
670 }
671 if (NILP (to_code))
672 to = CHARSET_MAX_CODE (cs);
673 else
674 {
675 CHECK_NATNUM (to_code);
676 to = XINT (to_code);
677 if (to > CHARSET_MAX_CODE (cs))
678 to = CHARSET_MAX_CODE (cs);
679 }
680 map_charset_chars (NULL, function, arg, cs, from, to);
681 return Qnil;
682 }
683
684
685 /* Define a charset according to the arguments. The Nth argument is
686 the Nth attribute of the charset (the last attribute `charset-id'
687 is not included). See the docstring of `define-charset' for the
688 detail. */
689
690 DEFUN ("define-charset-internal", Fdefine_charset_internal,
691 Sdefine_charset_internal, charset_arg_max, MANY, 0,
692 doc: /* For internal use only.
693 usage: (define-charset-internal ...) */)
694 (nargs, args)
695 int nargs;
696 Lisp_Object *args;
697 {
698 /* Charset attr vector. */
699 Lisp_Object attrs;
700 Lisp_Object val;
701 unsigned hash_code;
702 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
703 int i, j;
704 struct charset charset;
705 int id;
706 int dimension;
707 int new_definition_p;
708 int nchars;
709
710 if (nargs != charset_arg_max)
711 return Fsignal (Qwrong_number_of_arguments,
712 Fcons (intern ("define-charset-internal"),
713 make_number (nargs)));
714
715 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
716
717 CHECK_SYMBOL (args[charset_arg_name]);
718 ASET (attrs, charset_name, args[charset_arg_name]);
719
720 val = args[charset_arg_code_space];
721 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
722 {
723 int min_byte, max_byte;
724
725 min_byte = XINT (Faref (val, make_number (i * 2)));
726 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
727 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
728 error ("Invalid :code-space value");
729 charset.code_space[i * 4] = min_byte;
730 charset.code_space[i * 4 + 1] = max_byte;
731 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
732 nchars *= charset.code_space[i * 4 + 2];
733 charset.code_space[i * 4 + 3] = nchars;
734 if (max_byte > 0)
735 dimension = i + 1;
736 }
737
738 val = args[charset_arg_dimension];
739 if (NILP (val))
740 charset.dimension = dimension;
741 else
742 {
743 CHECK_NATNUM (val);
744 charset.dimension = XINT (val);
745 if (charset.dimension < 1 || charset.dimension > 4)
746 args_out_of_range_3 (val, make_number (1), make_number (4));
747 }
748
749 charset.code_linear_p
750 = (charset.dimension == 1
751 || (charset.code_space[2] == 256
752 && (charset.dimension == 2
753 || (charset.code_space[6] == 256
754 && (charset.dimension == 3
755 || charset.code_space[10] == 256)))));
756
757 if (! charset.code_linear_p)
758 {
759 charset.code_space_mask = (unsigned char *) xmalloc (256);
760 bzero (charset.code_space_mask, 256);
761 for (i = 0; i < 4; i++)
762 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
763 j++)
764 charset.code_space_mask[j] |= (1 << i);
765 }
766
767 charset.iso_chars_96 = charset.code_space[2] == 96;
768
769 charset.min_code = (charset.code_space[0]
770 | (charset.code_space[4] << 8)
771 | (charset.code_space[8] << 16)
772 | (charset.code_space[12] << 24));
773 charset.max_code = (charset.code_space[1]
774 | (charset.code_space[5] << 8)
775 | (charset.code_space[9] << 16)
776 | (charset.code_space[13] << 24));
777 charset.char_index_offset = 0;
778
779 val = args[charset_arg_min_code];
780 if (! NILP (val))
781 {
782 unsigned code;
783
784 if (INTEGERP (val))
785 code = XINT (val);
786 else
787 {
788 CHECK_CONS (val);
789 CHECK_NUMBER_CAR (val);
790 CHECK_NUMBER_CDR (val);
791 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
792 }
793 if (code < charset.min_code
794 || code > charset.max_code)
795 args_out_of_range_3 (make_number (charset.min_code),
796 make_number (charset.max_code), val);
797 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
798 charset.min_code = code;
799 }
800
801 val = args[charset_arg_max_code];
802 if (! NILP (val))
803 {
804 unsigned code;
805
806 if (INTEGERP (val))
807 code = XINT (val);
808 else
809 {
810 CHECK_CONS (val);
811 CHECK_NUMBER_CAR (val);
812 CHECK_NUMBER_CDR (val);
813 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
814 }
815 if (code < charset.min_code
816 || code > charset.max_code)
817 args_out_of_range_3 (make_number (charset.min_code),
818 make_number (charset.max_code), val);
819 charset.max_code = code;
820 }
821
822 charset.compact_codes_p = charset.max_code < 0x1000000;
823
824 val = args[charset_arg_invalid_code];
825 if (NILP (val))
826 {
827 if (charset.min_code > 0)
828 charset.invalid_code = 0;
829 else
830 {
831 XSETINT (val, charset.max_code + 1);
832 if (XINT (val) == charset.max_code + 1)
833 charset.invalid_code = charset.max_code + 1;
834 else
835 error ("Attribute :invalid-code must be specified");
836 }
837 }
838 else
839 {
840 CHECK_NATNUM (val);
841 charset.invalid_code = XFASTINT (val);
842 }
843
844 val = args[charset_arg_iso_final];
845 if (NILP (val))
846 charset.iso_final = -1;
847 else
848 {
849 CHECK_NUMBER (val);
850 if (XINT (val) < '0' || XINT (val) > 127)
851 error ("Invalid iso-final-char: %d", XINT (val));
852 charset.iso_final = XINT (val);
853 }
854
855 val = args[charset_arg_iso_revision];
856 if (NILP (val))
857 charset.iso_revision = -1;
858 else
859 {
860 CHECK_NUMBER (val);
861 if (XINT (val) > 63)
862 args_out_of_range (make_number (63), val);
863 charset.iso_revision = XINT (val);
864 }
865
866 val = args[charset_arg_emacs_mule_id];
867 if (NILP (val))
868 charset.emacs_mule_id = -1;
869 else
870 {
871 CHECK_NATNUM (val);
872 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
873 error ("Invalid emacs-mule-id: %d", XINT (val));
874 charset.emacs_mule_id = XINT (val);
875 }
876
877 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
878
879 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
880
881 charset.unified_p = 0;
882
883 bzero (charset.fast_map, sizeof (charset.fast_map));
884
885 if (! NILP (args[charset_arg_code_offset]))
886 {
887 val = args[charset_arg_code_offset];
888 CHECK_NUMBER (val);
889
890 charset.method = CHARSET_METHOD_OFFSET;
891 charset.code_offset = XINT (val);
892
893 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
894 charset.min_char = i + charset.code_offset;
895 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
896 charset.max_char = i + charset.code_offset;
897 if (charset.max_char > MAX_CHAR)
898 error ("Unsupported max char: %d", charset.max_char);
899
900 i = (charset.min_char >> 7) << 7;
901 for (; i < 0x10000 && i <= charset.max_char; i += 128)
902 CHARSET_FAST_MAP_SET (i, charset.fast_map);
903 i = (i >> 12) << 12;
904 for (; i <= charset.max_char; i += 0x1000)
905 CHARSET_FAST_MAP_SET (i, charset.fast_map);
906 }
907 else if (! NILP (args[charset_arg_map]))
908 {
909 val = args[charset_arg_map];
910 ASET (attrs, charset_map, val);
911 if (STRINGP (val))
912 load_charset_map_from_file (&charset, val, 0);
913 else
914 load_charset_map_from_vector (&charset, val, 0);
915 charset.method = CHARSET_METHOD_MAP_DEFERRED;
916 }
917 else if (! NILP (args[charset_arg_subset]))
918 {
919 Lisp_Object parent;
920 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
921 struct charset *parent_charset;
922
923 val = args[charset_arg_subset];
924 parent = Fcar (val);
925 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
926 parent_min_code = Fnth (make_number (1), val);
927 CHECK_NATNUM (parent_min_code);
928 parent_max_code = Fnth (make_number (2), val);
929 CHECK_NATNUM (parent_max_code);
930 parent_code_offset = Fnth (make_number (3), val);
931 CHECK_NUMBER (parent_code_offset);
932 val = Fmake_vector (make_number (4), Qnil);
933 ASET (val, 0, make_number (parent_charset->id));
934 ASET (val, 1, parent_min_code);
935 ASET (val, 2, parent_max_code);
936 ASET (val, 3, parent_code_offset);
937 ASET (attrs, charset_subset, val);
938
939 charset.method = CHARSET_METHOD_SUBSET;
940 /* Here, we just copy the parent's fast_map. It's not accurate,
941 but at least it works for quickly detecting which character
942 DOESN'T belong to this charset. */
943 for (i = 0; i < 190; i++)
944 charset.fast_map[i] = parent_charset->fast_map[i];
945
946 /* We also copy these for parents. */
947 charset.min_char = parent_charset->min_char;
948 charset.max_char = parent_charset->max_char;
949 }
950 else if (! NILP (args[charset_arg_superset]))
951 {
952 val = args[charset_arg_superset];
953 charset.method = CHARSET_METHOD_SUPERSET;
954 val = Fcopy_sequence (val);
955 ASET (attrs, charset_superset, val);
956
957 charset.min_char = MAX_CHAR;
958 charset.max_char = 0;
959 for (; ! NILP (val); val = Fcdr (val))
960 {
961 Lisp_Object elt, car_part, cdr_part;
962 int this_id, offset;
963 struct charset *this_charset;
964
965 elt = Fcar (val);
966 if (CONSP (elt))
967 {
968 car_part = XCAR (elt);
969 cdr_part = XCDR (elt);
970 CHECK_CHARSET_GET_ID (car_part, this_id);
971 CHECK_NUMBER (cdr_part);
972 offset = XINT (cdr_part);
973 }
974 else
975 {
976 CHECK_CHARSET_GET_ID (elt, this_id);
977 offset = 0;
978 }
979 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
980
981 this_charset = CHARSET_FROM_ID (this_id);
982 if (charset.min_char > this_charset->min_char)
983 charset.min_char = this_charset->min_char;
984 if (charset.max_char < this_charset->max_char)
985 charset.max_char = this_charset->max_char;
986 for (i = 0; i < 190; i++)
987 charset.fast_map[i] |= this_charset->fast_map[i];
988 }
989 }
990 else
991 error ("None of :code-offset, :map, :parents are specified");
992
993 val = args[charset_arg_unify_map];
994 if (! NILP (val) && !STRINGP (val))
995 CHECK_VECTOR (val);
996 ASET (attrs, charset_unify_map, val);
997
998 CHECK_LIST (args[charset_arg_plist]);
999 ASET (attrs, charset_plist, args[charset_arg_plist]);
1000
1001 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1002 &hash_code);
1003 if (charset.hash_index >= 0)
1004 {
1005 new_definition_p = 0;
1006 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1007 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1008 }
1009 else
1010 {
1011 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1012 hash_code);
1013 if (charset_table_used == charset_table_size)
1014 {
1015 struct charset *new_table
1016 = (struct charset *) xmalloc (sizeof (struct charset)
1017 * (charset_table_size + 16));
1018 bcopy (charset_table, new_table,
1019 sizeof (struct charset) * charset_table_size);
1020 charset_table_size += 16;
1021 charset_table = new_table;
1022 }
1023 id = charset_table_used++;
1024 new_definition_p = 1;
1025 }
1026
1027 ASET (attrs, charset_id, make_number (id));
1028 charset.id = id;
1029 charset_table[id] = charset;
1030
1031 if (charset.iso_final >= 0)
1032 {
1033 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1034 charset.iso_final) = id;
1035 if (new_definition_p)
1036 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1037 Fcons (make_number (id), Qnil));
1038 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1039 charset_jisx0201_roman = id;
1040 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1041 charset_jisx0208_1978 = id;
1042 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1043 charset_jisx0208 = id;
1044 }
1045
1046 if (charset.emacs_mule_id >= 0)
1047 {
1048 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1049 if (charset.emacs_mule_id < 0xA0)
1050 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1051 else
1052 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1053 if (new_definition_p)
1054 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1055 Fcons (make_number (id), Qnil));
1056 }
1057
1058 if (new_definition_p)
1059 {
1060 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1061 if (charset.supplementary_p)
1062 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1063 Fcons (make_number (id), Qnil));
1064 else
1065 {
1066 Lisp_Object tail;
1067
1068 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1069 {
1070 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1071
1072 if (cs->supplementary_p)
1073 break;
1074 }
1075 if (EQ (tail, Vcharset_ordered_list))
1076 Vcharset_ordered_list = Fcons (make_number (id),
1077 Vcharset_ordered_list);
1078 else if (NILP (tail))
1079 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1080 Fcons (make_number (id), Qnil));
1081 else
1082 {
1083 val = Fcons (XCAR (tail), XCDR (tail));
1084 XSETCDR (tail, val);
1085 XSETCAR (tail, make_number (id));
1086 }
1087 }
1088 charset_ordered_list_tick++;
1089 }
1090
1091 return Qnil;
1092 }
1093
1094
1095 /* Same as Fdefine_charset_internal but arguments are more convenient
1096 to call from C (typically in syms_of_charset). This can define a
1097 charset of `offset' method only. Return the ID of the new
1098 charset. */
1099
1100 static int
1101 define_charset_internal (name, dimension, code_space, min_code, max_code,
1102 iso_final, iso_revision, emacs_mule_id,
1103 ascii_compatible, supplementary,
1104 code_offset)
1105 Lisp_Object name;
1106 int dimension;
1107 unsigned char *code_space;
1108 unsigned min_code, max_code;
1109 int iso_final, iso_revision, emacs_mule_id;
1110 int ascii_compatible, supplementary;
1111 int code_offset;
1112 {
1113 Lisp_Object args[charset_arg_max];
1114 Lisp_Object plist[14];
1115 Lisp_Object val;
1116 int i;
1117
1118 args[charset_arg_name] = name;
1119 args[charset_arg_dimension] = make_number (dimension);
1120 val = Fmake_vector (make_number (8), make_number (0));
1121 for (i = 0; i < 8; i++)
1122 ASET (val, i, make_number (code_space[i]));
1123 args[charset_arg_code_space] = val;
1124 args[charset_arg_min_code] = make_number (min_code);
1125 args[charset_arg_max_code] = make_number (max_code);
1126 args[charset_arg_iso_final]
1127 = (iso_final < 0 ? Qnil : make_number (iso_final));
1128 args[charset_arg_iso_revision] = make_number (iso_revision);
1129 args[charset_arg_emacs_mule_id]
1130 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1131 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1132 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1133 args[charset_arg_invalid_code] = Qnil;
1134 args[charset_arg_code_offset] = make_number (code_offset);
1135 args[charset_arg_map] = Qnil;
1136 args[charset_arg_subset] = Qnil;
1137 args[charset_arg_superset] = Qnil;
1138 args[charset_arg_unify_map] = Qnil;
1139
1140 plist[0] = intern (":name");
1141 plist[1] = args[charset_arg_name];
1142 plist[2] = intern (":dimension");
1143 plist[3] = args[charset_arg_dimension];
1144 plist[4] = intern (":code-space");
1145 plist[5] = args[charset_arg_code_space];
1146 plist[6] = intern (":iso-final-char");
1147 plist[7] = args[charset_arg_iso_final];
1148 plist[8] = intern (":emacs-mule-id");
1149 plist[9] = args[charset_arg_emacs_mule_id];
1150 plist[10] = intern (":ascii-compatible-p");
1151 plist[11] = args[charset_arg_ascii_compatible_p];
1152 plist[12] = intern (":code-offset");
1153 plist[13] = args[charset_arg_code_offset];
1154
1155 args[charset_arg_plist] = Flist (14, plist);
1156 Fdefine_charset_internal (charset_arg_max, args);
1157
1158 return XINT (CHARSET_SYMBOL_ID (name));
1159 }
1160
1161
1162 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1163 Sdefine_charset_alias, 2, 2, 0,
1164 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1165 (alias, charset)
1166 Lisp_Object alias, charset;
1167 {
1168 Lisp_Object attr;
1169
1170 CHECK_CHARSET_GET_ATTR (charset, attr);
1171 Fputhash (alias, attr, Vcharset_hash_table);
1172 Vcharset_list = Fcons (alias, Vcharset_list);
1173 return Qnil;
1174 }
1175
1176
1177 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1178 doc: /* Return the property list of CHARSET. */)
1179 (charset)
1180 Lisp_Object charset;
1181 {
1182 Lisp_Object attrs;
1183
1184 CHECK_CHARSET_GET_ATTR (charset, attrs);
1185 return CHARSET_ATTR_PLIST (attrs);
1186 }
1187
1188
1189 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1190 doc: /* Set CHARSET's property list to PLIST. */)
1191 (charset, plist)
1192 Lisp_Object charset, plist;
1193 {
1194 Lisp_Object attrs;
1195
1196 CHECK_CHARSET_GET_ATTR (charset, attrs);
1197 CHARSET_ATTR_PLIST (attrs) = plist;
1198 return plist;
1199 }
1200
1201
1202 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1203 doc: /* Unify characters of CHARSET with Unicode.
1204 This means reading the relevant file and installing the table defined
1205 by CHARSET's `:unify-map' property.
1206
1207 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1208 the same meaning as the `:unify-map' attribute in the function
1209 `define-charset' (which see).
1210
1211 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1212 (charset, unify_map, deunify)
1213 Lisp_Object charset, unify_map, deunify;
1214 {
1215 int id;
1216 struct charset *cs;
1217
1218 CHECK_CHARSET_GET_ID (charset, id);
1219 cs = CHARSET_FROM_ID (id);
1220 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1221 load_charset (cs);
1222 if (NILP (deunify)
1223 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1224 : ! CHARSET_UNIFIED_P (cs))
1225 return Qnil;
1226
1227 CHARSET_UNIFIED_P (cs) = 0;
1228 if (NILP (deunify))
1229 {
1230 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
1231 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1232 if (NILP (unify_map))
1233 unify_map = CHARSET_UNIFY_MAP (cs);
1234 if (STRINGP (unify_map))
1235 load_charset_map_from_file (cs, unify_map, 2);
1236 else if (VECTORP (unify_map))
1237 load_charset_map_from_vector (cs, unify_map, 2);
1238 else if (NILP (unify_map))
1239 error ("No unify-map for charset");
1240 else
1241 error ("Bad unify-map arg");
1242 CHARSET_UNIFIED_P (cs) = 1;
1243 }
1244 else if (CHAR_TABLE_P (Vchar_unify_table))
1245 {
1246 int min_code = CHARSET_MIN_CODE (cs);
1247 int max_code = CHARSET_MAX_CODE (cs);
1248 int min_char = DECODE_CHAR (cs, min_code);
1249 int max_char = DECODE_CHAR (cs, max_code);
1250
1251 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1252 }
1253
1254 return Qnil;
1255 }
1256
1257 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1258 Sget_unused_iso_final_char, 2, 2, 0,
1259 doc: /*
1260 Return an unused ISO final char for a charset of DIMENISION and CHARS.
1261 DIMENSION is the number of bytes to represent a character: 1 or 2.
1262 CHARS is the number of characters in a dimension: 94 or 96.
1263
1264 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1265 If there's no unused final char for the specified kind of charset,
1266 return nil. */)
1267 (dimension, chars)
1268 Lisp_Object dimension, chars;
1269 {
1270 int final_char;
1271
1272 CHECK_NUMBER (dimension);
1273 CHECK_NUMBER (chars);
1274 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1275 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1276 if (XINT (chars) != 94 && XINT (chars) != 96)
1277 args_out_of_range_3 (chars, make_number (94), make_number (96));
1278 for (final_char = '0'; final_char <= '?'; final_char++)
1279 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1280 break;
1281 return (final_char <= '?' ? make_number (final_char) : Qnil);
1282 }
1283
1284 static void
1285 check_iso_charset_parameter (dimension, chars, final_char)
1286 Lisp_Object dimension, chars, final_char;
1287 {
1288 CHECK_NATNUM (dimension);
1289 CHECK_NATNUM (chars);
1290 CHECK_NATNUM (final_char);
1291
1292 if (XINT (dimension) > 3)
1293 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1294 if (XINT (chars) != 94 && XINT (chars) != 96)
1295 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1296 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1297 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1298 }
1299
1300
1301 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1302 4, 4, 0,
1303 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1304
1305 On decoding by an ISO-2022 base coding system, when a charset
1306 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1307 if CHARSET is designated instead. */)
1308 (dimension, chars, final_char, charset)
1309 Lisp_Object dimension, chars, final_char, charset;
1310 {
1311 int id;
1312 int chars_flag;
1313
1314 CHECK_CHARSET_GET_ID (charset, id);
1315 check_iso_charset_parameter (dimension, chars, final_char);
1316 chars_flag = XINT (chars) == 96;
1317 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1318 return Qnil;
1319 }
1320
1321
1322 /* Return information about charsets in the text at PTR of NBYTES
1323 bytes, which are NCHARS characters. The value is:
1324
1325 0: Each character is represented by one byte. This is always
1326 true for a unibyte string. For a multibyte string, true if
1327 it contains only ASCII characters.
1328
1329 1: No charsets other than ascii, control-1, and latin-1 are
1330 found.
1331
1332 2: Otherwise.
1333 */
1334
1335 int
1336 string_xstring_p (string)
1337 Lisp_Object string;
1338 {
1339 const unsigned char *p = SDATA (string);
1340 const unsigned char *endp = p + SBYTES (string);
1341
1342 if (SCHARS (string) == SBYTES (string))
1343 return 0;
1344
1345 while (p < endp)
1346 {
1347 int c = STRING_CHAR_ADVANCE (p);
1348
1349 if (c >= 0x100)
1350 return 2;
1351 }
1352 return 1;
1353 }
1354
1355
1356 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1357
1358 CHARSETS is a vector. If Nth element is non-nil, it means the
1359 charset whose id is N is already found.
1360
1361 It may lookup a translation table TABLE if supplied. */
1362
1363 static void
1364 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1365 const unsigned char *ptr;
1366 EMACS_INT nchars, nbytes;
1367 Lisp_Object charsets, table;
1368 int multibyte;
1369 {
1370 const unsigned char *pend = ptr + nbytes;
1371
1372 if (nchars == nbytes)
1373 {
1374 if (multibyte)
1375 ASET (charsets, charset_ascii, Qt);
1376 else
1377 while (ptr < pend)
1378 {
1379 int c = *ptr++;
1380
1381 if (!NILP (table))
1382 c = translate_char (table, c);
1383 if (ASCII_BYTE_P (c))
1384 ASET (charsets, charset_ascii, Qt);
1385 else
1386 ASET (charsets, charset_eight_bit, Qt);
1387 }
1388 }
1389 else
1390 {
1391 while (ptr < pend)
1392 {
1393 int c = STRING_CHAR_ADVANCE (ptr);
1394 struct charset *charset;
1395
1396 if (!NILP (table))
1397 c = translate_char (table, c);
1398 charset = CHAR_CHARSET (c);
1399 ASET (charsets, CHARSET_ID (charset), Qt);
1400 }
1401 }
1402 }
1403
1404 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1405 2, 3, 0,
1406 doc: /* Return a list of charsets in the region between BEG and END.
1407 BEG and END are buffer positions.
1408 Optional arg TABLE if non-nil is a translation table to look up.
1409
1410 If the current buffer is unibyte, the returned list may contain
1411 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1412 (beg, end, table)
1413 Lisp_Object beg, end, table;
1414 {
1415 Lisp_Object charsets;
1416 EMACS_INT from, from_byte, to, stop, stop_byte;
1417 int i;
1418 Lisp_Object val;
1419 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1420
1421 validate_region (&beg, &end);
1422 from = XFASTINT (beg);
1423 stop = to = XFASTINT (end);
1424
1425 if (from < GPT && GPT < to)
1426 {
1427 stop = GPT;
1428 stop_byte = GPT_BYTE;
1429 }
1430 else
1431 stop_byte = CHAR_TO_BYTE (stop);
1432
1433 from_byte = CHAR_TO_BYTE (from);
1434
1435 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1436 while (1)
1437 {
1438 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1439 stop_byte - from_byte, charsets, table,
1440 multibyte);
1441 if (stop < to)
1442 {
1443 from = stop, from_byte = stop_byte;
1444 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1445 }
1446 else
1447 break;
1448 }
1449
1450 val = Qnil;
1451 for (i = charset_table_used - 1; i >= 0; i--)
1452 if (!NILP (AREF (charsets, i)))
1453 val = Fcons (CHARSET_NAME (charset_table + i), val);
1454 return val;
1455 }
1456
1457 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1458 1, 2, 0,
1459 doc: /* Return a list of charsets in STR.
1460 Optional arg TABLE if non-nil is a translation table to look up.
1461
1462 If STR is unibyte, the returned list may contain
1463 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1464 (str, table)
1465 Lisp_Object str, table;
1466 {
1467 Lisp_Object charsets;
1468 int i;
1469 Lisp_Object val;
1470
1471 CHECK_STRING (str);
1472
1473 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1474 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1475 charsets, table,
1476 STRING_MULTIBYTE (str));
1477 val = Qnil;
1478 for (i = charset_table_used - 1; i >= 0; i--)
1479 if (!NILP (AREF (charsets, i)))
1480 val = Fcons (CHARSET_NAME (charset_table + i), val);
1481 return val;
1482 }
1483
1484 \f
1485
1486 /* Return a character correponding to the code-point CODE of
1487 CHARSET. */
1488
1489 int
1490 decode_char (charset, code)
1491 struct charset *charset;
1492 unsigned code;
1493 {
1494 int c, char_index;
1495 enum charset_method method = CHARSET_METHOD (charset);
1496
1497 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1498 return -1;
1499
1500 if (method == CHARSET_METHOD_MAP_DEFERRED)
1501 {
1502 load_charset (charset);
1503 method = CHARSET_METHOD (charset);
1504 }
1505
1506 if (method == CHARSET_METHOD_SUBSET)
1507 {
1508 Lisp_Object subset_info;
1509
1510 subset_info = CHARSET_SUBSET (charset);
1511 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1512 code -= XINT (AREF (subset_info, 3));
1513 if (code < XFASTINT (AREF (subset_info, 1))
1514 || code > XFASTINT (AREF (subset_info, 2)))
1515 c = -1;
1516 else
1517 c = DECODE_CHAR (charset, code);
1518 }
1519 else if (method == CHARSET_METHOD_SUPERSET)
1520 {
1521 Lisp_Object parents;
1522
1523 parents = CHARSET_SUPERSET (charset);
1524 c = -1;
1525 for (; CONSP (parents); parents = XCDR (parents))
1526 {
1527 int id = XINT (XCAR (XCAR (parents)));
1528 int code_offset = XINT (XCDR (XCAR (parents)));
1529 unsigned this_code = code - code_offset;
1530
1531 charset = CHARSET_FROM_ID (id);
1532 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1533 break;
1534 }
1535 }
1536 else
1537 {
1538 char_index = CODE_POINT_TO_INDEX (charset, code);
1539 if (char_index < 0)
1540 return -1;
1541
1542 if (method == CHARSET_METHOD_MAP)
1543 {
1544 Lisp_Object decoder;
1545
1546 decoder = CHARSET_DECODER (charset);
1547 if (! VECTORP (decoder))
1548 return -1;
1549 c = XINT (AREF (decoder, char_index));
1550 }
1551 else
1552 {
1553 c = char_index + CHARSET_CODE_OFFSET (charset);
1554 }
1555 }
1556
1557 if (CHARSET_UNIFIED_P (charset)
1558 && c >= 0)
1559 {
1560 MAYBE_UNIFY_CHAR (c);
1561 }
1562
1563 return c;
1564 }
1565
1566 /* Variable used temporarily by the macro ENCODE_CHAR. */
1567 Lisp_Object charset_work;
1568
1569 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1570 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1571 use CHARSET's strict_max_char instead of max_char. */
1572
1573 unsigned
1574 encode_char (charset, c)
1575 struct charset *charset;
1576 int c;
1577 {
1578 unsigned code;
1579 enum charset_method method = CHARSET_METHOD (charset);
1580
1581 if (CHARSET_UNIFIED_P (charset))
1582 {
1583 Lisp_Object deunifier, deunified;
1584
1585 deunifier = CHARSET_DEUNIFIER (charset);
1586 if (! CHAR_TABLE_P (deunifier))
1587 {
1588 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
1589 deunifier = CHARSET_DEUNIFIER (charset);
1590 }
1591 deunified = CHAR_TABLE_REF (deunifier, c);
1592 if (! NILP (deunified))
1593 c = XINT (deunified);
1594 }
1595
1596 if (method == CHARSET_METHOD_SUBSET)
1597 {
1598 Lisp_Object subset_info;
1599 struct charset *this_charset;
1600
1601 subset_info = CHARSET_SUBSET (charset);
1602 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1603 code = ENCODE_CHAR (this_charset, c);
1604 if (code == CHARSET_INVALID_CODE (this_charset)
1605 || code < XFASTINT (AREF (subset_info, 1))
1606 || code > XFASTINT (AREF (subset_info, 2)))
1607 return CHARSET_INVALID_CODE (charset);
1608 code += XINT (AREF (subset_info, 3));
1609 return code;
1610 }
1611
1612 if (method == CHARSET_METHOD_SUPERSET)
1613 {
1614 Lisp_Object parents;
1615
1616 parents = CHARSET_SUPERSET (charset);
1617 for (; CONSP (parents); parents = XCDR (parents))
1618 {
1619 int id = XINT (XCAR (XCAR (parents)));
1620 int code_offset = XINT (XCDR (XCAR (parents)));
1621 struct charset *this_charset = CHARSET_FROM_ID (id);
1622
1623 code = ENCODE_CHAR (this_charset, c);
1624 if (code != CHARSET_INVALID_CODE (this_charset))
1625 return code + code_offset;
1626 }
1627 return CHARSET_INVALID_CODE (charset);
1628 }
1629
1630 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1631 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1632 return CHARSET_INVALID_CODE (charset);
1633
1634 if (method == CHARSET_METHOD_MAP_DEFERRED)
1635 {
1636 load_charset (charset);
1637 method = CHARSET_METHOD (charset);
1638 }
1639
1640 if (method == CHARSET_METHOD_MAP)
1641 {
1642 Lisp_Object encoder;
1643 Lisp_Object val;
1644
1645 encoder = CHARSET_ENCODER (charset);
1646 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1647 return CHARSET_INVALID_CODE (charset);
1648 val = CHAR_TABLE_REF (encoder, c);
1649 if (NILP (val))
1650 return CHARSET_INVALID_CODE (charset);
1651 code = XINT (val);
1652 if (! CHARSET_COMPACT_CODES_P (charset))
1653 code = INDEX_TO_CODE_POINT (charset, code);
1654 }
1655 else /* method == CHARSET_METHOD_OFFSET */
1656 {
1657 code = c - CHARSET_CODE_OFFSET (charset);
1658 code = INDEX_TO_CODE_POINT (charset, code);
1659 }
1660
1661 return code;
1662 }
1663
1664
1665 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1666 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1667 Return nil if CODE-POINT is not valid in CHARSET.
1668
1669 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1670
1671 Optional argument RESTRICTION specifies a way to map the pair of CCS
1672 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1673 (charset, code_point, restriction)
1674 Lisp_Object charset, code_point, restriction;
1675 {
1676 int c, id;
1677 unsigned code;
1678 struct charset *charsetp;
1679
1680 CHECK_CHARSET_GET_ID (charset, id);
1681 if (CONSP (code_point))
1682 {
1683 CHECK_NATNUM_CAR (code_point);
1684 CHECK_NATNUM_CDR (code_point);
1685 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1686 }
1687 else
1688 {
1689 CHECK_NATNUM (code_point);
1690 code = XINT (code_point);
1691 }
1692 charsetp = CHARSET_FROM_ID (id);
1693 c = DECODE_CHAR (charsetp, code);
1694 return (c >= 0 ? make_number (c) : Qnil);
1695 }
1696
1697
1698 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1699 doc: /* Encode the character CH into a code-point of CHARSET.
1700 Return nil if CHARSET doesn't include CH.
1701
1702 Optional argument RESTRICTION specifies a way to map CHAR to a
1703 code-point in CCS. Currently not supported and just ignored. */)
1704 (ch, charset, restriction)
1705 Lisp_Object ch, charset, restriction;
1706 {
1707 int id;
1708 unsigned code;
1709 struct charset *charsetp;
1710
1711 CHECK_CHARSET_GET_ID (charset, id);
1712 CHECK_NATNUM (ch);
1713 charsetp = CHARSET_FROM_ID (id);
1714 code = ENCODE_CHAR (charsetp, XINT (ch));
1715 if (code == CHARSET_INVALID_CODE (charsetp))
1716 return Qnil;
1717 if (code > 0x7FFFFFF)
1718 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1719 return make_number (code);
1720 }
1721
1722
1723 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1724 doc:
1725 /* Return a character of CHARSET whose position codes are CODEn.
1726
1727 CODE1 through CODE4 are optional, but if you don't supply sufficient
1728 position codes, it is assumed that the minimum code in each dimension
1729 is specified. */)
1730 (charset, code1, code2, code3, code4)
1731 Lisp_Object charset, code1, code2, code3, code4;
1732 {
1733 int id, dimension;
1734 struct charset *charsetp;
1735 unsigned code;
1736 int c;
1737
1738 CHECK_CHARSET_GET_ID (charset, id);
1739 charsetp = CHARSET_FROM_ID (id);
1740
1741 dimension = CHARSET_DIMENSION (charsetp);
1742 if (NILP (code1))
1743 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1744 ? 0 : CHARSET_MIN_CODE (charsetp));
1745 else
1746 {
1747 CHECK_NATNUM (code1);
1748 if (XFASTINT (code1) >= 0x100)
1749 args_out_of_range (make_number (0xFF), code1);
1750 code = XFASTINT (code1);
1751
1752 if (dimension > 1)
1753 {
1754 code <<= 8;
1755 if (NILP (code2))
1756 code |= charsetp->code_space[(dimension - 2) * 4];
1757 else
1758 {
1759 CHECK_NATNUM (code2);
1760 if (XFASTINT (code2) >= 0x100)
1761 args_out_of_range (make_number (0xFF), code2);
1762 code |= XFASTINT (code2);
1763 }
1764
1765 if (dimension > 2)
1766 {
1767 code <<= 8;
1768 if (NILP (code3))
1769 code |= charsetp->code_space[(dimension - 3) * 4];
1770 else
1771 {
1772 CHECK_NATNUM (code3);
1773 if (XFASTINT (code3) >= 0x100)
1774 args_out_of_range (make_number (0xFF), code3);
1775 code |= XFASTINT (code3);
1776 }
1777
1778 if (dimension > 3)
1779 {
1780 code <<= 8;
1781 if (NILP (code4))
1782 code |= charsetp->code_space[0];
1783 else
1784 {
1785 CHECK_NATNUM (code4);
1786 if (XFASTINT (code4) >= 0x100)
1787 args_out_of_range (make_number (0xFF), code4);
1788 code |= XFASTINT (code4);
1789 }
1790 }
1791 }
1792 }
1793 }
1794
1795 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1796 code &= 0x7F7F7F7F;
1797 c = DECODE_CHAR (charsetp, code);
1798 if (c < 0)
1799 error ("Invalid code(s)");
1800 return make_number (c);
1801 }
1802
1803
1804 /* Return the first charset in CHARSET_LIST that contains C.
1805 CHARSET_LIST is a list of charset IDs. If it is nil, use
1806 Vcharset_ordered_list. */
1807
1808 struct charset *
1809 char_charset (c, charset_list, code_return)
1810 int c;
1811 Lisp_Object charset_list;
1812 unsigned *code_return;
1813 {
1814 if (NILP (charset_list))
1815 charset_list = Vcharset_ordered_list;
1816
1817 while (CONSP (charset_list)
1818 && ! EQ (charset_list, Vcharset_non_preferred_head))
1819 {
1820 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1821 unsigned code = ENCODE_CHAR (charset, c);
1822
1823 if (code != CHARSET_INVALID_CODE (charset))
1824 {
1825 if (code_return)
1826 *code_return = code;
1827 return charset;
1828 }
1829 charset_list = XCDR (charset_list);
1830 }
1831 return (c <= MAX_UNICODE_CHAR ? CHARSET_FROM_ID (charset_unicode)
1832 : CHARSET_FROM_ID (charset_eight_bit));
1833 }
1834
1835
1836 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1837 doc:
1838 /*Return list of charset and one to four position-codes of CHAR.
1839 The charset is decided by the current priority order of charsets.
1840 A position-code is a byte value of each dimension of the code-point of
1841 CHAR in the charset. */)
1842 (ch)
1843 Lisp_Object ch;
1844 {
1845 struct charset *charset;
1846 int c, dimension;
1847 unsigned code;
1848 Lisp_Object val;
1849
1850 CHECK_CHARACTER (ch);
1851 c = XFASTINT (ch);
1852 charset = CHAR_CHARSET (c);
1853 if (! charset)
1854 abort ();
1855 code = ENCODE_CHAR (charset, c);
1856 if (code == CHARSET_INVALID_CODE (charset))
1857 abort ();
1858 dimension = CHARSET_DIMENSION (charset);
1859 for (val = Qnil; dimension > 0; dimension--)
1860 {
1861 val = Fcons (make_number (code & 0xFF), val);
1862 code >>= 8;
1863 }
1864 return Fcons (CHARSET_NAME (charset), val);
1865 }
1866
1867
1868 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1869 doc: /* Return the charset of highest priority that contains CH. */)
1870 (ch)
1871 Lisp_Object ch;
1872 {
1873 struct charset *charset;
1874
1875 CHECK_CHARACTER (ch);
1876 charset = CHAR_CHARSET (XINT (ch));
1877 return (CHARSET_NAME (charset));
1878 }
1879
1880
1881 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1882 doc: /*
1883 Return charset of a character in the current buffer at position POS.
1884 If POS is nil, it defauls to the current point.
1885 If POS is out of range, the value is nil. */)
1886 (pos)
1887 Lisp_Object pos;
1888 {
1889 Lisp_Object ch;
1890 struct charset *charset;
1891
1892 ch = Fchar_after (pos);
1893 if (! INTEGERP (ch))
1894 return ch;
1895 charset = CHAR_CHARSET (XINT (ch));
1896 return (CHARSET_NAME (charset));
1897 }
1898
1899
1900 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1901 doc: /*
1902 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1903
1904 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1905 by their DIMENSION, CHARS, and FINAL-CHAR,
1906 where as Emacs distinguishes them by charset symbol.
1907 See the documentation of the function `charset-info' for the meanings of
1908 DIMENSION, CHARS, and FINAL-CHAR. */)
1909 (dimension, chars, final_char)
1910 Lisp_Object dimension, chars, final_char;
1911 {
1912 int id;
1913 int chars_flag;
1914
1915 check_iso_charset_parameter (dimension, chars, final_char);
1916 chars_flag = XFASTINT (chars) == 96;
1917 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
1918 XFASTINT (final_char));
1919 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
1920 }
1921
1922
1923 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1924 0, 0, 0,
1925 doc: /*
1926 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1927 ()
1928 {
1929 int i;
1930 struct charset *charset;
1931 Lisp_Object attrs;
1932
1933 for (i = 0; i < charset_table_used; i++)
1934 {
1935 charset = CHARSET_FROM_ID (i);
1936 attrs = CHARSET_ATTRIBUTES (charset);
1937
1938 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1939 {
1940 CHARSET_ATTR_DECODER (attrs) = Qnil;
1941 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1942 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1943 }
1944
1945 if (CHARSET_UNIFIED_P (charset))
1946 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
1947 }
1948
1949 if (CHAR_TABLE_P (Vchar_unified_charset_table))
1950 {
1951 Foptimize_char_table (Vchar_unified_charset_table);
1952 Vchar_unify_table = Vchar_unified_charset_table;
1953 Vchar_unified_charset_table = Qnil;
1954 }
1955
1956 return Qnil;
1957 }
1958
1959 DEFUN ("charset-priority-list", Fcharset_priority_list,
1960 Scharset_priority_list, 0, 1, 0,
1961 doc: /* Return the list of charsets ordered by priority.
1962 HIGHESTP non-nil means just return the highest priority one. */)
1963 (highestp)
1964 Lisp_Object highestp;
1965 {
1966 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1967
1968 if (!NILP (highestp))
1969 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
1970
1971 while (!NILP (list))
1972 {
1973 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
1974 list = XCDR (list);
1975 }
1976 return Fnreverse (val);
1977 }
1978
1979 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1980 1, MANY, 0,
1981 doc: /* Assign higher priority to the charsets given as arguments.
1982 usage: (set-charset-priority &rest charsets) */)
1983 (nargs, args)
1984 int nargs;
1985 Lisp_Object *args;
1986 {
1987 Lisp_Object new_head, old_list, arglist[2];
1988 Lisp_Object list_2022, list_emacs_mule;
1989 int i, id;
1990
1991 old_list = Fcopy_sequence (Vcharset_ordered_list);
1992 new_head = Qnil;
1993 for (i = 0; i < nargs; i++)
1994 {
1995 CHECK_CHARSET_GET_ID (args[i], id);
1996 if (! NILP (Fmemq (make_number (id), old_list)))
1997 {
1998 old_list = Fdelq (make_number (id), old_list);
1999 new_head = Fcons (make_number (id), new_head);
2000 }
2001 }
2002 arglist[0] = Fnreverse (new_head);
2003 arglist[1] = Vcharset_non_preferred_head = old_list;
2004 Vcharset_ordered_list = Fnconc (2, arglist);
2005 charset_ordered_list_tick++;
2006
2007 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2008 CONSP (old_list); old_list = XCDR (old_list))
2009 {
2010 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2011 list_2022 = Fcons (XCAR (old_list), list_2022);
2012 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2013 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2014 }
2015 Viso_2022_charset_list = Fnreverse (list_2022);
2016 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2017
2018 return Qnil;
2019 }
2020
2021 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2022 0, 1, 0,
2023 doc: /* Internal use only.
2024 Return charset identification number of CHARSET. */)
2025 (charset)
2026 Lisp_Object charset;
2027 {
2028 int id;
2029
2030 CHECK_CHARSET_GET_ID (charset, id);
2031 return make_number (id);
2032 }
2033
2034 \f
2035 void
2036 init_charset ()
2037 {
2038 Vcharset_map_path
2039 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory),
2040 Qnil);
2041 }
2042
2043
2044 void
2045 init_charset_once ()
2046 {
2047 int i, j, k;
2048
2049 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2050 for (j = 0; j < ISO_MAX_CHARS; j++)
2051 for (k = 0; k < ISO_MAX_FINAL; k++)
2052 iso_charset_table[i][j][k] = -1;
2053
2054 for (i = 0; i < 256; i++)
2055 emacs_mule_charset[i] = NULL;
2056
2057 charset_jisx0201_roman = -1;
2058 charset_jisx0208_1978 = -1;
2059 charset_jisx0208 = -1;
2060
2061 for (i = 0; i < 128; i++)
2062 unibyte_to_multibyte_table[i] = i;
2063 for (; i < 256; i++)
2064 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2065 }
2066
2067 #ifdef emacs
2068
2069 void
2070 syms_of_charset ()
2071 {
2072 DEFSYM (Qcharsetp, "charsetp");
2073
2074 DEFSYM (Qascii, "ascii");
2075 DEFSYM (Qunicode, "unicode");
2076 DEFSYM (Qeight_bit, "eight-bit");
2077 DEFSYM (Qiso_8859_1, "iso-8859-1");
2078
2079 DEFSYM (Qgl, "gl");
2080 DEFSYM (Qgr, "gr");
2081
2082 staticpro (&Vcharset_ordered_list);
2083 Vcharset_ordered_list = Qnil;
2084
2085 staticpro (&Viso_2022_charset_list);
2086 Viso_2022_charset_list = Qnil;
2087
2088 staticpro (&Vemacs_mule_charset_list);
2089 Vemacs_mule_charset_list = Qnil;
2090
2091 /* Don't staticpro them here. It's done in syms_of_fns. */
2092 QCtest = intern (":test");
2093 Qeq = intern ("eq");
2094
2095 staticpro (&Vcharset_hash_table);
2096 {
2097 Lisp_Object args[2];
2098 args[0] = QCtest;
2099 args[1] = Qeq;
2100 Vcharset_hash_table = Fmake_hash_table (2, args);
2101 }
2102
2103 charset_table_size = 128;
2104 charset_table = ((struct charset *)
2105 xmalloc (sizeof (struct charset) * charset_table_size));
2106 charset_table_used = 0;
2107
2108 staticpro (&Vchar_unified_charset_table);
2109 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2110
2111 defsubr (&Scharsetp);
2112 defsubr (&Smap_charset_chars);
2113 defsubr (&Sdefine_charset_internal);
2114 defsubr (&Sdefine_charset_alias);
2115 defsubr (&Scharset_plist);
2116 defsubr (&Sset_charset_plist);
2117 defsubr (&Sunify_charset);
2118 defsubr (&Sget_unused_iso_final_char);
2119 defsubr (&Sdeclare_equiv_charset);
2120 defsubr (&Sfind_charset_region);
2121 defsubr (&Sfind_charset_string);
2122 defsubr (&Sdecode_char);
2123 defsubr (&Sencode_char);
2124 defsubr (&Ssplit_char);
2125 defsubr (&Smake_char);
2126 defsubr (&Schar_charset);
2127 defsubr (&Scharset_after);
2128 defsubr (&Siso_charset);
2129 defsubr (&Sclear_charset_maps);
2130 defsubr (&Scharset_priority_list);
2131 defsubr (&Sset_charset_priority);
2132 defsubr (&Scharset_id_internal);
2133
2134 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2135 doc: /* *Lisp of directories to search for charset map files. */);
2136 Vcharset_map_path = Qnil;
2137
2138 DEFVAR_LISP ("charset-list", &Vcharset_list,
2139 doc: /* List of all charsets ever defined. */);
2140 Vcharset_list = Qnil;
2141
2142 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2143 doc: /* ISO639 language mnemonic symbol for the current language environment.
2144 If the current language environment is for multiple languages (e.g. "Latin-1"),
2145 the value may be a list of mnemonics. */);
2146 Vcurrent_iso639_language = Qnil;
2147
2148 charset_ascii
2149 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2150 0, 127, 'B', -1, 0, 1, 0, 0);
2151 charset_iso_8859_1
2152 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2153 0, 255, -1, -1, -1, 1, 0, 0);
2154 charset_unicode
2155 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2156 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2157 charset_eight_bit
2158 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2159 128, 255, -1, 0, -1, 0, 1,
2160 MAX_5_BYTE_CHAR + 1);
2161 }
2162
2163 #endif /* emacs */
2164
2165 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2166 (do not change this comment) */