]> code.delx.au - gnu-emacs/blob - src/charset.c
Merge from emacs--devo--0
[gnu-emacs] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006, 2007, 2008 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software; you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation; either version 3, or (at your option)
18 any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs; see the file COPYING. If not, write to
27 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
28 Boston, MA 02110-1301, USA. */
29
30 #include <config.h>
31
32 #include <stdio.h>
33 #include <unistd.h>
34 #include <ctype.h>
35 #include <sys/types.h>
36 #include "lisp.h"
37 #include "character.h"
38 #include "charset.h"
39 #include "coding.h"
40 #include "disptab.h"
41 #include "buffer.h"
42
43 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
44
45 A coded character set ("charset" hereafter) is a meaningful
46 collection (i.e. language, culture, functionality, etc.) of
47 characters. Emacs handles multiple charsets at once. In Emacs Lisp
48 code, a charset is represented by a symbol. In C code, a charset is
49 represented by its ID number or by a pointer to a struct charset.
50
51 The actual information about each charset is stored in two places.
52 Lispy information is stored in the hash table Vcharset_hash_table as
53 a vector (charset attributes). The other information is stored in
54 charset_table as a struct charset.
55
56 */
57
58 /* List of all charsets. This variable is used only from Emacs
59 Lisp. */
60 Lisp_Object Vcharset_list;
61
62 /* Hash table that contains attributes of each charset. Keys are
63 charset symbols, and values are vectors of charset attributes. */
64 Lisp_Object Vcharset_hash_table;
65
66 /* Table of struct charset. */
67 struct charset *charset_table;
68
69 static int charset_table_size;
70 static int charset_table_used;
71
72 Lisp_Object Qcharsetp;
73
74 /* Special charset symbols. */
75 Lisp_Object Qascii;
76 Lisp_Object Qeight_bit;
77 Lisp_Object Qiso_8859_1;
78 Lisp_Object Qunicode;
79
80 /* The corresponding charsets. */
81 int charset_ascii;
82 int charset_eight_bit;
83 int charset_iso_8859_1;
84 int charset_unicode;
85
86 /* The other special charsets. */
87 int charset_jisx0201_roman;
88 int charset_jisx0208_1978;
89 int charset_jisx0208;
90
91 /* Value of charset attribute `charset-iso-plane'. */
92 Lisp_Object Qgl, Qgr;
93
94 /* Charset of unibyte characters. */
95 int charset_unibyte;
96
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list;
99
100 /* Incremented everytime we change Vcharset_ordered_list. This is
101 unsigned short so that it fits in Lisp_Int and never matches
102 -1. */
103 unsigned short charset_ordered_list_tick;
104
105 /* List of iso-2022 charsets. */
106 Lisp_Object Viso_2022_charset_list;
107
108 /* List of emacs-mule charsets. */
109 Lisp_Object Vemacs_mule_charset_list;
110
111 struct charset *emacs_mule_charset[256];
112
113 /* Mapping table from ISO2022's charset (specified by DIMENSION,
114 CHARS, and FINAL-CHAR) to Emacs' charset. */
115 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
116
117 Lisp_Object Vcharset_map_path;
118
119 Lisp_Object Vchar_unified_charset_table;
120
121 /* Defined in chartab.c */
122 extern void
123 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
124 Lisp_Object function, Lisp_Object table,
125 Lisp_Object arg, struct charset *charset,
126 unsigned from, unsigned to));
127
128 #define CODE_POINT_TO_INDEX(charset, code) \
129 ((charset)->code_linear_p \
130 ? (code) - (charset)->min_code \
131 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
132 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
133 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
134 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
135 ? (((((code) >> 24) - (charset)->code_space[12]) \
136 * (charset)->code_space[11]) \
137 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
138 * (charset)->code_space[7]) \
139 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
140 * (charset)->code_space[3]) \
141 + (((code) & 0xFF) - (charset)->code_space[0]) \
142 - ((charset)->char_index_offset)) \
143 : -1)
144
145
146 /* Convert the character index IDX to code-point CODE for CHARSET.
147 It is assumed that IDX is in a valid range. */
148
149 #define INDEX_TO_CODE_POINT(charset, idx) \
150 ((charset)->code_linear_p \
151 ? (idx) + (charset)->min_code \
152 : (idx += (charset)->char_index_offset, \
153 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
154 | (((charset)->code_space[4] \
155 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
156 << 8) \
157 | (((charset)->code_space[8] \
158 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
159 << 16) \
160 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
161 << 24))))
162
163
164 \f
165
166 /* Set to 1 to warn that a charset map is loaded and thus a buffer
167 text and a string data may be relocated. */
168 int charset_map_loaded;
169
170 struct charset_map_entries
171 {
172 struct {
173 unsigned from, to;
174 int c;
175 } entry[0x10000];
176 struct charset_map_entries *next;
177 };
178
179 /* Load the mapping information for CHARSET from ENTRIES.
180
181 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
182
183 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
184 CHARSET->decoder, and CHARSET->encoder.
185
186 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
187 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
188 setup it too. */
189
190 static void
191 load_charset_map (charset, entries, n_entries, control_flag)
192 struct charset *charset;
193 struct charset_map_entries *entries;
194 int n_entries;
195 int control_flag;
196 {
197 Lisp_Object vec, table;
198 unsigned max_code = CHARSET_MAX_CODE (charset);
199 int ascii_compatible_p = charset->ascii_compatible_p;
200 int min_char, max_char, nonascii_min_char;
201 int i;
202 unsigned char *fast_map = charset->fast_map;
203
204 if (n_entries <= 0)
205 return;
206
207 if (control_flag > 0)
208 {
209 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
210
211 table = Fmake_char_table (Qnil, Qnil);
212 if (control_flag == 1)
213 vec = Fmake_vector (make_number (n), make_number (-1));
214 else if (! CHAR_TABLE_P (Vchar_unify_table))
215 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
216
217 charset_map_loaded = 1;
218 }
219
220 min_char = max_char = entries->entry[0].c;
221 nonascii_min_char = MAX_CHAR;
222 for (i = 0; i < n_entries; i++)
223 {
224 unsigned from, to;
225 int from_index, to_index;
226 int from_c, to_c;
227 int idx = i % 0x10000;
228
229 if (i > 0 && idx == 0)
230 entries = entries->next;
231 from = entries->entry[idx].from;
232 to = entries->entry[idx].to;
233 from_c = entries->entry[idx].c;
234 from_index = CODE_POINT_TO_INDEX (charset, from);
235 if (from == to)
236 {
237 to_index = from_index;
238 to_c = from_c;
239 }
240 else
241 {
242 to_index = CODE_POINT_TO_INDEX (charset, to);
243 to_c = from_c + (to_index - from_index);
244 }
245 if (from_index < 0 || to_index < 0)
246 continue;
247
248 if (control_flag < 2)
249 {
250 int c;
251
252 if (to_c > max_char)
253 max_char = to_c;
254 else if (from_c < min_char)
255 min_char = from_c;
256 if (ascii_compatible_p)
257 {
258 if (! ASCII_BYTE_P (from_c))
259 {
260 if (from_c < nonascii_min_char)
261 nonascii_min_char = from_c;
262 }
263 else if (! ASCII_BYTE_P (to_c))
264 {
265 nonascii_min_char = 0x80;
266 }
267 }
268
269 for (c = from_c; c <= to_c; c++)
270 CHARSET_FAST_MAP_SET (c, fast_map);
271
272 if (control_flag == 1)
273 {
274 unsigned code = from;
275
276 if (CHARSET_COMPACT_CODES_P (charset))
277 while (1)
278 {
279 ASET (vec, from_index, make_number (from_c));
280 if (NILP (CHAR_TABLE_REF (table, from_c)))
281 CHAR_TABLE_SET (table, from_c, make_number (code));
282 if (from_index == to_index)
283 break;
284 from_index++, from_c++;
285 code = INDEX_TO_CODE_POINT (charset, from_index);
286 }
287 else
288 for (; from_index <= to_index; from_index++, from_c++)
289 {
290 ASET (vec, from_index, make_number (from_c));
291 if (NILP (CHAR_TABLE_REF (table, from_c)))
292 CHAR_TABLE_SET (table, from_c, make_number (from_index));
293 }
294 }
295 }
296 else
297 {
298 unsigned code = from;
299
300 while (1)
301 {
302 int c1 = DECODE_CHAR (charset, code);
303
304 if (c1 >= 0)
305 {
306 CHAR_TABLE_SET (table, from_c, make_number (c1));
307 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
308 if (CHAR_TABLE_P (Vchar_unified_charset_table))
309 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
310 CHARSET_NAME (charset));
311 }
312 if (from_index == to_index)
313 break;
314 from_index++, from_c++;
315 code = INDEX_TO_CODE_POINT (charset, from_index);
316 }
317 }
318 }
319
320 if (control_flag < 2)
321 {
322 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
323 ? nonascii_min_char : min_char);
324 CHARSET_MAX_CHAR (charset) = max_char;
325 if (control_flag == 1)
326 {
327 CHARSET_DECODER (charset) = vec;
328 CHARSET_ENCODER (charset) = table;
329 }
330 }
331 else
332 CHARSET_DEUNIFIER (charset) = table;
333 }
334
335
336 /* Read a hexadecimal number (preceded by "0x") from the file FP while
337 paying attention to comment charcter '#'. */
338
339 static INLINE unsigned
340 read_hex (fp, eof)
341 FILE *fp;
342 int *eof;
343 {
344 int c;
345 unsigned n;
346
347 while ((c = getc (fp)) != EOF)
348 {
349 if (c == '#')
350 {
351 while ((c = getc (fp)) != EOF && c != '\n');
352 }
353 else if (c == '0')
354 {
355 if ((c = getc (fp)) == EOF || c == 'x')
356 break;
357 }
358 }
359 if (c == EOF)
360 {
361 *eof = 1;
362 return 0;
363 }
364 *eof = 0;
365 n = 0;
366 if (c == 'x')
367 while ((c = getc (fp)) != EOF && isxdigit (c))
368 n = ((n << 4)
369 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
370 else
371 while ((c = getc (fp)) != EOF && isdigit (c))
372 n = (n * 10) + c - '0';
373 if (c != EOF)
374 ungetc (c, fp);
375 return n;
376 }
377
378
379 /* Return a mapping vector for CHARSET loaded from MAPFILE.
380 Each line of MAPFILE has this form
381 0xAAAA 0xCCCC
382 where 0xAAAA is a code-point and 0xCCCC is the corresponding
383 character code, or this form
384 0xAAAA-0xBBBB 0xCCCC
385 where 0xAAAA and 0xBBBB are code-points specifying a range, and
386 0xCCCC is the first character code of the range.
387
388 The returned vector has this form:
389 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
390 where CODE1 is a code-point or a cons of code-points specifying a
391 range. */
392
393 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
394
395 static void
396 load_charset_map_from_file (charset, mapfile, control_flag)
397 struct charset *charset;
398 Lisp_Object mapfile;
399 int control_flag;
400 {
401 unsigned min_code = CHARSET_MIN_CODE (charset);
402 unsigned max_code = CHARSET_MAX_CODE (charset);
403 int fd;
404 FILE *fp;
405 int eof;
406 Lisp_Object suffixes;
407 struct charset_map_entries *head, *entries;
408 int n_entries;
409
410 suffixes = Fcons (build_string (".map"),
411 Fcons (build_string (".TXT"), Qnil));
412
413 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
414 if (fd < 0
415 || ! (fp = fdopen (fd, "r")))
416 {
417 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
418 return;
419 }
420
421 head = entries = ((struct charset_map_entries *)
422 alloca (sizeof (struct charset_map_entries)));
423 n_entries = 0;
424 eof = 0;
425 while (1)
426 {
427 unsigned from, to;
428 int c;
429 int idx;
430
431 from = read_hex (fp, &eof);
432 if (eof)
433 break;
434 if (getc (fp) == '-')
435 to = read_hex (fp, &eof);
436 else
437 to = from;
438 c = (int) read_hex (fp, &eof);
439
440 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
441 continue;
442
443 if (n_entries > 0 && (n_entries % 0x10000) == 0)
444 {
445 entries->next = ((struct charset_map_entries *)
446 alloca (sizeof (struct charset_map_entries)));
447 entries = entries->next;
448 }
449 idx = n_entries % 0x10000;
450 entries->entry[idx].from = from;
451 entries->entry[idx].to = to;
452 entries->entry[idx].c = c;
453 n_entries++;
454 }
455 fclose (fp);
456 close (fd);
457
458 load_charset_map (charset, head, n_entries, control_flag);
459 }
460
461 static void
462 load_charset_map_from_vector (charset, vec, control_flag)
463 struct charset *charset;
464 Lisp_Object vec;
465 int control_flag;
466 {
467 unsigned min_code = CHARSET_MIN_CODE (charset);
468 unsigned max_code = CHARSET_MAX_CODE (charset);
469 struct charset_map_entries *head, *entries;
470 int n_entries;
471 int len = ASIZE (vec);
472 int i;
473
474 if (len % 2 == 1)
475 {
476 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
477 return;
478 }
479
480 head = entries = ((struct charset_map_entries *)
481 alloca (sizeof (struct charset_map_entries)));
482 n_entries = 0;
483 for (i = 0; i < len; i += 2)
484 {
485 Lisp_Object val, val2;
486 unsigned from, to;
487 int c;
488 int idx;
489
490 val = AREF (vec, i);
491 if (CONSP (val))
492 {
493 val2 = XCDR (val);
494 val = XCAR (val);
495 CHECK_NATNUM (val);
496 CHECK_NATNUM (val2);
497 from = XFASTINT (val);
498 to = XFASTINT (val2);
499 }
500 else
501 {
502 CHECK_NATNUM (val);
503 from = to = XFASTINT (val);
504 }
505 val = AREF (vec, i + 1);
506 CHECK_NATNUM (val);
507 c = XFASTINT (val);
508
509 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
510 continue;
511
512 if (n_entries > 0 && (n_entries % 0x10000) == 0)
513 {
514 entries->next = ((struct charset_map_entries *)
515 alloca (sizeof (struct charset_map_entries)));
516 entries = entries->next;
517 }
518 idx = n_entries % 0x10000;
519 entries->entry[idx].from = from;
520 entries->entry[idx].to = to;
521 entries->entry[idx].c = c;
522 n_entries++;
523 }
524
525 load_charset_map (charset, head, n_entries, control_flag);
526 }
527
528 static void
529 load_charset (charset)
530 struct charset *charset;
531 {
532 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
533 {
534 Lisp_Object map;
535
536 map = CHARSET_MAP (charset);
537 if (STRINGP (map))
538 load_charset_map_from_file (charset, map, 1);
539 else
540 load_charset_map_from_vector (charset, map, 1);
541 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
542 }
543 }
544
545
546 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
547 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
548 (object)
549 Lisp_Object object;
550 {
551 return (CHARSETP (object) ? Qt : Qnil);
552 }
553
554
555 void
556 map_charset_chars (c_function, function, arg,
557 charset, from, to)
558 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
559 Lisp_Object function, arg;
560 struct charset *charset;
561 unsigned from, to;
562 {
563 Lisp_Object range;
564 int partial;
565
566 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
567 load_charset (charset);
568
569 partial = (from > CHARSET_MIN_CODE (charset)
570 || to < CHARSET_MAX_CODE (charset));
571
572 if (CHARSET_UNIFIED_P (charset)
573 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
574 {
575 map_char_table_for_charset (c_function, function,
576 CHARSET_DEUNIFIER (charset), arg,
577 partial ? charset : NULL, from, to);
578 }
579
580 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
581 {
582 int from_idx = CODE_POINT_TO_INDEX (charset, from);
583 int to_idx = CODE_POINT_TO_INDEX (charset, to);
584 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
585 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
586
587 range = Fcons (make_number (from_c), make_number (to_c));
588 if (NILP (function))
589 (*c_function) (arg, range);
590 else
591 call2 (function, range, arg);
592 }
593 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
594 {
595 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
596 return;
597 map_char_table_for_charset (c_function, function,
598 CHARSET_ENCODER (charset), arg,
599 partial ? charset : NULL, from, to);
600 }
601 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
602 {
603 Lisp_Object subset_info;
604 int offset;
605
606 subset_info = CHARSET_SUBSET (charset);
607 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
608 offset = XINT (AREF (subset_info, 3));
609 from -= offset;
610 if (from < XFASTINT (AREF (subset_info, 1)))
611 from = XFASTINT (AREF (subset_info, 1));
612 to -= offset;
613 if (to > XFASTINT (AREF (subset_info, 2)))
614 to = XFASTINT (AREF (subset_info, 2));
615 map_charset_chars (c_function, function, arg, charset, from, to);
616 }
617 else /* i.e. CHARSET_METHOD_SUPERSET */
618 {
619 Lisp_Object parents;
620
621 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
622 parents = XCDR (parents))
623 {
624 int offset;
625 unsigned this_from, this_to;
626
627 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
628 offset = XINT (XCDR (XCAR (parents)));
629 this_from = from - offset;
630 this_to = to - offset;
631 if (this_from < CHARSET_MIN_CODE (charset))
632 this_from = CHARSET_MIN_CODE (charset);
633 if (this_to > CHARSET_MAX_CODE (charset))
634 this_to = CHARSET_MAX_CODE (charset);
635 map_charset_chars (c_function, function, arg, charset,
636 this_from, this_to);
637 }
638 }
639 }
640
641 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
642 doc: /* Call FUNCTION for all characters in CHARSET.
643 FUNCTION is called with an argument RANGE and the optional 3rd
644 argument ARG.
645
646 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
647 characters contained in CHARSET.
648
649 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
650 range of code points of target characters. */)
651 (function, charset, arg, from_code, to_code)
652 Lisp_Object function, charset, arg, from_code, to_code;
653 {
654 struct charset *cs;
655 unsigned from, to;
656
657 CHECK_CHARSET_GET_CHARSET (charset, cs);
658 if (NILP (from_code))
659 from = CHARSET_MIN_CODE (cs);
660 else
661 {
662 CHECK_NATNUM (from_code);
663 from = XINT (from_code);
664 if (from < CHARSET_MIN_CODE (cs))
665 from = CHARSET_MIN_CODE (cs);
666 }
667 if (NILP (to_code))
668 to = CHARSET_MAX_CODE (cs);
669 else
670 {
671 CHECK_NATNUM (to_code);
672 to = XINT (to_code);
673 if (to > CHARSET_MAX_CODE (cs))
674 to = CHARSET_MAX_CODE (cs);
675 }
676 map_charset_chars (NULL, function, arg, cs, from, to);
677 return Qnil;
678 }
679
680
681 /* Define a charset according to the arguments. The Nth argument is
682 the Nth attribute of the charset (the last attribute `charset-id'
683 is not included). See the docstring of `define-charset' for the
684 detail. */
685
686 DEFUN ("define-charset-internal", Fdefine_charset_internal,
687 Sdefine_charset_internal, charset_arg_max, MANY, 0,
688 doc: /* For internal use only.
689 usage: (define-charset-internal ...) */)
690 (nargs, args)
691 int nargs;
692 Lisp_Object *args;
693 {
694 /* Charset attr vector. */
695 Lisp_Object attrs;
696 Lisp_Object val;
697 unsigned hash_code;
698 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
699 int i, j;
700 struct charset charset;
701 int id;
702 int dimension;
703 int new_definition_p;
704 int nchars;
705
706 if (nargs != charset_arg_max)
707 return Fsignal (Qwrong_number_of_arguments,
708 Fcons (intern ("define-charset-internal"),
709 make_number (nargs)));
710
711 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
712
713 CHECK_SYMBOL (args[charset_arg_name]);
714 ASET (attrs, charset_name, args[charset_arg_name]);
715
716 val = args[charset_arg_code_space];
717 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
718 {
719 int min_byte, max_byte;
720
721 min_byte = XINT (Faref (val, make_number (i * 2)));
722 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
723 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
724 error ("Invalid :code-space value");
725 charset.code_space[i * 4] = min_byte;
726 charset.code_space[i * 4 + 1] = max_byte;
727 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
728 nchars *= charset.code_space[i * 4 + 2];
729 charset.code_space[i * 4 + 3] = nchars;
730 if (max_byte > 0)
731 dimension = i + 1;
732 }
733
734 val = args[charset_arg_dimension];
735 if (NILP (val))
736 charset.dimension = dimension;
737 else
738 {
739 CHECK_NATNUM (val);
740 charset.dimension = XINT (val);
741 if (charset.dimension < 1 || charset.dimension > 4)
742 args_out_of_range_3 (val, make_number (1), make_number (4));
743 }
744
745 charset.code_linear_p
746 = (charset.dimension == 1
747 || (charset.code_space[2] == 256
748 && (charset.dimension == 2
749 || (charset.code_space[6] == 256
750 && (charset.dimension == 3
751 || charset.code_space[10] == 256)))));
752
753 if (! charset.code_linear_p)
754 {
755 charset.code_space_mask = (unsigned char *) xmalloc (256);
756 bzero (charset.code_space_mask, 256);
757 for (i = 0; i < 4; i++)
758 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
759 j++)
760 charset.code_space_mask[j] |= (1 << i);
761 }
762
763 charset.iso_chars_96 = charset.code_space[2] == 96;
764
765 charset.min_code = (charset.code_space[0]
766 | (charset.code_space[4] << 8)
767 | (charset.code_space[8] << 16)
768 | (charset.code_space[12] << 24));
769 charset.max_code = (charset.code_space[1]
770 | (charset.code_space[5] << 8)
771 | (charset.code_space[9] << 16)
772 | (charset.code_space[13] << 24));
773 charset.char_index_offset = 0;
774
775 val = args[charset_arg_min_code];
776 if (! NILP (val))
777 {
778 unsigned code;
779
780 if (INTEGERP (val))
781 code = XINT (val);
782 else
783 {
784 CHECK_CONS (val);
785 CHECK_NUMBER_CAR (val);
786 CHECK_NUMBER_CDR (val);
787 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
788 }
789 if (code < charset.min_code
790 || code > charset.max_code)
791 args_out_of_range_3 (make_number (charset.min_code),
792 make_number (charset.max_code), val);
793 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
794 charset.min_code = code;
795 }
796
797 val = args[charset_arg_max_code];
798 if (! NILP (val))
799 {
800 unsigned code;
801
802 if (INTEGERP (val))
803 code = XINT (val);
804 else
805 {
806 CHECK_CONS (val);
807 CHECK_NUMBER_CAR (val);
808 CHECK_NUMBER_CDR (val);
809 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
810 }
811 if (code < charset.min_code
812 || code > charset.max_code)
813 args_out_of_range_3 (make_number (charset.min_code),
814 make_number (charset.max_code), val);
815 charset.max_code = code;
816 }
817
818 charset.compact_codes_p = charset.max_code < 0x1000000;
819
820 val = args[charset_arg_invalid_code];
821 if (NILP (val))
822 {
823 if (charset.min_code > 0)
824 charset.invalid_code = 0;
825 else
826 {
827 XSETINT (val, charset.max_code + 1);
828 if (XINT (val) == charset.max_code + 1)
829 charset.invalid_code = charset.max_code + 1;
830 else
831 error ("Attribute :invalid-code must be specified");
832 }
833 }
834 else
835 {
836 CHECK_NATNUM (val);
837 charset.invalid_code = XFASTINT (val);
838 }
839
840 val = args[charset_arg_iso_final];
841 if (NILP (val))
842 charset.iso_final = -1;
843 else
844 {
845 CHECK_NUMBER (val);
846 if (XINT (val) < '0' || XINT (val) > 127)
847 error ("Invalid iso-final-char: %d", XINT (val));
848 charset.iso_final = XINT (val);
849 }
850
851 val = args[charset_arg_iso_revision];
852 if (NILP (val))
853 charset.iso_revision = -1;
854 else
855 {
856 CHECK_NUMBER (val);
857 if (XINT (val) > 63)
858 args_out_of_range (make_number (63), val);
859 charset.iso_revision = XINT (val);
860 }
861
862 val = args[charset_arg_emacs_mule_id];
863 if (NILP (val))
864 charset.emacs_mule_id = -1;
865 else
866 {
867 CHECK_NATNUM (val);
868 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
869 error ("Invalid emacs-mule-id: %d", XINT (val));
870 charset.emacs_mule_id = XINT (val);
871 }
872
873 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
874
875 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
876
877 charset.unified_p = 0;
878
879 bzero (charset.fast_map, sizeof (charset.fast_map));
880
881 if (! NILP (args[charset_arg_code_offset]))
882 {
883 val = args[charset_arg_code_offset];
884 CHECK_NUMBER (val);
885
886 charset.method = CHARSET_METHOD_OFFSET;
887 charset.code_offset = XINT (val);
888
889 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
890 charset.min_char = i + charset.code_offset;
891 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
892 charset.max_char = i + charset.code_offset;
893 if (charset.max_char > MAX_CHAR)
894 error ("Unsupported max char: %d", charset.max_char);
895
896 i = (charset.min_char >> 7) << 7;
897 for (; i < 0x10000 && i <= charset.max_char; i += 128)
898 CHARSET_FAST_MAP_SET (i, charset.fast_map);
899 i = (i >> 12) << 12;
900 for (; i <= charset.max_char; i += 0x1000)
901 CHARSET_FAST_MAP_SET (i, charset.fast_map);
902 }
903 else if (! NILP (args[charset_arg_map]))
904 {
905 val = args[charset_arg_map];
906 ASET (attrs, charset_map, val);
907 if (STRINGP (val))
908 load_charset_map_from_file (&charset, val, 0);
909 else
910 load_charset_map_from_vector (&charset, val, 0);
911 charset.method = CHARSET_METHOD_MAP_DEFERRED;
912 }
913 else if (! NILP (args[charset_arg_subset]))
914 {
915 Lisp_Object parent;
916 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
917 struct charset *parent_charset;
918
919 val = args[charset_arg_subset];
920 parent = Fcar (val);
921 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
922 parent_min_code = Fnth (make_number (1), val);
923 CHECK_NATNUM (parent_min_code);
924 parent_max_code = Fnth (make_number (2), val);
925 CHECK_NATNUM (parent_max_code);
926 parent_code_offset = Fnth (make_number (3), val);
927 CHECK_NUMBER (parent_code_offset);
928 val = Fmake_vector (make_number (4), Qnil);
929 ASET (val, 0, make_number (parent_charset->id));
930 ASET (val, 1, parent_min_code);
931 ASET (val, 2, parent_max_code);
932 ASET (val, 3, parent_code_offset);
933 ASET (attrs, charset_subset, val);
934
935 charset.method = CHARSET_METHOD_SUBSET;
936 /* Here, we just copy the parent's fast_map. It's not accurate,
937 but at least it works for quickly detecting which character
938 DOESN'T belong to this charset. */
939 for (i = 0; i < 190; i++)
940 charset.fast_map[i] = parent_charset->fast_map[i];
941
942 /* We also copy these for parents. */
943 charset.min_char = parent_charset->min_char;
944 charset.max_char = parent_charset->max_char;
945 }
946 else if (! NILP (args[charset_arg_superset]))
947 {
948 val = args[charset_arg_superset];
949 charset.method = CHARSET_METHOD_SUPERSET;
950 val = Fcopy_sequence (val);
951 ASET (attrs, charset_superset, val);
952
953 charset.min_char = MAX_CHAR;
954 charset.max_char = 0;
955 for (; ! NILP (val); val = Fcdr (val))
956 {
957 Lisp_Object elt, car_part, cdr_part;
958 int this_id, offset;
959 struct charset *this_charset;
960
961 elt = Fcar (val);
962 if (CONSP (elt))
963 {
964 car_part = XCAR (elt);
965 cdr_part = XCDR (elt);
966 CHECK_CHARSET_GET_ID (car_part, this_id);
967 CHECK_NUMBER (cdr_part);
968 offset = XINT (cdr_part);
969 }
970 else
971 {
972 CHECK_CHARSET_GET_ID (elt, this_id);
973 offset = 0;
974 }
975 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
976
977 this_charset = CHARSET_FROM_ID (this_id);
978 if (charset.min_char > this_charset->min_char)
979 charset.min_char = this_charset->min_char;
980 if (charset.max_char < this_charset->max_char)
981 charset.max_char = this_charset->max_char;
982 for (i = 0; i < 190; i++)
983 charset.fast_map[i] |= this_charset->fast_map[i];
984 }
985 }
986 else
987 error ("None of :code-offset, :map, :parents are specified");
988
989 val = args[charset_arg_unify_map];
990 if (! NILP (val) && !STRINGP (val))
991 CHECK_VECTOR (val);
992 ASET (attrs, charset_unify_map, val);
993
994 CHECK_LIST (args[charset_arg_plist]);
995 ASET (attrs, charset_plist, args[charset_arg_plist]);
996
997 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
998 &hash_code);
999 if (charset.hash_index >= 0)
1000 {
1001 new_definition_p = 0;
1002 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1003 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1004 }
1005 else
1006 {
1007 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1008 hash_code);
1009 if (charset_table_used == charset_table_size)
1010 {
1011 struct charset *new_table
1012 = (struct charset *) xmalloc (sizeof (struct charset)
1013 * (charset_table_size + 16));
1014 bcopy (charset_table, new_table,
1015 sizeof (struct charset) * charset_table_size);
1016 charset_table_size += 16;
1017 charset_table = new_table;
1018 }
1019 id = charset_table_used++;
1020 new_definition_p = 1;
1021 }
1022
1023 ASET (attrs, charset_id, make_number (id));
1024 charset.id = id;
1025 charset_table[id] = charset;
1026
1027 if (charset.iso_final >= 0)
1028 {
1029 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1030 charset.iso_final) = id;
1031 if (new_definition_p)
1032 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1033 Fcons (make_number (id), Qnil));
1034 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1035 charset_jisx0201_roman = id;
1036 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1037 charset_jisx0208_1978 = id;
1038 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1039 charset_jisx0208 = id;
1040 }
1041
1042 if (charset.emacs_mule_id >= 0)
1043 {
1044 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1045 if (charset.emacs_mule_id < 0xA0)
1046 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1047 else
1048 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1049 if (new_definition_p)
1050 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1051 Fcons (make_number (id), Qnil));
1052 }
1053
1054 if (new_definition_p)
1055 {
1056 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1057 if (charset.supplementary_p)
1058 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1059 Fcons (make_number (id), Qnil));
1060 else
1061 Vcharset_ordered_list = Fcons (make_number (id),
1062 Vcharset_ordered_list);
1063 charset_ordered_list_tick++;
1064 }
1065
1066 return Qnil;
1067 }
1068
1069
1070 /* Same as Fdefine_charset_internal but arguments are more convenient
1071 to call from C (typically in syms_of_charset). This can define a
1072 charset of `offset' method only. Return the ID of the new
1073 charset. */
1074
1075 static int
1076 define_charset_internal (name, dimension, code_space, min_code, max_code,
1077 iso_final, iso_revision, emacs_mule_id,
1078 ascii_compatible, supplementary,
1079 code_offset)
1080 Lisp_Object name;
1081 int dimension;
1082 unsigned char *code_space;
1083 unsigned min_code, max_code;
1084 int iso_final, iso_revision, emacs_mule_id;
1085 int ascii_compatible, supplementary;
1086 int code_offset;
1087 {
1088 Lisp_Object args[charset_arg_max];
1089 Lisp_Object plist[14];
1090 Lisp_Object val;
1091 int i;
1092
1093 args[charset_arg_name] = name;
1094 args[charset_arg_dimension] = make_number (dimension);
1095 val = Fmake_vector (make_number (8), make_number (0));
1096 for (i = 0; i < 8; i++)
1097 ASET (val, i, make_number (code_space[i]));
1098 args[charset_arg_code_space] = val;
1099 args[charset_arg_min_code] = make_number (min_code);
1100 args[charset_arg_max_code] = make_number (max_code);
1101 args[charset_arg_iso_final]
1102 = (iso_final < 0 ? Qnil : make_number (iso_final));
1103 args[charset_arg_iso_revision] = make_number (iso_revision);
1104 args[charset_arg_emacs_mule_id]
1105 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1106 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1107 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1108 args[charset_arg_invalid_code] = Qnil;
1109 args[charset_arg_code_offset] = make_number (code_offset);
1110 args[charset_arg_map] = Qnil;
1111 args[charset_arg_subset] = Qnil;
1112 args[charset_arg_superset] = Qnil;
1113 args[charset_arg_unify_map] = Qnil;
1114
1115 plist[0] = intern (":name");
1116 plist[1] = args[charset_arg_name];
1117 plist[2] = intern (":dimension");
1118 plist[3] = args[charset_arg_dimension];
1119 plist[4] = intern (":code-space");
1120 plist[5] = args[charset_arg_code_space];
1121 plist[6] = intern (":iso-final-char");
1122 plist[7] = args[charset_arg_iso_final];
1123 plist[8] = intern (":emacs-mule-id");
1124 plist[9] = args[charset_arg_emacs_mule_id];
1125 plist[10] = intern (":ascii-compatible-p");
1126 plist[11] = args[charset_arg_ascii_compatible_p];
1127 plist[12] = intern (":code-offset");
1128 plist[13] = args[charset_arg_code_offset];
1129
1130 args[charset_arg_plist] = Flist (14, plist);
1131 Fdefine_charset_internal (charset_arg_max, args);
1132
1133 return XINT (CHARSET_SYMBOL_ID (name));
1134 }
1135
1136
1137 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1138 Sdefine_charset_alias, 2, 2, 0,
1139 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1140 (alias, charset)
1141 Lisp_Object alias, charset;
1142 {
1143 Lisp_Object attr;
1144
1145 CHECK_CHARSET_GET_ATTR (charset, attr);
1146 Fputhash (alias, attr, Vcharset_hash_table);
1147 Vcharset_list = Fcons (alias, Vcharset_list);
1148 return Qnil;
1149 }
1150
1151
1152 DEFUN ("unibyte-charset", Funibyte_charset, Sunibyte_charset, 0, 0, 0,
1153 doc: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
1154 ()
1155 {
1156 return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte));
1157 }
1158
1159
1160 DEFUN ("set-unibyte-charset", Fset_unibyte_charset, Sset_unibyte_charset,
1161 1, 1, 0,
1162 doc: /* Set the unibyte charset to CHARSET.
1163 This determines how unibyte/multibyte conversion is done. See also
1164 function `unibyte-charset'. */)
1165 (charset)
1166 Lisp_Object charset;
1167 {
1168 struct charset *cs;
1169 int i, c;
1170
1171 CHECK_CHARSET_GET_CHARSET (charset, cs);
1172 if (! cs->ascii_compatible_p
1173 || cs->dimension != 1)
1174 error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset)));
1175 charset_unibyte = cs->id;
1176 memset (unibyte_has_multibyte_table, 1, 128);
1177 for (i = 128; i < 256; i++)
1178 {
1179 c = DECODE_CHAR (cs, i);
1180 unibyte_to_multibyte_table[i] = (c < 0 ? BYTE8_TO_CHAR (i) : c);
1181 unibyte_has_multibyte_table[i] = c >= 0;
1182 }
1183
1184 return Qnil;
1185 }
1186
1187
1188 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1189 doc: /* Return the property list of CHARSET. */)
1190 (charset)
1191 Lisp_Object charset;
1192 {
1193 Lisp_Object attrs;
1194
1195 CHECK_CHARSET_GET_ATTR (charset, attrs);
1196 return CHARSET_ATTR_PLIST (attrs);
1197 }
1198
1199
1200 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1201 doc: /* Set CHARSET's property list to PLIST. */)
1202 (charset, plist)
1203 Lisp_Object charset, plist;
1204 {
1205 Lisp_Object attrs;
1206
1207 CHECK_CHARSET_GET_ATTR (charset, attrs);
1208 CHARSET_ATTR_PLIST (attrs) = plist;
1209 return plist;
1210 }
1211
1212
1213 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1214 doc: /* Unify characters of CHARSET with Unicode.
1215 This means reading the relevant file and installing the table defined
1216 by CHARSET's `:unify-map' property.
1217
1218 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1219 the same meaning as the `:unify-map' attribute in the function
1220 `define-charset' (which see).
1221
1222 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1223 (charset, unify_map, deunify)
1224 Lisp_Object charset, unify_map, deunify;
1225 {
1226 int id;
1227 struct charset *cs;
1228
1229 CHECK_CHARSET_GET_ID (charset, id);
1230 cs = CHARSET_FROM_ID (id);
1231 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1232 load_charset (cs);
1233 if (NILP (deunify)
1234 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1235 : ! CHARSET_UNIFIED_P (cs))
1236 return Qnil;
1237
1238 CHARSET_UNIFIED_P (cs) = 0;
1239 if (NILP (deunify))
1240 {
1241 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
1242 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1243 if (NILP (unify_map))
1244 unify_map = CHARSET_UNIFY_MAP (cs);
1245 if (STRINGP (unify_map))
1246 load_charset_map_from_file (cs, unify_map, 2);
1247 else if (VECTORP (unify_map))
1248 load_charset_map_from_vector (cs, unify_map, 2);
1249 else if (NILP (unify_map))
1250 error ("No unify-map for charset");
1251 else
1252 error ("Bad unify-map arg");
1253 CHARSET_UNIFIED_P (cs) = 1;
1254 }
1255 else if (CHAR_TABLE_P (Vchar_unify_table))
1256 {
1257 int min_code = CHARSET_MIN_CODE (cs);
1258 int max_code = CHARSET_MAX_CODE (cs);
1259 int min_char = DECODE_CHAR (cs, min_code);
1260 int max_char = DECODE_CHAR (cs, max_code);
1261
1262 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1263 }
1264
1265 return Qnil;
1266 }
1267
1268 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1269 Sget_unused_iso_final_char, 2, 2, 0,
1270 doc: /*
1271 Return an unused ISO final char for a charset of DIMENISION and CHARS.
1272 DIMENSION is the number of bytes to represent a character: 1 or 2.
1273 CHARS is the number of characters in a dimension: 94 or 96.
1274
1275 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1276 If there's no unused final char for the specified kind of charset,
1277 return nil. */)
1278 (dimension, chars)
1279 Lisp_Object dimension, chars;
1280 {
1281 int final_char;
1282
1283 CHECK_NUMBER (dimension);
1284 CHECK_NUMBER (chars);
1285 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1286 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1287 if (XINT (chars) != 94 && XINT (chars) != 96)
1288 args_out_of_range_3 (chars, make_number (94), make_number (96));
1289 for (final_char = '0'; final_char <= '?'; final_char++)
1290 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1291 break;
1292 return (final_char <= '?' ? make_number (final_char) : Qnil);
1293 }
1294
1295 static void
1296 check_iso_charset_parameter (dimension, chars, final_char)
1297 Lisp_Object dimension, chars, final_char;
1298 {
1299 CHECK_NATNUM (dimension);
1300 CHECK_NATNUM (chars);
1301 CHECK_NATNUM (final_char);
1302
1303 if (XINT (dimension) > 3)
1304 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1305 if (XINT (chars) != 94 && XINT (chars) != 96)
1306 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1307 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1308 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1309 }
1310
1311
1312 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1313 4, 4, 0,
1314 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1315
1316 On decoding by an ISO-2022 base coding system, when a charset
1317 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1318 if CHARSET is designated instead. */)
1319 (dimension, chars, final_char, charset)
1320 Lisp_Object dimension, chars, final_char, charset;
1321 {
1322 int id;
1323 int chars_flag;
1324
1325 CHECK_CHARSET_GET_ID (charset, id);
1326 check_iso_charset_parameter (dimension, chars, final_char);
1327 chars_flag = XINT (chars) == 96;
1328 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1329 return Qnil;
1330 }
1331
1332
1333 /* Return information about charsets in the text at PTR of NBYTES
1334 bytes, which are NCHARS characters. The value is:
1335
1336 0: Each character is represented by one byte. This is always
1337 true for a unibyte string. For a multibyte string, true if
1338 it contains only ASCII characters.
1339
1340 1: No charsets other than ascii, control-1, and latin-1 are
1341 found.
1342
1343 2: Otherwise.
1344 */
1345
1346 int
1347 string_xstring_p (string)
1348 Lisp_Object string;
1349 {
1350 const unsigned char *p = SDATA (string);
1351 const unsigned char *endp = p + SBYTES (string);
1352
1353 if (SCHARS (string) == SBYTES (string))
1354 return 0;
1355
1356 while (p < endp)
1357 {
1358 int c = STRING_CHAR_ADVANCE (p);
1359
1360 if (c >= 0x100)
1361 return 2;
1362 }
1363 return 1;
1364 }
1365
1366
1367 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1368
1369 CHARSETS is a vector. If Nth element is non-nil, it means the
1370 charset whose id is N is already found.
1371
1372 It may lookup a translation table TABLE if supplied. */
1373
1374 static void
1375 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1376 const unsigned char *ptr;
1377 EMACS_INT nchars, nbytes;
1378 Lisp_Object charsets, table;
1379 int multibyte;
1380 {
1381 const unsigned char *pend = ptr + nbytes;
1382
1383 if (nchars == nbytes)
1384 {
1385 if (multibyte)
1386 ASET (charsets, charset_ascii, Qt);
1387 else
1388 while (ptr < pend)
1389 {
1390 int c = *ptr++;
1391
1392 if (!NILP (table))
1393 c = translate_char (table, c);
1394 if (ASCII_BYTE_P (c))
1395 ASET (charsets, charset_ascii, Qt);
1396 else
1397 ASET (charsets, charset_eight_bit, Qt);
1398 }
1399 }
1400 else
1401 {
1402 while (ptr < pend)
1403 {
1404 int c = STRING_CHAR_ADVANCE (ptr);
1405 struct charset *charset;
1406
1407 if (!NILP (table))
1408 c = translate_char (table, c);
1409 charset = CHAR_CHARSET (c);
1410 ASET (charsets, CHARSET_ID (charset), Qt);
1411 }
1412 }
1413 }
1414
1415 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1416 2, 3, 0,
1417 doc: /* Return a list of charsets in the region between BEG and END.
1418 BEG and END are buffer positions.
1419 Optional arg TABLE if non-nil is a translation table to look up.
1420
1421 If the current buffer is unibyte, the returned list may contain
1422 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1423 (beg, end, table)
1424 Lisp_Object beg, end, table;
1425 {
1426 Lisp_Object charsets;
1427 EMACS_INT from, from_byte, to, stop, stop_byte;
1428 int i;
1429 Lisp_Object val;
1430 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1431
1432 validate_region (&beg, &end);
1433 from = XFASTINT (beg);
1434 stop = to = XFASTINT (end);
1435
1436 if (from < GPT && GPT < to)
1437 {
1438 stop = GPT;
1439 stop_byte = GPT_BYTE;
1440 }
1441 else
1442 stop_byte = CHAR_TO_BYTE (stop);
1443
1444 from_byte = CHAR_TO_BYTE (from);
1445
1446 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1447 while (1)
1448 {
1449 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1450 stop_byte - from_byte, charsets, table,
1451 multibyte);
1452 if (stop < to)
1453 {
1454 from = stop, from_byte = stop_byte;
1455 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1456 }
1457 else
1458 break;
1459 }
1460
1461 val = Qnil;
1462 for (i = charset_table_used - 1; i >= 0; i--)
1463 if (!NILP (AREF (charsets, i)))
1464 val = Fcons (CHARSET_NAME (charset_table + i), val);
1465 return val;
1466 }
1467
1468 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1469 1, 2, 0,
1470 doc: /* Return a list of charsets in STR.
1471 Optional arg TABLE if non-nil is a translation table to look up.
1472
1473 If STR is unibyte, the returned list may contain
1474 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1475 (str, table)
1476 Lisp_Object str, table;
1477 {
1478 Lisp_Object charsets;
1479 int i;
1480 Lisp_Object val;
1481
1482 CHECK_STRING (str);
1483
1484 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1485 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1486 charsets, table,
1487 STRING_MULTIBYTE (str));
1488 val = Qnil;
1489 for (i = charset_table_used - 1; i >= 0; i--)
1490 if (!NILP (AREF (charsets, i)))
1491 val = Fcons (CHARSET_NAME (charset_table + i), val);
1492 return val;
1493 }
1494
1495 \f
1496
1497 /* Return a character correponding to the code-point CODE of
1498 CHARSET. */
1499
1500 int
1501 decode_char (charset, code)
1502 struct charset *charset;
1503 unsigned code;
1504 {
1505 int c, char_index;
1506 enum charset_method method = CHARSET_METHOD (charset);
1507
1508 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1509 return -1;
1510
1511 if (method == CHARSET_METHOD_MAP_DEFERRED)
1512 {
1513 load_charset (charset);
1514 method = CHARSET_METHOD (charset);
1515 }
1516
1517 if (method == CHARSET_METHOD_SUBSET)
1518 {
1519 Lisp_Object subset_info;
1520
1521 subset_info = CHARSET_SUBSET (charset);
1522 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1523 code -= XINT (AREF (subset_info, 3));
1524 if (code < XFASTINT (AREF (subset_info, 1))
1525 || code > XFASTINT (AREF (subset_info, 2)))
1526 c = -1;
1527 else
1528 c = DECODE_CHAR (charset, code);
1529 }
1530 else if (method == CHARSET_METHOD_SUPERSET)
1531 {
1532 Lisp_Object parents;
1533
1534 parents = CHARSET_SUPERSET (charset);
1535 c = -1;
1536 for (; CONSP (parents); parents = XCDR (parents))
1537 {
1538 int id = XINT (XCAR (XCAR (parents)));
1539 int code_offset = XINT (XCDR (XCAR (parents)));
1540 unsigned this_code = code - code_offset;
1541
1542 charset = CHARSET_FROM_ID (id);
1543 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1544 break;
1545 }
1546 }
1547 else
1548 {
1549 char_index = CODE_POINT_TO_INDEX (charset, code);
1550 if (char_index < 0)
1551 return -1;
1552
1553 if (method == CHARSET_METHOD_MAP)
1554 {
1555 Lisp_Object decoder;
1556
1557 decoder = CHARSET_DECODER (charset);
1558 if (! VECTORP (decoder))
1559 return -1;
1560 c = XINT (AREF (decoder, char_index));
1561 }
1562 else
1563 {
1564 c = char_index + CHARSET_CODE_OFFSET (charset);
1565 }
1566 }
1567
1568 if (CHARSET_UNIFIED_P (charset)
1569 && c >= 0)
1570 {
1571 MAYBE_UNIFY_CHAR (c);
1572 }
1573
1574 return c;
1575 }
1576
1577 /* Variable used temporarily by the macro ENCODE_CHAR. */
1578 Lisp_Object charset_work;
1579
1580 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1581 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1582 use CHARSET's strict_max_char instead of max_char. */
1583
1584 unsigned
1585 encode_char (charset, c)
1586 struct charset *charset;
1587 int c;
1588 {
1589 unsigned code;
1590 enum charset_method method = CHARSET_METHOD (charset);
1591
1592 if (CHARSET_UNIFIED_P (charset))
1593 {
1594 Lisp_Object deunifier, deunified;
1595
1596 deunifier = CHARSET_DEUNIFIER (charset);
1597 if (! CHAR_TABLE_P (deunifier))
1598 {
1599 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
1600 deunifier = CHARSET_DEUNIFIER (charset);
1601 }
1602 deunified = CHAR_TABLE_REF (deunifier, c);
1603 if (! NILP (deunified))
1604 c = XINT (deunified);
1605 }
1606
1607 if (method == CHARSET_METHOD_SUBSET)
1608 {
1609 Lisp_Object subset_info;
1610 struct charset *this_charset;
1611
1612 subset_info = CHARSET_SUBSET (charset);
1613 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1614 code = ENCODE_CHAR (this_charset, c);
1615 if (code == CHARSET_INVALID_CODE (this_charset)
1616 || code < XFASTINT (AREF (subset_info, 1))
1617 || code > XFASTINT (AREF (subset_info, 2)))
1618 return CHARSET_INVALID_CODE (charset);
1619 code += XINT (AREF (subset_info, 3));
1620 return code;
1621 }
1622
1623 if (method == CHARSET_METHOD_SUPERSET)
1624 {
1625 Lisp_Object parents;
1626
1627 parents = CHARSET_SUPERSET (charset);
1628 for (; CONSP (parents); parents = XCDR (parents))
1629 {
1630 int id = XINT (XCAR (XCAR (parents)));
1631 int code_offset = XINT (XCDR (XCAR (parents)));
1632 struct charset *this_charset = CHARSET_FROM_ID (id);
1633
1634 code = ENCODE_CHAR (this_charset, c);
1635 if (code != CHARSET_INVALID_CODE (this_charset))
1636 return code + code_offset;
1637 }
1638 return CHARSET_INVALID_CODE (charset);
1639 }
1640
1641 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1642 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1643 return CHARSET_INVALID_CODE (charset);
1644
1645 if (method == CHARSET_METHOD_MAP_DEFERRED)
1646 {
1647 load_charset (charset);
1648 method = CHARSET_METHOD (charset);
1649 }
1650
1651 if (method == CHARSET_METHOD_MAP)
1652 {
1653 Lisp_Object encoder;
1654 Lisp_Object val;
1655
1656 encoder = CHARSET_ENCODER (charset);
1657 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1658 return CHARSET_INVALID_CODE (charset);
1659 val = CHAR_TABLE_REF (encoder, c);
1660 if (NILP (val))
1661 return CHARSET_INVALID_CODE (charset);
1662 code = XINT (val);
1663 if (! CHARSET_COMPACT_CODES_P (charset))
1664 code = INDEX_TO_CODE_POINT (charset, code);
1665 }
1666 else /* method == CHARSET_METHOD_OFFSET */
1667 {
1668 code = c - CHARSET_CODE_OFFSET (charset);
1669 code = INDEX_TO_CODE_POINT (charset, code);
1670 }
1671
1672 return code;
1673 }
1674
1675
1676 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1677 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1678 Return nil if CODE-POINT is not valid in CHARSET.
1679
1680 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1681
1682 Optional argument RESTRICTION specifies a way to map the pair of CCS
1683 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1684 (charset, code_point, restriction)
1685 Lisp_Object charset, code_point, restriction;
1686 {
1687 int c, id;
1688 unsigned code;
1689 struct charset *charsetp;
1690
1691 CHECK_CHARSET_GET_ID (charset, id);
1692 if (CONSP (code_point))
1693 {
1694 CHECK_NATNUM_CAR (code_point);
1695 CHECK_NATNUM_CDR (code_point);
1696 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1697 }
1698 else
1699 {
1700 CHECK_NATNUM (code_point);
1701 code = XINT (code_point);
1702 }
1703 charsetp = CHARSET_FROM_ID (id);
1704 c = DECODE_CHAR (charsetp, code);
1705 return (c >= 0 ? make_number (c) : Qnil);
1706 }
1707
1708
1709 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1710 doc: /* Encode the character CH into a code-point of CHARSET.
1711 Return nil if CHARSET doesn't include CH.
1712
1713 Optional argument RESTRICTION specifies a way to map CHAR to a
1714 code-point in CCS. Currently not supported and just ignored. */)
1715 (ch, charset, restriction)
1716 Lisp_Object ch, charset, restriction;
1717 {
1718 int id;
1719 unsigned code;
1720 struct charset *charsetp;
1721
1722 CHECK_CHARSET_GET_ID (charset, id);
1723 CHECK_NATNUM (ch);
1724 charsetp = CHARSET_FROM_ID (id);
1725 code = ENCODE_CHAR (charsetp, XINT (ch));
1726 if (code == CHARSET_INVALID_CODE (charsetp))
1727 return Qnil;
1728 if (code > 0x7FFFFFF)
1729 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1730 return make_number (code);
1731 }
1732
1733
1734 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1735 doc:
1736 /* Return a character of CHARSET whose position codes are CODEn.
1737
1738 CODE1 through CODE4 are optional, but if you don't supply sufficient
1739 position codes, it is assumed that the minimum code in each dimension
1740 is specified. */)
1741 (charset, code1, code2, code3, code4)
1742 Lisp_Object charset, code1, code2, code3, code4;
1743 {
1744 int id, dimension;
1745 struct charset *charsetp;
1746 unsigned code;
1747 int c;
1748
1749 CHECK_CHARSET_GET_ID (charset, id);
1750 charsetp = CHARSET_FROM_ID (id);
1751
1752 dimension = CHARSET_DIMENSION (charsetp);
1753 if (NILP (code1))
1754 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1755 ? 0 : CHARSET_MIN_CODE (charsetp));
1756 else
1757 {
1758 CHECK_NATNUM (code1);
1759 if (XFASTINT (code1) >= 0x100)
1760 args_out_of_range (make_number (0xFF), code1);
1761 code = XFASTINT (code1);
1762
1763 if (dimension > 1)
1764 {
1765 code <<= 8;
1766 if (NILP (code2))
1767 code |= charsetp->code_space[(dimension - 2) * 4];
1768 else
1769 {
1770 CHECK_NATNUM (code2);
1771 if (XFASTINT (code2) >= 0x100)
1772 args_out_of_range (make_number (0xFF), code2);
1773 code |= XFASTINT (code2);
1774 }
1775
1776 if (dimension > 2)
1777 {
1778 code <<= 8;
1779 if (NILP (code3))
1780 code |= charsetp->code_space[(dimension - 3) * 4];
1781 else
1782 {
1783 CHECK_NATNUM (code3);
1784 if (XFASTINT (code3) >= 0x100)
1785 args_out_of_range (make_number (0xFF), code3);
1786 code |= XFASTINT (code3);
1787 }
1788
1789 if (dimension > 3)
1790 {
1791 code <<= 8;
1792 if (NILP (code4))
1793 code |= charsetp->code_space[0];
1794 else
1795 {
1796 CHECK_NATNUM (code4);
1797 if (XFASTINT (code4) >= 0x100)
1798 args_out_of_range (make_number (0xFF), code4);
1799 code |= XFASTINT (code4);
1800 }
1801 }
1802 }
1803 }
1804 }
1805
1806 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1807 code &= 0x7F7F7F7F;
1808 c = DECODE_CHAR (charsetp, code);
1809 if (c < 0)
1810 error ("Invalid code(s)");
1811 return make_number (c);
1812 }
1813
1814
1815 /* Return the first charset in CHARSET_LIST that contains C.
1816 CHARSET_LIST is a list of charset IDs. If it is nil, use
1817 Vcharset_ordered_list. */
1818
1819 struct charset *
1820 char_charset (c, charset_list, code_return)
1821 int c;
1822 Lisp_Object charset_list;
1823 unsigned *code_return;
1824 {
1825 if (NILP (charset_list))
1826 charset_list = Vcharset_ordered_list;
1827
1828 while (CONSP (charset_list))
1829 {
1830 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1831 unsigned code = ENCODE_CHAR (charset, c);
1832
1833 if (code != CHARSET_INVALID_CODE (charset))
1834 {
1835 if (code_return)
1836 *code_return = code;
1837 return charset;
1838 }
1839 charset_list = XCDR (charset_list);
1840 }
1841 return NULL;
1842 }
1843
1844
1845 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1846 doc:
1847 /*Return list of charset and one to four position-codes of CHAR.
1848 The charset is decided by the current priority order of charsets.
1849 A position-code is a byte value of each dimension of the code-point of
1850 CHAR in the charset. */)
1851 (ch)
1852 Lisp_Object ch;
1853 {
1854 struct charset *charset;
1855 int c, dimension;
1856 unsigned code;
1857 Lisp_Object val;
1858
1859 CHECK_CHARACTER (ch);
1860 c = XFASTINT (ch);
1861 charset = CHAR_CHARSET (c);
1862 if (! charset)
1863 abort ();
1864 code = ENCODE_CHAR (charset, c);
1865 if (code == CHARSET_INVALID_CODE (charset))
1866 abort ();
1867 dimension = CHARSET_DIMENSION (charset);
1868 for (val = Qnil; dimension > 0; dimension--)
1869 {
1870 val = Fcons (make_number (code & 0xFF), val);
1871 code >>= 8;
1872 }
1873 return Fcons (CHARSET_NAME (charset), val);
1874 }
1875
1876
1877 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1878 doc: /* Return the charset of highest priority that contains CH. */)
1879 (ch)
1880 Lisp_Object ch;
1881 {
1882 struct charset *charset;
1883
1884 CHECK_CHARACTER (ch);
1885 charset = CHAR_CHARSET (XINT (ch));
1886 return (CHARSET_NAME (charset));
1887 }
1888
1889
1890 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1891 doc: /*
1892 Return charset of a character in the current buffer at position POS.
1893 If POS is nil, it defauls to the current point.
1894 If POS is out of range, the value is nil. */)
1895 (pos)
1896 Lisp_Object pos;
1897 {
1898 Lisp_Object ch;
1899 struct charset *charset;
1900
1901 ch = Fchar_after (pos);
1902 if (! INTEGERP (ch))
1903 return ch;
1904 charset = CHAR_CHARSET (XINT (ch));
1905 return (CHARSET_NAME (charset));
1906 }
1907
1908
1909 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1910 doc: /*
1911 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1912
1913 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1914 by their DIMENSION, CHARS, and FINAL-CHAR,
1915 where as Emacs distinguishes them by charset symbol.
1916 See the documentation of the function `charset-info' for the meanings of
1917 DIMENSION, CHARS, and FINAL-CHAR. */)
1918 (dimension, chars, final_char)
1919 Lisp_Object dimension, chars, final_char;
1920 {
1921 int id;
1922 int chars_flag;
1923
1924 check_iso_charset_parameter (dimension, chars, final_char);
1925 chars_flag = XFASTINT (chars) == 96;
1926 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
1927 XFASTINT (final_char));
1928 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
1929 }
1930
1931
1932 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1933 0, 0, 0,
1934 doc: /*
1935 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1936 ()
1937 {
1938 int i;
1939 struct charset *charset;
1940 Lisp_Object attrs;
1941
1942 for (i = 0; i < charset_table_used; i++)
1943 {
1944 charset = CHARSET_FROM_ID (i);
1945 attrs = CHARSET_ATTRIBUTES (charset);
1946
1947 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1948 {
1949 CHARSET_ATTR_DECODER (attrs) = Qnil;
1950 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1951 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1952 }
1953
1954 if (CHARSET_UNIFIED_P (charset))
1955 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
1956 }
1957
1958 if (CHAR_TABLE_P (Vchar_unified_charset_table))
1959 {
1960 Foptimize_char_table (Vchar_unified_charset_table);
1961 Vchar_unify_table = Vchar_unified_charset_table;
1962 Vchar_unified_charset_table = Qnil;
1963 }
1964
1965 return Qnil;
1966 }
1967
1968 DEFUN ("charset-priority-list", Fcharset_priority_list,
1969 Scharset_priority_list, 0, 1, 0,
1970 doc: /* Return the list of charsets ordered by priority.
1971 HIGHESTP non-nil means just return the highest priority one. */)
1972 (highestp)
1973 Lisp_Object highestp;
1974 {
1975 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1976
1977 if (!NILP (highestp))
1978 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
1979
1980 while (!NILP (list))
1981 {
1982 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
1983 list = XCDR (list);
1984 }
1985 return Fnreverse (val);
1986 }
1987
1988 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1989 1, MANY, 0,
1990 doc: /* Assign higher priority to the charsets given as arguments.
1991 usage: (set-charset-priority &rest charsets) */)
1992 (nargs, args)
1993 int nargs;
1994 Lisp_Object *args;
1995 {
1996 Lisp_Object new_head, old_list, arglist[2];
1997 Lisp_Object list_2022, list_emacs_mule;
1998 int i, id;
1999
2000 old_list = Fcopy_sequence (Vcharset_ordered_list);
2001 new_head = Qnil;
2002 for (i = 0; i < nargs; i++)
2003 {
2004 CHECK_CHARSET_GET_ID (args[i], id);
2005 if (! NILP (Fmemq (make_number (id), old_list)))
2006 {
2007 old_list = Fdelq (make_number (id), old_list);
2008 new_head = Fcons (make_number (id), new_head);
2009 }
2010 }
2011 arglist[0] = Fnreverse (new_head);
2012 arglist[1] = old_list;
2013 Vcharset_ordered_list = Fnconc (2, arglist);
2014 charset_ordered_list_tick++;
2015
2016 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2017 CONSP (old_list); old_list = XCDR (old_list))
2018 {
2019 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2020 list_2022 = Fcons (XCAR (old_list), list_2022);
2021 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2022 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2023 }
2024 Viso_2022_charset_list = Fnreverse (list_2022);
2025 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2026
2027 return Qnil;
2028 }
2029
2030 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2031 0, 1, 0,
2032 doc: /* Internal use only.
2033 Return charset identification number of CHARSET. */)
2034 (charset)
2035 Lisp_Object charset;
2036 {
2037 int id;
2038
2039 CHECK_CHARSET_GET_ID (charset, id);
2040 return make_number (id);
2041 }
2042
2043 \f
2044 void
2045 init_charset ()
2046 {
2047 Vcharset_map_path
2048 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory),
2049 Qnil);
2050 }
2051
2052
2053 void
2054 init_charset_once ()
2055 {
2056 int i, j, k;
2057
2058 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2059 for (j = 0; j < ISO_MAX_CHARS; j++)
2060 for (k = 0; k < ISO_MAX_FINAL; k++)
2061 iso_charset_table[i][j][k] = -1;
2062
2063 for (i = 0; i < 256; i++)
2064 emacs_mule_charset[i] = NULL;
2065
2066 charset_jisx0201_roman = -1;
2067 charset_jisx0208_1978 = -1;
2068 charset_jisx0208 = -1;
2069
2070 for (i = 0; i < 128; i++)
2071 unibyte_to_multibyte_table[i] = i;
2072 for (; i < 256; i++)
2073 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2074 }
2075
2076 #ifdef emacs
2077
2078 void
2079 syms_of_charset ()
2080 {
2081 DEFSYM (Qcharsetp, "charsetp");
2082
2083 DEFSYM (Qascii, "ascii");
2084 DEFSYM (Qunicode, "unicode");
2085 DEFSYM (Qeight_bit, "eight-bit");
2086 DEFSYM (Qiso_8859_1, "iso-8859-1");
2087
2088 DEFSYM (Qgl, "gl");
2089 DEFSYM (Qgr, "gr");
2090
2091 staticpro (&Vcharset_ordered_list);
2092 Vcharset_ordered_list = Qnil;
2093
2094 staticpro (&Viso_2022_charset_list);
2095 Viso_2022_charset_list = Qnil;
2096
2097 staticpro (&Vemacs_mule_charset_list);
2098 Vemacs_mule_charset_list = Qnil;
2099
2100 staticpro (&Vcharset_hash_table);
2101 {
2102 Lisp_Object args[2];
2103 args[0] = QCtest;
2104 args[1] = Qeq;
2105 Vcharset_hash_table = Fmake_hash_table (2, args);
2106 }
2107
2108 charset_table_size = 128;
2109 charset_table = ((struct charset *)
2110 xmalloc (sizeof (struct charset) * charset_table_size));
2111 charset_table_used = 0;
2112
2113 staticpro (&Vchar_unified_charset_table);
2114 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2115
2116 defsubr (&Scharsetp);
2117 defsubr (&Smap_charset_chars);
2118 defsubr (&Sdefine_charset_internal);
2119 defsubr (&Sdefine_charset_alias);
2120 defsubr (&Sunibyte_charset);
2121 defsubr (&Sset_unibyte_charset);
2122 defsubr (&Scharset_plist);
2123 defsubr (&Sset_charset_plist);
2124 defsubr (&Sunify_charset);
2125 defsubr (&Sget_unused_iso_final_char);
2126 defsubr (&Sdeclare_equiv_charset);
2127 defsubr (&Sfind_charset_region);
2128 defsubr (&Sfind_charset_string);
2129 defsubr (&Sdecode_char);
2130 defsubr (&Sencode_char);
2131 defsubr (&Ssplit_char);
2132 defsubr (&Smake_char);
2133 defsubr (&Schar_charset);
2134 defsubr (&Scharset_after);
2135 defsubr (&Siso_charset);
2136 defsubr (&Sclear_charset_maps);
2137 defsubr (&Scharset_priority_list);
2138 defsubr (&Sset_charset_priority);
2139 defsubr (&Scharset_id_internal);
2140
2141 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2142 doc: /* *Lisp of directories to search for charset map files. */);
2143 Vcharset_map_path = Qnil;
2144
2145 DEFVAR_LISP ("charset-list", &Vcharset_list,
2146 doc: /* List of all charsets ever defined. */);
2147 Vcharset_list = Qnil;
2148
2149 charset_ascii
2150 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2151 0, 127, 'B', -1, 0, 1, 0, 0);
2152 charset_iso_8859_1
2153 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2154 0, 255, -1, -1, -1, 1, 0, 0);
2155 charset_unicode
2156 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2157 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2158 charset_eight_bit
2159 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2160 128, 255, -1, 0, -1, 0, 0,
2161 MAX_5_BYTE_CHAR + 1);
2162 }
2163
2164 #endif /* emacs */
2165
2166 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2167 (do not change this comment) */