]> code.delx.au - gnu-emacs/blob - src/charset.c
Revision: miles@gnu.org--gnu-2005/emacs--unicode--0--patch-97
[gnu-emacs] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
6 Copyright (C) 2003, 2004
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H13PRO009
9
10 This file is part of GNU Emacs.
11
12 GNU Emacs is free software; you can redistribute it and/or modify
13 it under the terms of the GNU General Public License as published by
14 the Free Software Foundation; either version 2, or (at your option)
15 any later version.
16
17 GNU Emacs is distributed in the hope that it will be useful,
18 but WITHOUT ANY WARRANTY; without even the implied warranty of
19 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
20 GNU General Public License for more details.
21
22 You should have received a copy of the GNU General Public License
23 along with GNU Emacs; see the file COPYING. If not, write to
24 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
25 Boston, MA 02110-1301, USA. */
26
27 #include <config.h>
28
29 #include <stdio.h>
30 #include <unistd.h>
31 #include <ctype.h>
32 #include <sys/types.h>
33 #include "lisp.h"
34 #include "character.h"
35 #include "charset.h"
36 #include "coding.h"
37 #include "disptab.h"
38 #include "buffer.h"
39
40 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
41
42 A coded character set ("charset" hereafter) is a meaningful
43 collection (i.e. language, culture, functionality, etc.) of
44 characters. Emacs handles multiple charsets at once. In Emacs Lisp
45 code, a charset is represented by a symbol. In C code, a charset is
46 represented by its ID number or by a pointer to a struct charset.
47
48 The actual information about each charset is stored in two places.
49 Lispy information is stored in the hash table Vcharset_hash_table as
50 a vector (charset attributes). The other information is stored in
51 charset_table as a struct charset.
52
53 */
54
55 /* List of all charsets. This variable is used only from Emacs
56 Lisp. */
57 Lisp_Object Vcharset_list;
58
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table;
62
63 /* Table of struct charset. */
64 struct charset *charset_table;
65
66 static int charset_table_size;
67 static int charset_table_used;
68
69 Lisp_Object Qcharsetp;
70
71 /* Special charset symbols. */
72 Lisp_Object Qascii;
73 Lisp_Object Qeight_bit;
74 Lisp_Object Qiso_8859_1;
75 Lisp_Object Qunicode;
76
77 /* The corresponding charsets. */
78 int charset_ascii;
79 int charset_eight_bit;
80 int charset_iso_8859_1;
81 int charset_unicode;
82
83 /* The other special charsets. */
84 int charset_jisx0201_roman;
85 int charset_jisx0208_1978;
86 int charset_jisx0208;
87
88 /* Value of charset attribute `charset-iso-plane'. */
89 Lisp_Object Qgl, Qgr;
90
91 /* Charset of unibyte characters. */
92 int charset_unibyte;
93
94 /* List of charsets ordered by the priority. */
95 Lisp_Object Vcharset_ordered_list;
96
97 /* Incremented everytime we change Vcharset_ordered_list. This is
98 unsigned short so that it fits in Lisp_Int and never matches
99 -1. */
100 unsigned short charset_ordered_list_tick;
101
102 /* List of iso-2022 charsets. */
103 Lisp_Object Viso_2022_charset_list;
104
105 /* List of emacs-mule charsets. */
106 Lisp_Object Vemacs_mule_charset_list;
107
108 struct charset *emacs_mule_charset[256];
109
110 /* Mapping table from ISO2022's charset (specified by DIMENSION,
111 CHARS, and FINAL-CHAR) to Emacs' charset. */
112 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
113
114 Lisp_Object Vcharset_map_path;
115
116 Lisp_Object Vchar_unified_charset_table;
117
118 /* Defined in chartab.c */
119 extern void
120 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
121 Lisp_Object function, Lisp_Object table,
122 Lisp_Object arg, struct charset *charset,
123 unsigned from, unsigned to));
124
125 #define CODE_POINT_TO_INDEX(charset, code) \
126 ((charset)->code_linear_p \
127 ? (code) - (charset)->min_code \
128 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
129 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
130 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
131 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
132 ? (((((code) >> 24) - (charset)->code_space[12]) \
133 * (charset)->code_space[11]) \
134 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
135 * (charset)->code_space[7]) \
136 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
137 * (charset)->code_space[3]) \
138 + (((code) & 0xFF) - (charset)->code_space[0]) \
139 - ((charset)->char_index_offset)) \
140 : -1)
141
142
143 /* Convert the character index IDX to code-point CODE for CHARSET.
144 It is assumed that IDX is in a valid range. */
145
146 #define INDEX_TO_CODE_POINT(charset, idx) \
147 ((charset)->code_linear_p \
148 ? (idx) + (charset)->min_code \
149 : (idx += (charset)->char_index_offset, \
150 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
151 | (((charset)->code_space[4] \
152 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
153 << 8) \
154 | (((charset)->code_space[8] \
155 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
156 << 16) \
157 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
158 << 24))))
159
160
161 \f
162
163 /* Set to 1 to warn that a charset map is loaded and thus a buffer
164 text and a string data may be relocated. */
165 int charset_map_loaded;
166
167 struct charset_map_entries
168 {
169 struct {
170 unsigned from, to;
171 int c;
172 } entry[0x10000];
173 struct charset_map_entries *next;
174 };
175
176 /* Load the mapping information for CHARSET from ENTRIES.
177
178 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
179
180 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
181 CHARSET->decoder, and CHARSET->encoder.
182
183 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
184 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
185 setup it too. */
186
187 static void
188 load_charset_map (charset, entries, n_entries, control_flag)
189 struct charset *charset;
190 struct charset_map_entries *entries;
191 int n_entries;
192 int control_flag;
193 {
194 Lisp_Object vec, table;
195 unsigned max_code = CHARSET_MAX_CODE (charset);
196 int ascii_compatible_p = charset->ascii_compatible_p;
197 int min_char, max_char, nonascii_min_char;
198 int i;
199 unsigned char *fast_map = charset->fast_map;
200
201 if (n_entries <= 0)
202 return;
203
204 if (control_flag > 0)
205 {
206 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
207
208 table = Fmake_char_table (Qnil, Qnil);
209 if (control_flag == 1)
210 vec = Fmake_vector (make_number (n), make_number (-1));
211 else if (! CHAR_TABLE_P (Vchar_unify_table))
212 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
213
214 charset_map_loaded = 1;
215 }
216
217 min_char = max_char = entries->entry[0].c;
218 nonascii_min_char = MAX_CHAR;
219 for (i = 0; i < n_entries; i++)
220 {
221 unsigned from, to;
222 int from_index, to_index;
223 int from_c, to_c;
224 int idx = i % 0x10000;
225
226 if (i > 0 && idx == 0)
227 entries = entries->next;
228 from = entries->entry[idx].from;
229 to = entries->entry[idx].to;
230 from_c = entries->entry[idx].c;
231 from_index = CODE_POINT_TO_INDEX (charset, from);
232 if (from == to)
233 {
234 to_index = from_index;
235 to_c = from_c;
236 }
237 else
238 {
239 to_index = CODE_POINT_TO_INDEX (charset, to);
240 to_c = from_c + (to_index - from_index);
241 }
242 if (from_index < 0 || to_index < 0)
243 continue;
244
245 if (control_flag < 2)
246 {
247 int c;
248
249 if (to_c > max_char)
250 max_char = to_c;
251 else if (from_c < min_char)
252 min_char = from_c;
253 if (ascii_compatible_p)
254 {
255 if (! ASCII_BYTE_P (from_c))
256 {
257 if (from_c < nonascii_min_char)
258 nonascii_min_char = from_c;
259 }
260 else if (! ASCII_BYTE_P (to_c))
261 {
262 nonascii_min_char = 0x80;
263 }
264 }
265
266 for (c = from_c; c <= to_c; c++)
267 CHARSET_FAST_MAP_SET (c, fast_map);
268
269 if (control_flag == 1)
270 {
271 unsigned code = from;
272
273 if (CHARSET_COMPACT_CODES_P (charset))
274 while (1)
275 {
276 ASET (vec, from_index, make_number (from_c));
277 if (NILP (CHAR_TABLE_REF (table, from_c)))
278 CHAR_TABLE_SET (table, from_c, make_number (code));
279 if (from_index == to_index)
280 break;
281 from_index++, from_c++;
282 code = INDEX_TO_CODE_POINT (charset, from_index);
283 }
284 else
285 for (; from_index <= to_index; from_index++, from_c++)
286 {
287 ASET (vec, from_index, make_number (from_c));
288 if (NILP (CHAR_TABLE_REF (table, from_c)))
289 CHAR_TABLE_SET (table, from_c, make_number (from_index));
290 }
291 }
292 }
293 else
294 {
295 unsigned code = from;
296
297 while (1)
298 {
299 int c1 = DECODE_CHAR (charset, code);
300
301 if (c1 >= 0)
302 {
303 CHAR_TABLE_SET (table, from_c, make_number (c1));
304 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
305 if (CHAR_TABLE_P (Vchar_unified_charset_table))
306 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
307 CHARSET_NAME (charset));
308 }
309 if (from_index == to_index)
310 break;
311 from_index++, from_c++;
312 code = INDEX_TO_CODE_POINT (charset, from_index);
313 }
314 }
315 }
316
317 if (control_flag < 2)
318 {
319 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
320 ? nonascii_min_char : min_char);
321 CHARSET_MAX_CHAR (charset) = max_char;
322 if (control_flag == 1)
323 {
324 CHARSET_DECODER (charset) = vec;
325 CHARSET_ENCODER (charset) = table;
326 }
327 }
328 else
329 CHARSET_DEUNIFIER (charset) = table;
330 }
331
332
333 /* Read a hexadecimal number (preceded by "0x") from the file FP while
334 paying attention to comment charcter '#'. */
335
336 static INLINE unsigned
337 read_hex (fp, eof)
338 FILE *fp;
339 int *eof;
340 {
341 int c;
342 unsigned n;
343
344 while ((c = getc (fp)) != EOF)
345 {
346 if (c == '#')
347 {
348 while ((c = getc (fp)) != EOF && c != '\n');
349 }
350 else if (c == '0')
351 {
352 if ((c = getc (fp)) == EOF || c == 'x')
353 break;
354 }
355 }
356 if (c == EOF)
357 {
358 *eof = 1;
359 return 0;
360 }
361 *eof = 0;
362 n = 0;
363 if (c == 'x')
364 while ((c = getc (fp)) != EOF && isxdigit (c))
365 n = ((n << 4)
366 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
367 else
368 while ((c = getc (fp)) != EOF && isdigit (c))
369 n = (n * 10) + c - '0';
370 if (c != EOF)
371 ungetc (c, fp);
372 return n;
373 }
374
375
376 /* Return a mapping vector for CHARSET loaded from MAPFILE.
377 Each line of MAPFILE has this form
378 0xAAAA 0xCCCC
379 where 0xAAAA is a code-point and 0xCCCC is the corresponding
380 character code, or this form
381 0xAAAA-0xBBBB 0xCCCC
382 where 0xAAAA and 0xBBBB are code-points specifying a range, and
383 0xCCCC is the first character code of the range.
384
385 The returned vector has this form:
386 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
387 where CODE1 is a code-point or a cons of code-points specifying a
388 range. */
389
390 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
391
392 static void
393 load_charset_map_from_file (charset, mapfile, control_flag)
394 struct charset *charset;
395 Lisp_Object mapfile;
396 int control_flag;
397 {
398 unsigned min_code = CHARSET_MIN_CODE (charset);
399 unsigned max_code = CHARSET_MAX_CODE (charset);
400 int fd;
401 FILE *fp;
402 int eof;
403 Lisp_Object suffixes;
404 struct charset_map_entries *head, *entries;
405 int n_entries;
406
407 suffixes = Fcons (build_string (".map"),
408 Fcons (build_string (".TXT"), Qnil));
409
410 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
411 if (fd < 0
412 || ! (fp = fdopen (fd, "r")))
413 {
414 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
415 return;
416 }
417
418 head = entries = ((struct charset_map_entries *)
419 alloca (sizeof (struct charset_map_entries)));
420 n_entries = 0;
421 eof = 0;
422 while (1)
423 {
424 unsigned from, to;
425 int c;
426 int idx;
427
428 from = read_hex (fp, &eof);
429 if (eof)
430 break;
431 if (getc (fp) == '-')
432 to = read_hex (fp, &eof);
433 else
434 to = from;
435 c = (int) read_hex (fp, &eof);
436
437 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
438 continue;
439
440 if (n_entries > 0 && (n_entries % 0x10000) == 0)
441 {
442 entries->next = ((struct charset_map_entries *)
443 alloca (sizeof (struct charset_map_entries)));
444 entries = entries->next;
445 }
446 idx = n_entries % 0x10000;
447 entries->entry[idx].from = from;
448 entries->entry[idx].to = to;
449 entries->entry[idx].c = c;
450 n_entries++;
451 }
452 fclose (fp);
453 close (fd);
454
455 load_charset_map (charset, head, n_entries, control_flag);
456 }
457
458 static void
459 load_charset_map_from_vector (charset, vec, control_flag)
460 struct charset *charset;
461 Lisp_Object vec;
462 int control_flag;
463 {
464 unsigned min_code = CHARSET_MIN_CODE (charset);
465 unsigned max_code = CHARSET_MAX_CODE (charset);
466 struct charset_map_entries *head, *entries;
467 int n_entries;
468 int len = ASIZE (vec);
469 int i;
470
471 if (len % 2 == 1)
472 {
473 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
474 return;
475 }
476
477 head = entries = ((struct charset_map_entries *)
478 alloca (sizeof (struct charset_map_entries)));
479 n_entries = 0;
480 for (i = 0; i < len; i += 2)
481 {
482 Lisp_Object val, val2;
483 unsigned from, to;
484 int c;
485 int idx;
486
487 val = AREF (vec, i);
488 if (CONSP (val))
489 {
490 val2 = XCDR (val);
491 val = XCAR (val);
492 CHECK_NATNUM (val);
493 CHECK_NATNUM (val2);
494 from = XFASTINT (val);
495 to = XFASTINT (val2);
496 }
497 else
498 {
499 CHECK_NATNUM (val);
500 from = to = XFASTINT (val);
501 }
502 val = AREF (vec, i + 1);
503 CHECK_NATNUM (val);
504 c = XFASTINT (val);
505
506 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
507 continue;
508
509 if (n_entries > 0 && (n_entries % 0x10000) == 0)
510 {
511 entries->next = ((struct charset_map_entries *)
512 alloca (sizeof (struct charset_map_entries)));
513 entries = entries->next;
514 }
515 idx = n_entries % 0x10000;
516 entries->entry[idx].from = from;
517 entries->entry[idx].to = to;
518 entries->entry[idx].c = c;
519 n_entries++;
520 }
521
522 load_charset_map (charset, head, n_entries, control_flag);
523 }
524
525 static void
526 load_charset (charset)
527 struct charset *charset;
528 {
529 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
530 {
531 Lisp_Object map;
532
533 map = CHARSET_MAP (charset);
534 if (STRINGP (map))
535 load_charset_map_from_file (charset, map, 1);
536 else
537 load_charset_map_from_vector (charset, map, 1);
538 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
539 }
540 }
541
542
543 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
544 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
545 (object)
546 Lisp_Object object;
547 {
548 return (CHARSETP (object) ? Qt : Qnil);
549 }
550
551
552 void
553 map_charset_chars (c_function, function, arg,
554 charset, from, to)
555 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
556 Lisp_Object function, arg;
557 struct charset *charset;
558 unsigned from, to;
559 {
560 Lisp_Object range;
561 int partial;
562
563 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
564 load_charset (charset);
565
566 partial = (from > CHARSET_MIN_CODE (charset)
567 || to < CHARSET_MAX_CODE (charset));
568
569 if (CHARSET_UNIFIED_P (charset)
570 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
571 {
572 map_char_table_for_charset (c_function, function,
573 CHARSET_DEUNIFIER (charset), arg,
574 partial ? charset : NULL, from, to);
575 }
576
577 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
578 {
579 int from_idx = CODE_POINT_TO_INDEX (charset, from);
580 int to_idx = CODE_POINT_TO_INDEX (charset, to);
581 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
582 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
583
584 range = Fcons (make_number (from_c), make_number (to_c));
585 if (NILP (function))
586 (*c_function) (arg, range);
587 else
588 call2 (function, range, arg);
589 }
590 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
591 {
592 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
593 return;
594 if (CHARSET_ASCII_COMPATIBLE_P (charset) && from <= 127)
595 {
596 range = Fcons (make_number (from), make_number (to));
597 if (to >= 128)
598 XSETCAR (range, make_number (127));
599
600 if (NILP (function))
601 (*c_function) (arg, range);
602 else
603 call2 (function, range, arg);
604 }
605 map_char_table_for_charset (c_function, function,
606 CHARSET_ENCODER (charset), arg,
607 partial ? charset : NULL, from, to);
608 }
609 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
610 {
611 Lisp_Object subset_info;
612 int offset;
613
614 subset_info = CHARSET_SUBSET (charset);
615 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
616 offset = XINT (AREF (subset_info, 3));
617 from -= offset;
618 if (from < XFASTINT (AREF (subset_info, 1)))
619 from = XFASTINT (AREF (subset_info, 1));
620 to -= offset;
621 if (to > XFASTINT (AREF (subset_info, 2)))
622 to = XFASTINT (AREF (subset_info, 2));
623 map_charset_chars (c_function, function, arg, charset, from, to);
624 }
625 else /* i.e. CHARSET_METHOD_SUPERSET */
626 {
627 Lisp_Object parents;
628
629 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
630 parents = XCDR (parents))
631 {
632 int offset;
633 unsigned this_from, this_to;
634
635 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
636 offset = XINT (XCDR (XCAR (parents)));
637 this_from = from - offset;
638 this_to = to - offset;
639 if (this_from < CHARSET_MIN_CODE (charset))
640 this_from = CHARSET_MIN_CODE (charset);
641 if (this_to > CHARSET_MAX_CODE (charset))
642 this_to = CHARSET_MAX_CODE (charset);
643 map_charset_chars (c_function, function, arg, charset,
644 this_from, this_to);
645 }
646 }
647 }
648
649 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
650 doc: /* Call FUNCTION for all characters in CHARSET.
651 FUNCTION is called with an argument RANGE and the optional 3rd
652 argument ARG.
653
654 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
655 characters contained in CHARSET.
656
657 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
658 range of code points of target characters. */)
659 (function, charset, arg, from_code, to_code)
660 Lisp_Object function, charset, arg, from_code, to_code;
661 {
662 struct charset *cs;
663 unsigned from, to;
664
665 CHECK_CHARSET_GET_CHARSET (charset, cs);
666 if (NILP (from_code))
667 from = CHARSET_MIN_CODE (cs);
668 else
669 {
670 CHECK_NATNUM (from_code);
671 from = XINT (from_code);
672 if (from < CHARSET_MIN_CODE (cs))
673 from = CHARSET_MIN_CODE (cs);
674 }
675 if (NILP (to_code))
676 to = CHARSET_MAX_CODE (cs);
677 else
678 {
679 CHECK_NATNUM (to_code);
680 to = XINT (to_code);
681 if (to > CHARSET_MAX_CODE (cs))
682 to = CHARSET_MAX_CODE (cs);
683 }
684 map_charset_chars (NULL, function, arg, cs, from, to);
685 return Qnil;
686 }
687
688
689 /* Define a charset according to the arguments. The Nth argument is
690 the Nth attribute of the charset (the last attribute `charset-id'
691 is not included). See the docstring of `define-charset' for the
692 detail. */
693
694 DEFUN ("define-charset-internal", Fdefine_charset_internal,
695 Sdefine_charset_internal, charset_arg_max, MANY, 0,
696 doc: /* For internal use only.
697 usage: (define-charset-internal ...) */)
698 (nargs, args)
699 int nargs;
700 Lisp_Object *args;
701 {
702 /* Charset attr vector. */
703 Lisp_Object attrs;
704 Lisp_Object val;
705 unsigned hash_code;
706 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
707 int i, j;
708 struct charset charset;
709 int id;
710 int dimension;
711 int new_definition_p;
712 int nchars;
713
714 if (nargs != charset_arg_max)
715 return Fsignal (Qwrong_number_of_arguments,
716 Fcons (intern ("define-charset-internal"),
717 make_number (nargs)));
718
719 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
720
721 CHECK_SYMBOL (args[charset_arg_name]);
722 ASET (attrs, charset_name, args[charset_arg_name]);
723
724 val = args[charset_arg_code_space];
725 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
726 {
727 int min_byte, max_byte;
728
729 min_byte = XINT (Faref (val, make_number (i * 2)));
730 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
731 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
732 error ("Invalid :code-space value");
733 charset.code_space[i * 4] = min_byte;
734 charset.code_space[i * 4 + 1] = max_byte;
735 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
736 nchars *= charset.code_space[i * 4 + 2];
737 charset.code_space[i * 4 + 3] = nchars;
738 if (max_byte > 0)
739 dimension = i + 1;
740 }
741
742 val = args[charset_arg_dimension];
743 if (NILP (val))
744 charset.dimension = dimension;
745 else
746 {
747 CHECK_NATNUM (val);
748 charset.dimension = XINT (val);
749 if (charset.dimension < 1 || charset.dimension > 4)
750 args_out_of_range_3 (val, make_number (1), make_number (4));
751 }
752
753 charset.code_linear_p
754 = (charset.dimension == 1
755 || (charset.code_space[2] == 256
756 && (charset.dimension == 2
757 || (charset.code_space[6] == 256
758 && (charset.dimension == 3
759 || charset.code_space[10] == 256)))));
760
761 if (! charset.code_linear_p)
762 {
763 charset.code_space_mask = (unsigned char *) xmalloc (256);
764 bzero (charset.code_space_mask, 256);
765 for (i = 0; i < 4; i++)
766 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
767 j++)
768 charset.code_space_mask[j] |= (1 << i);
769 }
770
771 charset.iso_chars_96 = charset.code_space[2] == 96;
772
773 charset.min_code = (charset.code_space[0]
774 | (charset.code_space[4] << 8)
775 | (charset.code_space[8] << 16)
776 | (charset.code_space[12] << 24));
777 charset.max_code = (charset.code_space[1]
778 | (charset.code_space[5] << 8)
779 | (charset.code_space[9] << 16)
780 | (charset.code_space[13] << 24));
781 charset.char_index_offset = 0;
782
783 val = args[charset_arg_min_code];
784 if (! NILP (val))
785 {
786 unsigned code;
787
788 if (INTEGERP (val))
789 code = XINT (val);
790 else
791 {
792 CHECK_CONS (val);
793 CHECK_NUMBER_CAR (val);
794 CHECK_NUMBER_CDR (val);
795 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
796 }
797 if (code < charset.min_code
798 || code > charset.max_code)
799 args_out_of_range_3 (make_number (charset.min_code),
800 make_number (charset.max_code), val);
801 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
802 charset.min_code = code;
803 }
804
805 val = args[charset_arg_max_code];
806 if (! NILP (val))
807 {
808 unsigned code;
809
810 if (INTEGERP (val))
811 code = XINT (val);
812 else
813 {
814 CHECK_CONS (val);
815 CHECK_NUMBER_CAR (val);
816 CHECK_NUMBER_CDR (val);
817 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
818 }
819 if (code < charset.min_code
820 || code > charset.max_code)
821 args_out_of_range_3 (make_number (charset.min_code),
822 make_number (charset.max_code), val);
823 charset.max_code = code;
824 }
825
826 charset.compact_codes_p = charset.max_code < 0x1000000;
827
828 val = args[charset_arg_invalid_code];
829 if (NILP (val))
830 {
831 if (charset.min_code > 0)
832 charset.invalid_code = 0;
833 else
834 {
835 XSETINT (val, charset.max_code + 1);
836 if (XINT (val) == charset.max_code + 1)
837 charset.invalid_code = charset.max_code + 1;
838 else
839 error ("Attribute :invalid-code must be specified");
840 }
841 }
842 else
843 {
844 CHECK_NATNUM (val);
845 charset.invalid_code = XFASTINT (val);
846 }
847
848 val = args[charset_arg_iso_final];
849 if (NILP (val))
850 charset.iso_final = -1;
851 else
852 {
853 CHECK_NUMBER (val);
854 if (XINT (val) < '0' || XINT (val) > 127)
855 error ("Invalid iso-final-char: %d", XINT (val));
856 charset.iso_final = XINT (val);
857 }
858
859 val = args[charset_arg_iso_revision];
860 if (NILP (val))
861 charset.iso_revision = -1;
862 else
863 {
864 CHECK_NUMBER (val);
865 if (XINT (val) > 63)
866 args_out_of_range (make_number (63), val);
867 charset.iso_revision = XINT (val);
868 }
869
870 val = args[charset_arg_emacs_mule_id];
871 if (NILP (val))
872 charset.emacs_mule_id = -1;
873 else
874 {
875 CHECK_NATNUM (val);
876 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
877 error ("Invalid emacs-mule-id: %d", XINT (val));
878 charset.emacs_mule_id = XINT (val);
879 }
880
881 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
882
883 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
884
885 charset.unified_p = 0;
886
887 bzero (charset.fast_map, sizeof (charset.fast_map));
888
889 if (! NILP (args[charset_arg_code_offset]))
890 {
891 val = args[charset_arg_code_offset];
892 CHECK_NUMBER (val);
893
894 charset.method = CHARSET_METHOD_OFFSET;
895 charset.code_offset = XINT (val);
896
897 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
898 charset.min_char = i + charset.code_offset;
899 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
900 charset.max_char = i + charset.code_offset;
901 if (charset.max_char > MAX_CHAR)
902 error ("Unsupported max char: %d", charset.max_char);
903
904 i = (charset.min_char >> 7) << 7;
905 for (; i < 0x10000 && i <= charset.max_char; i += 128)
906 CHARSET_FAST_MAP_SET (i, charset.fast_map);
907 i = (i >> 12) << 12;
908 for (; i <= charset.max_char; i += 0x1000)
909 CHARSET_FAST_MAP_SET (i, charset.fast_map);
910 }
911 else if (! NILP (args[charset_arg_map]))
912 {
913 val = args[charset_arg_map];
914 ASET (attrs, charset_map, val);
915 if (STRINGP (val))
916 load_charset_map_from_file (&charset, val, 0);
917 else
918 load_charset_map_from_vector (&charset, val, 0);
919 charset.method = CHARSET_METHOD_MAP_DEFERRED;
920 }
921 else if (! NILP (args[charset_arg_subset]))
922 {
923 Lisp_Object parent;
924 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
925 struct charset *parent_charset;
926
927 val = args[charset_arg_subset];
928 parent = Fcar (val);
929 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
930 parent_min_code = Fnth (make_number (1), val);
931 CHECK_NATNUM (parent_min_code);
932 parent_max_code = Fnth (make_number (2), val);
933 CHECK_NATNUM (parent_max_code);
934 parent_code_offset = Fnth (make_number (3), val);
935 CHECK_NUMBER (parent_code_offset);
936 val = Fmake_vector (make_number (4), Qnil);
937 ASET (val, 0, make_number (parent_charset->id));
938 ASET (val, 1, parent_min_code);
939 ASET (val, 2, parent_max_code);
940 ASET (val, 3, parent_code_offset);
941 ASET (attrs, charset_subset, val);
942
943 charset.method = CHARSET_METHOD_SUBSET;
944 /* Here, we just copy the parent's fast_map. It's not accurate,
945 but at least it works for quickly detecting which character
946 DOESN'T belong to this charset. */
947 for (i = 0; i < 190; i++)
948 charset.fast_map[i] = parent_charset->fast_map[i];
949
950 /* We also copy these for parents. */
951 charset.min_char = parent_charset->min_char;
952 charset.max_char = parent_charset->max_char;
953 }
954 else if (! NILP (args[charset_arg_superset]))
955 {
956 val = args[charset_arg_superset];
957 charset.method = CHARSET_METHOD_SUPERSET;
958 val = Fcopy_sequence (val);
959 ASET (attrs, charset_superset, val);
960
961 charset.min_char = MAX_CHAR;
962 charset.max_char = 0;
963 for (; ! NILP (val); val = Fcdr (val))
964 {
965 Lisp_Object elt, car_part, cdr_part;
966 int this_id, offset;
967 struct charset *this_charset;
968
969 elt = Fcar (val);
970 if (CONSP (elt))
971 {
972 car_part = XCAR (elt);
973 cdr_part = XCDR (elt);
974 CHECK_CHARSET_GET_ID (car_part, this_id);
975 CHECK_NUMBER (cdr_part);
976 offset = XINT (cdr_part);
977 }
978 else
979 {
980 CHECK_CHARSET_GET_ID (elt, this_id);
981 offset = 0;
982 }
983 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
984
985 this_charset = CHARSET_FROM_ID (this_id);
986 if (charset.min_char > this_charset->min_char)
987 charset.min_char = this_charset->min_char;
988 if (charset.max_char < this_charset->max_char)
989 charset.max_char = this_charset->max_char;
990 for (i = 0; i < 190; i++)
991 charset.fast_map[i] |= this_charset->fast_map[i];
992 }
993 }
994 else
995 error ("None of :code-offset, :map, :parents are specified");
996
997 val = args[charset_arg_unify_map];
998 if (! NILP (val) && !STRINGP (val))
999 CHECK_VECTOR (val);
1000 ASET (attrs, charset_unify_map, val);
1001
1002 CHECK_LIST (args[charset_arg_plist]);
1003 ASET (attrs, charset_plist, args[charset_arg_plist]);
1004
1005 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1006 &hash_code);
1007 if (charset.hash_index >= 0)
1008 {
1009 new_definition_p = 0;
1010 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1011 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1012 }
1013 else
1014 {
1015 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1016 hash_code);
1017 if (charset_table_used == charset_table_size)
1018 {
1019 struct charset *new_table
1020 = (struct charset *) xmalloc (sizeof (struct charset)
1021 * (charset_table_size + 16));
1022 bcopy (charset_table, new_table,
1023 sizeof (struct charset) * charset_table_size);
1024 charset_table_size += 16;
1025 charset_table = new_table;
1026 }
1027 id = charset_table_used++;
1028 new_definition_p = 1;
1029 }
1030
1031 ASET (attrs, charset_id, make_number (id));
1032 charset.id = id;
1033 charset_table[id] = charset;
1034
1035 if (charset.iso_final >= 0)
1036 {
1037 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1038 charset.iso_final) = id;
1039 if (new_definition_p)
1040 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1041 Fcons (make_number (id), Qnil));
1042 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1043 charset_jisx0201_roman = id;
1044 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1045 charset_jisx0208_1978 = id;
1046 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1047 charset_jisx0208 = id;
1048 }
1049
1050 if (charset.emacs_mule_id >= 0)
1051 {
1052 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1053 if (charset.emacs_mule_id < 0xA0)
1054 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1055 if (new_definition_p)
1056 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1057 Fcons (make_number (id), Qnil));
1058 }
1059
1060 if (new_definition_p)
1061 {
1062 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1063 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1064 Fcons (make_number (id), Qnil));
1065 charset_ordered_list_tick++;
1066 }
1067
1068 return Qnil;
1069 }
1070
1071
1072 /* Same as Fdefine_charset_internal but arguments are more convenient
1073 to call from C (typically in syms_of_charset). This can define a
1074 charset of `offset' method only. Return the ID of the new
1075 charset. */
1076
1077 static int
1078 define_charset_internal (name, dimension, code_space, min_code, max_code,
1079 iso_final, iso_revision, emacs_mule_id,
1080 ascii_compatible, supplementary,
1081 code_offset)
1082 Lisp_Object name;
1083 int dimension;
1084 unsigned char *code_space;
1085 unsigned min_code, max_code;
1086 int iso_final, iso_revision, emacs_mule_id;
1087 int ascii_compatible, supplementary;
1088 int code_offset;
1089 {
1090 Lisp_Object args[charset_arg_max];
1091 Lisp_Object plist[14];
1092 Lisp_Object val;
1093 int i;
1094
1095 args[charset_arg_name] = name;
1096 args[charset_arg_dimension] = make_number (dimension);
1097 val = Fmake_vector (make_number (8), make_number (0));
1098 for (i = 0; i < 8; i++)
1099 ASET (val, i, make_number (code_space[i]));
1100 args[charset_arg_code_space] = val;
1101 args[charset_arg_min_code] = make_number (min_code);
1102 args[charset_arg_max_code] = make_number (max_code);
1103 args[charset_arg_iso_final]
1104 = (iso_final < 0 ? Qnil : make_number (iso_final));
1105 args[charset_arg_iso_revision] = make_number (iso_revision);
1106 args[charset_arg_emacs_mule_id]
1107 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1108 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1109 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1110 args[charset_arg_invalid_code] = Qnil;
1111 args[charset_arg_code_offset] = make_number (code_offset);
1112 args[charset_arg_map] = Qnil;
1113 args[charset_arg_subset] = Qnil;
1114 args[charset_arg_superset] = Qnil;
1115 args[charset_arg_unify_map] = Qnil;
1116
1117 plist[0] = intern (":name");
1118 plist[1] = args[charset_arg_name];
1119 plist[2] = intern (":dimension");
1120 plist[3] = args[charset_arg_dimension];
1121 plist[4] = intern (":code-space");
1122 plist[5] = args[charset_arg_code_space];
1123 plist[6] = intern (":iso-final-char");
1124 plist[7] = args[charset_arg_iso_final];
1125 plist[8] = intern (":emacs-mule-id");
1126 plist[9] = args[charset_arg_emacs_mule_id];
1127 plist[10] = intern (":ascii-compatible-p");
1128 plist[11] = args[charset_arg_ascii_compatible_p];
1129 plist[12] = intern (":code-offset");
1130 plist[13] = args[charset_arg_code_offset];
1131
1132 args[charset_arg_plist] = Flist (14, plist);
1133 Fdefine_charset_internal (charset_arg_max, args);
1134
1135 return XINT (CHARSET_SYMBOL_ID (name));
1136 }
1137
1138
1139 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1140 Sdefine_charset_alias, 2, 2, 0,
1141 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1142 (alias, charset)
1143 Lisp_Object alias, charset;
1144 {
1145 Lisp_Object attr;
1146
1147 CHECK_CHARSET_GET_ATTR (charset, attr);
1148 Fputhash (alias, attr, Vcharset_hash_table);
1149 Vcharset_list = Fcons (alias, Vcharset_list);
1150 return Qnil;
1151 }
1152
1153
1154 DEFUN ("unibyte-charset", Funibyte_charset, Sunibyte_charset, 0, 0, 0,
1155 doc: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
1156 ()
1157 {
1158 return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte));
1159 }
1160
1161
1162 DEFUN ("set-unibyte-charset", Fset_unibyte_charset, Sset_unibyte_charset,
1163 1, 1, 0,
1164 doc: /* Set the unibyte charset to CHARSET.
1165 This determines how unibyte/multibyte conversion is done. See also
1166 function `unibyte-charset'. */)
1167 (charset)
1168 Lisp_Object charset;
1169 {
1170 struct charset *cs;
1171 int i, c;
1172
1173 CHECK_CHARSET_GET_CHARSET (charset, cs);
1174 if (! cs->ascii_compatible_p
1175 || cs->dimension != 1)
1176 error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset)));
1177 charset_unibyte = cs->id;
1178 memset (unibyte_has_multibyte_table, 1, 128);
1179 for (i = 128; i < 256; i++)
1180 {
1181 c = DECODE_CHAR (cs, i);
1182 unibyte_to_multibyte_table[i] = (c < 0 ? BYTE8_TO_CHAR (i) : c);
1183 unibyte_has_multibyte_table[i] = c >= 0;
1184 }
1185
1186 return Qnil;
1187 }
1188
1189
1190 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1191 doc: /* Return the property list of CHARSET. */)
1192 (charset)
1193 Lisp_Object charset;
1194 {
1195 Lisp_Object attrs;
1196
1197 CHECK_CHARSET_GET_ATTR (charset, attrs);
1198 return CHARSET_ATTR_PLIST (attrs);
1199 }
1200
1201
1202 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1203 doc: /* Set CHARSET's property list to PLIST. */)
1204 (charset, plist)
1205 Lisp_Object charset, plist;
1206 {
1207 Lisp_Object attrs;
1208
1209 CHECK_CHARSET_GET_ATTR (charset, attrs);
1210 CHARSET_ATTR_PLIST (attrs) = plist;
1211 return plist;
1212 }
1213
1214
1215 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1216 doc: /* Unify characters of CHARSET with Unicode.
1217 This means reading the relevant file and installing the table defined
1218 by CHARSET's `:unify-map' property.
1219
1220 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1221 the same meaning as the `:unify-map' attribute in the function
1222 `define-charset' (which see).
1223
1224 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1225 (charset, unify_map, deunify)
1226 Lisp_Object charset, unify_map, deunify;
1227 {
1228 int id;
1229 struct charset *cs;
1230
1231 CHECK_CHARSET_GET_ID (charset, id);
1232 cs = CHARSET_FROM_ID (id);
1233 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1234 load_charset (cs);
1235 if (NILP (deunify)
1236 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1237 : ! CHARSET_UNIFIED_P (cs))
1238 return Qnil;
1239
1240 CHARSET_UNIFIED_P (cs) = 0;
1241 if (NILP (deunify))
1242 {
1243 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
1244 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1245 if (NILP (unify_map))
1246 unify_map = CHARSET_UNIFY_MAP (cs);
1247 if (STRINGP (unify_map))
1248 load_charset_map_from_file (cs, unify_map, 2);
1249 else if (VECTORP (unify_map))
1250 load_charset_map_from_vector (cs, unify_map, 2);
1251 else if (NILP (unify_map))
1252 error ("No unify-map for charset");
1253 else
1254 error ("Bad unify-map arg");
1255 CHARSET_UNIFIED_P (cs) = 1;
1256 }
1257 else if (CHAR_TABLE_P (Vchar_unify_table))
1258 {
1259 int min_code = CHARSET_MIN_CODE (cs);
1260 int max_code = CHARSET_MAX_CODE (cs);
1261 int min_char = DECODE_CHAR (cs, min_code);
1262 int max_char = DECODE_CHAR (cs, max_code);
1263
1264 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1265 }
1266
1267 return Qnil;
1268 }
1269
1270 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1271 Sget_unused_iso_final_char, 2, 2, 0,
1272 doc: /*
1273 Return an unused ISO final char for a charset of DIMENISION and CHARS.
1274 DIMENSION is the number of bytes to represent a character: 1 or 2.
1275 CHARS is the number of characters in a dimension: 94 or 96.
1276
1277 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1278 If there's no unused final char for the specified kind of charset,
1279 return nil. */)
1280 (dimension, chars)
1281 Lisp_Object dimension, chars;
1282 {
1283 int final_char;
1284
1285 CHECK_NUMBER (dimension);
1286 CHECK_NUMBER (chars);
1287 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1288 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1289 if (XINT (chars) != 94 && XINT (chars) != 96)
1290 args_out_of_range_3 (chars, make_number (94), make_number (96));
1291 for (final_char = '0'; final_char <= '?'; final_char++)
1292 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1293 break;
1294 return (final_char <= '?' ? make_number (final_char) : Qnil);
1295 }
1296
1297 static void
1298 check_iso_charset_parameter (dimension, chars, final_char)
1299 Lisp_Object dimension, chars, final_char;
1300 {
1301 CHECK_NATNUM (dimension);
1302 CHECK_NATNUM (chars);
1303 CHECK_NATNUM (final_char);
1304
1305 if (XINT (dimension) > 3)
1306 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1307 if (XINT (chars) != 94 && XINT (chars) != 96)
1308 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1309 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1310 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1311 }
1312
1313
1314 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1315 4, 4, 0,
1316 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1317
1318 On decoding by an ISO-2022 base coding system, when a charset
1319 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1320 if CHARSET is designated instead. */)
1321 (dimension, chars, final_char, charset)
1322 Lisp_Object dimension, chars, final_char, charset;
1323 {
1324 int id;
1325 int chars_flag;
1326
1327 CHECK_CHARSET_GET_ID (charset, id);
1328 check_iso_charset_parameter (dimension, chars, final_char);
1329 chars_flag = XINT (chars) == 96;
1330 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1331 return Qnil;
1332 }
1333
1334
1335 /* Return information about charsets in the text at PTR of NBYTES
1336 bytes, which are NCHARS characters. The value is:
1337
1338 0: Each character is represented by one byte. This is always
1339 true for a unibyte string. For a multibyte string, true if
1340 it contains only ASCII characters.
1341
1342 1: No charsets other than ascii, control-1, and latin-1 are
1343 found.
1344
1345 2: Otherwise.
1346 */
1347
1348 int
1349 string_xstring_p (string)
1350 Lisp_Object string;
1351 {
1352 const unsigned char *p = SDATA (string);
1353 const unsigned char *endp = p + SBYTES (string);
1354
1355 if (SCHARS (string) == SBYTES (string))
1356 return 0;
1357
1358 while (p < endp)
1359 {
1360 int c = STRING_CHAR_ADVANCE (p);
1361
1362 if (c >= 0x100)
1363 return 2;
1364 }
1365 return 1;
1366 }
1367
1368
1369 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1370
1371 CHARSETS is a vector. If Nth element is non-nil, it means the
1372 charset whose id is N is already found.
1373
1374 It may lookup a translation table TABLE if supplied. */
1375
1376 static void
1377 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1378 const unsigned char *ptr;
1379 EMACS_INT nchars, nbytes;
1380 Lisp_Object charsets, table;
1381 int multibyte;
1382 {
1383 const unsigned char *pend = ptr + nbytes;
1384
1385 if (nchars == nbytes)
1386 {
1387 if (multibyte)
1388 ASET (charsets, charset_ascii, Qt);
1389 else
1390 while (ptr < pend)
1391 {
1392 int c = *ptr++;
1393
1394 if (!NILP (table))
1395 c = translate_char (table, c);
1396 if (ASCII_BYTE_P (c))
1397 ASET (charsets, charset_ascii, Qt);
1398 else
1399 ASET (charsets, charset_eight_bit, Qt);
1400 }
1401 }
1402 else
1403 {
1404 while (ptr < pend)
1405 {
1406 int c = STRING_CHAR_ADVANCE (ptr);
1407 struct charset *charset;
1408
1409 if (!NILP (table))
1410 c = translate_char (table, c);
1411 charset = CHAR_CHARSET (c);
1412 ASET (charsets, CHARSET_ID (charset), Qt);
1413 }
1414 }
1415 }
1416
1417 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1418 2, 3, 0,
1419 doc: /* Return a list of charsets in the region between BEG and END.
1420 BEG and END are buffer positions.
1421 Optional arg TABLE if non-nil is a translation table to look up.
1422
1423 If the current buffer is unibyte, the returned list may contain
1424 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1425 (beg, end, table)
1426 Lisp_Object beg, end, table;
1427 {
1428 Lisp_Object charsets;
1429 EMACS_INT from, from_byte, to, stop, stop_byte;
1430 int i;
1431 Lisp_Object val;
1432 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1433
1434 validate_region (&beg, &end);
1435 from = XFASTINT (beg);
1436 stop = to = XFASTINT (end);
1437
1438 if (from < GPT && GPT < to)
1439 {
1440 stop = GPT;
1441 stop_byte = GPT_BYTE;
1442 }
1443 else
1444 stop_byte = CHAR_TO_BYTE (stop);
1445
1446 from_byte = CHAR_TO_BYTE (from);
1447
1448 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1449 while (1)
1450 {
1451 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1452 stop_byte - from_byte, charsets, table,
1453 multibyte);
1454 if (stop < to)
1455 {
1456 from = stop, from_byte = stop_byte;
1457 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1458 }
1459 else
1460 break;
1461 }
1462
1463 val = Qnil;
1464 for (i = charset_table_used - 1; i >= 0; i--)
1465 if (!NILP (AREF (charsets, i)))
1466 val = Fcons (CHARSET_NAME (charset_table + i), val);
1467 return val;
1468 }
1469
1470 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1471 1, 2, 0,
1472 doc: /* Return a list of charsets in STR.
1473 Optional arg TABLE if non-nil is a translation table to look up.
1474
1475 If STR is unibyte, the returned list may contain
1476 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1477 (str, table)
1478 Lisp_Object str, table;
1479 {
1480 Lisp_Object charsets;
1481 int i;
1482 Lisp_Object val;
1483
1484 CHECK_STRING (str);
1485
1486 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1487 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1488 charsets, table,
1489 STRING_MULTIBYTE (str));
1490 val = Qnil;
1491 for (i = charset_table_used - 1; i >= 0; i--)
1492 if (!NILP (AREF (charsets, i)))
1493 val = Fcons (CHARSET_NAME (charset_table + i), val);
1494 return val;
1495 }
1496
1497 \f
1498
1499 /* Return a character correponding to the code-point CODE of
1500 CHARSET. */
1501
1502 int
1503 decode_char (charset, code)
1504 struct charset *charset;
1505 unsigned code;
1506 {
1507 int c, char_index;
1508 enum charset_method method = CHARSET_METHOD (charset);
1509
1510 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1511 return -1;
1512
1513 if (method == CHARSET_METHOD_MAP_DEFERRED)
1514 {
1515 load_charset (charset);
1516 method = CHARSET_METHOD (charset);
1517 }
1518
1519 if (method == CHARSET_METHOD_SUBSET)
1520 {
1521 Lisp_Object subset_info;
1522
1523 subset_info = CHARSET_SUBSET (charset);
1524 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1525 code -= XINT (AREF (subset_info, 3));
1526 if (code < XFASTINT (AREF (subset_info, 1))
1527 || code > XFASTINT (AREF (subset_info, 2)))
1528 c = -1;
1529 else
1530 c = DECODE_CHAR (charset, code);
1531 }
1532 else if (method == CHARSET_METHOD_SUPERSET)
1533 {
1534 Lisp_Object parents;
1535
1536 parents = CHARSET_SUPERSET (charset);
1537 c = -1;
1538 for (; CONSP (parents); parents = XCDR (parents))
1539 {
1540 int id = XINT (XCAR (XCAR (parents)));
1541 int code_offset = XINT (XCDR (XCAR (parents)));
1542 unsigned this_code = code - code_offset;
1543
1544 charset = CHARSET_FROM_ID (id);
1545 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1546 break;
1547 }
1548 }
1549 else
1550 {
1551 char_index = CODE_POINT_TO_INDEX (charset, code);
1552 if (char_index < 0)
1553 return -1;
1554
1555 if (method == CHARSET_METHOD_MAP)
1556 {
1557 Lisp_Object decoder;
1558
1559 decoder = CHARSET_DECODER (charset);
1560 if (! VECTORP (decoder))
1561 return -1;
1562 c = XINT (AREF (decoder, char_index));
1563 }
1564 else
1565 {
1566 c = char_index + CHARSET_CODE_OFFSET (charset);
1567 }
1568 }
1569
1570 if (CHARSET_UNIFIED_P (charset)
1571 && c >= 0)
1572 {
1573 MAYBE_UNIFY_CHAR (c);
1574 }
1575
1576 return c;
1577 }
1578
1579 /* Variable used temporarily by the macro ENCODE_CHAR. */
1580 Lisp_Object charset_work;
1581
1582 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1583 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1584 use CHARSET's strict_max_char instead of max_char. */
1585
1586 unsigned
1587 encode_char (charset, c)
1588 struct charset *charset;
1589 int c;
1590 {
1591 unsigned code;
1592 enum charset_method method = CHARSET_METHOD (charset);
1593
1594 if (CHARSET_UNIFIED_P (charset))
1595 {
1596 Lisp_Object deunifier, deunified;
1597
1598 deunifier = CHARSET_DEUNIFIER (charset);
1599 if (! CHAR_TABLE_P (deunifier))
1600 {
1601 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
1602 deunifier = CHARSET_DEUNIFIER (charset);
1603 }
1604 deunified = CHAR_TABLE_REF (deunifier, c);
1605 if (! NILP (deunified))
1606 c = XINT (deunified);
1607 }
1608
1609 if (method == CHARSET_METHOD_SUBSET)
1610 {
1611 Lisp_Object subset_info;
1612 struct charset *this_charset;
1613
1614 subset_info = CHARSET_SUBSET (charset);
1615 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1616 code = ENCODE_CHAR (this_charset, c);
1617 if (code == CHARSET_INVALID_CODE (this_charset)
1618 || code < XFASTINT (AREF (subset_info, 1))
1619 || code > XFASTINT (AREF (subset_info, 2)))
1620 return CHARSET_INVALID_CODE (charset);
1621 code += XINT (AREF (subset_info, 3));
1622 return code;
1623 }
1624
1625 if (method == CHARSET_METHOD_SUPERSET)
1626 {
1627 Lisp_Object parents;
1628
1629 parents = CHARSET_SUPERSET (charset);
1630 for (; CONSP (parents); parents = XCDR (parents))
1631 {
1632 int id = XINT (XCAR (XCAR (parents)));
1633 int code_offset = XINT (XCDR (XCAR (parents)));
1634 struct charset *this_charset = CHARSET_FROM_ID (id);
1635
1636 code = ENCODE_CHAR (this_charset, c);
1637 if (code != CHARSET_INVALID_CODE (this_charset))
1638 return code + code_offset;
1639 }
1640 return CHARSET_INVALID_CODE (charset);
1641 }
1642
1643 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1644 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1645 return CHARSET_INVALID_CODE (charset);
1646
1647 if (method == CHARSET_METHOD_MAP_DEFERRED)
1648 {
1649 load_charset (charset);
1650 method = CHARSET_METHOD (charset);
1651 }
1652
1653 if (method == CHARSET_METHOD_MAP)
1654 {
1655 Lisp_Object encoder;
1656 Lisp_Object val;
1657
1658 encoder = CHARSET_ENCODER (charset);
1659 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1660 return CHARSET_INVALID_CODE (charset);
1661 val = CHAR_TABLE_REF (encoder, c);
1662 if (NILP (val))
1663 return CHARSET_INVALID_CODE (charset);
1664 code = XINT (val);
1665 if (! CHARSET_COMPACT_CODES_P (charset))
1666 code = INDEX_TO_CODE_POINT (charset, code);
1667 }
1668 else /* method == CHARSET_METHOD_OFFSET */
1669 {
1670 code = c - CHARSET_CODE_OFFSET (charset);
1671 code = INDEX_TO_CODE_POINT (charset, code);
1672 }
1673
1674 return code;
1675 }
1676
1677
1678 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1679 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1680 Return nil if CODE-POINT is not valid in CHARSET.
1681
1682 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1683
1684 Optional argument RESTRICTION specifies a way to map the pair of CCS
1685 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1686 (charset, code_point, restriction)
1687 Lisp_Object charset, code_point, restriction;
1688 {
1689 int c, id;
1690 unsigned code;
1691 struct charset *charsetp;
1692
1693 CHECK_CHARSET_GET_ID (charset, id);
1694 if (CONSP (code_point))
1695 {
1696 CHECK_NATNUM_CAR (code_point);
1697 CHECK_NATNUM_CDR (code_point);
1698 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1699 }
1700 else
1701 {
1702 CHECK_NATNUM (code_point);
1703 code = XINT (code_point);
1704 }
1705 charsetp = CHARSET_FROM_ID (id);
1706 c = DECODE_CHAR (charsetp, code);
1707 return (c >= 0 ? make_number (c) : Qnil);
1708 }
1709
1710
1711 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1712 doc: /* Encode the character CH into a code-point of CHARSET.
1713 Return nil if CHARSET doesn't include CH.
1714
1715 Optional argument RESTRICTION specifies a way to map CHAR to a
1716 code-point in CCS. Currently not supported and just ignored. */)
1717 (ch, charset, restriction)
1718 Lisp_Object ch, charset, restriction;
1719 {
1720 int id;
1721 unsigned code;
1722 struct charset *charsetp;
1723
1724 CHECK_CHARSET_GET_ID (charset, id);
1725 CHECK_NATNUM (ch);
1726 charsetp = CHARSET_FROM_ID (id);
1727 code = ENCODE_CHAR (charsetp, XINT (ch));
1728 if (code == CHARSET_INVALID_CODE (charsetp))
1729 return Qnil;
1730 if (code > 0x7FFFFFF)
1731 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1732 return make_number (code);
1733 }
1734
1735
1736 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1737 doc:
1738 /* Return a character of CHARSET whose position codes are CODEn.
1739
1740 CODE1 through CODE4 are optional, but if you don't supply sufficient
1741 position codes, it is assumed that the minimum code in each dimension
1742 is specified. */)
1743 (charset, code1, code2, code3, code4)
1744 Lisp_Object charset, code1, code2, code3, code4;
1745 {
1746 int id, dimension;
1747 struct charset *charsetp;
1748 unsigned code;
1749 int c;
1750
1751 CHECK_CHARSET_GET_ID (charset, id);
1752 charsetp = CHARSET_FROM_ID (id);
1753
1754 dimension = CHARSET_DIMENSION (charsetp);
1755 if (NILP (code1))
1756 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1757 ? 0 : CHARSET_MIN_CODE (charsetp));
1758 else
1759 {
1760 CHECK_NATNUM (code1);
1761 if (XFASTINT (code1) >= 0x100)
1762 args_out_of_range (make_number (0xFF), code1);
1763 code = XFASTINT (code1);
1764
1765 if (dimension > 1)
1766 {
1767 code <<= 8;
1768 if (NILP (code2))
1769 code |= charsetp->code_space[(dimension - 2) * 4];
1770 else
1771 {
1772 CHECK_NATNUM (code2);
1773 if (XFASTINT (code2) >= 0x100)
1774 args_out_of_range (make_number (0xFF), code2);
1775 code |= XFASTINT (code2);
1776 }
1777
1778 if (dimension > 2)
1779 {
1780 code <<= 8;
1781 if (NILP (code3))
1782 code |= charsetp->code_space[(dimension - 3) * 4];
1783 else
1784 {
1785 CHECK_NATNUM (code3);
1786 if (XFASTINT (code3) >= 0x100)
1787 args_out_of_range (make_number (0xFF), code3);
1788 code |= XFASTINT (code3);
1789 }
1790
1791 if (dimension > 3)
1792 {
1793 code <<= 8;
1794 if (NILP (code4))
1795 code |= charsetp->code_space[0];
1796 else
1797 {
1798 CHECK_NATNUM (code4);
1799 if (XFASTINT (code4) >= 0x100)
1800 args_out_of_range (make_number (0xFF), code4);
1801 code |= XFASTINT (code4);
1802 }
1803 }
1804 }
1805 }
1806 }
1807
1808 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1809 code &= 0x7F7F7F7F;
1810 c = DECODE_CHAR (charsetp, code);
1811 if (c < 0)
1812 error ("Invalid code(s)");
1813 return make_number (c);
1814 }
1815
1816
1817 /* Return the first charset in CHARSET_LIST that contains C.
1818 CHARSET_LIST is a list of charset IDs. If it is nil, use
1819 Vcharset_ordered_list. */
1820
1821 struct charset *
1822 char_charset (c, charset_list, code_return)
1823 int c;
1824 Lisp_Object charset_list;
1825 unsigned *code_return;
1826 {
1827 if (NILP (charset_list))
1828 charset_list = Vcharset_ordered_list;
1829
1830 while (CONSP (charset_list))
1831 {
1832 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1833 unsigned code = ENCODE_CHAR (charset, c);
1834
1835 if (code != CHARSET_INVALID_CODE (charset))
1836 {
1837 if (code_return)
1838 *code_return = code;
1839 return charset;
1840 }
1841 charset_list = XCDR (charset_list);
1842 }
1843 return NULL;
1844 }
1845
1846
1847 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1848 doc:
1849 /*Return list of charset and one to four position-codes of CHAR.
1850 The charset is decided by the current priority order of charsets.
1851 A position-code is a byte value of each dimension of the code-point of
1852 CHAR in the charset. */)
1853 (ch)
1854 Lisp_Object ch;
1855 {
1856 struct charset *charset;
1857 int c, dimension;
1858 unsigned code;
1859 Lisp_Object val;
1860
1861 CHECK_CHARACTER (ch);
1862 c = XFASTINT (ch);
1863 charset = CHAR_CHARSET (c);
1864 if (! charset)
1865 abort ();
1866 code = ENCODE_CHAR (charset, c);
1867 if (code == CHARSET_INVALID_CODE (charset))
1868 abort ();
1869 dimension = CHARSET_DIMENSION (charset);
1870 for (val = Qnil; dimension > 0; dimension--)
1871 {
1872 val = Fcons (make_number (code & 0xFF), val);
1873 code >>= 8;
1874 }
1875 return Fcons (CHARSET_NAME (charset), val);
1876 }
1877
1878
1879 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1880 doc: /* Return the charset of highest priority that contains CH. */)
1881 (ch)
1882 Lisp_Object ch;
1883 {
1884 struct charset *charset;
1885
1886 CHECK_CHARACTER (ch);
1887 charset = CHAR_CHARSET (XINT (ch));
1888 return (CHARSET_NAME (charset));
1889 }
1890
1891
1892 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1893 doc: /*
1894 Return charset of a character in the current buffer at position POS.
1895 If POS is nil, it defauls to the current point.
1896 If POS is out of range, the value is nil. */)
1897 (pos)
1898 Lisp_Object pos;
1899 {
1900 Lisp_Object ch;
1901 struct charset *charset;
1902
1903 ch = Fchar_after (pos);
1904 if (! INTEGERP (ch))
1905 return ch;
1906 charset = CHAR_CHARSET (XINT (ch));
1907 return (CHARSET_NAME (charset));
1908 }
1909
1910
1911 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1912 doc: /*
1913 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1914
1915 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1916 by their DIMENSION, CHARS, and FINAL-CHAR,
1917 where as Emacs distinguishes them by charset symbol.
1918 See the documentation of the function `charset-info' for the meanings of
1919 DIMENSION, CHARS, and FINAL-CHAR. */)
1920 (dimension, chars, final_char)
1921 Lisp_Object dimension, chars, final_char;
1922 {
1923 int id;
1924 int chars_flag;
1925
1926 check_iso_charset_parameter (dimension, chars, final_char);
1927 chars_flag = XFASTINT (chars) == 96;
1928 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
1929 XFASTINT (final_char));
1930 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
1931 }
1932
1933
1934 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1935 0, 0, 0,
1936 doc: /*
1937 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1938 ()
1939 {
1940 int i;
1941 struct charset *charset;
1942 Lisp_Object attrs;
1943
1944 for (i = 0; i < charset_table_used; i++)
1945 {
1946 charset = CHARSET_FROM_ID (i);
1947 attrs = CHARSET_ATTRIBUTES (charset);
1948
1949 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1950 {
1951 CHARSET_ATTR_DECODER (attrs) = Qnil;
1952 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1953 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1954 }
1955
1956 if (CHARSET_UNIFIED_P (charset))
1957 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
1958 }
1959
1960 if (CHAR_TABLE_P (Vchar_unified_charset_table))
1961 {
1962 Foptimize_char_table (Vchar_unified_charset_table);
1963 Vchar_unify_table = Vchar_unified_charset_table;
1964 Vchar_unified_charset_table = Qnil;
1965 }
1966
1967 return Qnil;
1968 }
1969
1970 DEFUN ("charset-priority-list", Fcharset_priority_list,
1971 Scharset_priority_list, 0, 1, 0,
1972 doc: /* Return the list of charsets ordered by priority.
1973 HIGHESTP non-nil means just return the highest priority one. */)
1974 (highestp)
1975 Lisp_Object highestp;
1976 {
1977 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1978
1979 if (!NILP (highestp))
1980 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
1981
1982 while (!NILP (list))
1983 {
1984 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
1985 list = XCDR (list);
1986 }
1987 return Fnreverse (val);
1988 }
1989
1990 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1991 1, MANY, 0,
1992 doc: /* Assign higher priority to the charsets given as arguments.
1993 usage: (set-charset-priority &rest charsets) */)
1994 (nargs, args)
1995 int nargs;
1996 Lisp_Object *args;
1997 {
1998 Lisp_Object new_head, old_list, arglist[2];
1999 Lisp_Object list_2022, list_emacs_mule;
2000 int i, id;
2001
2002 old_list = Fcopy_sequence (Vcharset_ordered_list);
2003 new_head = Qnil;
2004 for (i = 0; i < nargs; i++)
2005 {
2006 CHECK_CHARSET_GET_ID (args[i], id);
2007 if (! NILP (Fmemq (make_number (id), old_list)))
2008 {
2009 old_list = Fdelq (make_number (id), old_list);
2010 new_head = Fcons (make_number (id), new_head);
2011 }
2012 }
2013 arglist[0] = Fnreverse (new_head);
2014 arglist[1] = old_list;
2015 Vcharset_ordered_list = Fnconc (2, arglist);
2016 charset_ordered_list_tick++;
2017
2018 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2019 CONSP (old_list); old_list = XCDR (old_list))
2020 {
2021 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2022 list_2022 = Fcons (XCAR (old_list), list_2022);
2023 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2024 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2025 }
2026 Viso_2022_charset_list = Fnreverse (list_2022);
2027 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2028
2029 return Qnil;
2030 }
2031
2032 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2033 0, 1, 0,
2034 doc: /* Internal use only.
2035 Return charset identification number of CHARSET. */)
2036 (charset)
2037 Lisp_Object charset;
2038 {
2039 int id;
2040
2041 CHECK_CHARSET_GET_ID (charset, id);
2042 return make_number (id);
2043 }
2044
2045 \f
2046 void
2047 init_charset ()
2048 {
2049 Vcharset_map_path
2050 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory),
2051 Qnil);
2052 }
2053
2054
2055 void
2056 init_charset_once ()
2057 {
2058 int i, j, k;
2059
2060 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2061 for (j = 0; j < ISO_MAX_CHARS; j++)
2062 for (k = 0; k < ISO_MAX_FINAL; k++)
2063 iso_charset_table[i][j][k] = -1;
2064
2065 for (i = 0; i < 256; i++)
2066 emacs_mule_charset[i] = NULL;
2067
2068 charset_jisx0201_roman = -1;
2069 charset_jisx0208_1978 = -1;
2070 charset_jisx0208 = -1;
2071
2072 for (i = 0; i < 128; i++)
2073 unibyte_to_multibyte_table[i] = i;
2074 for (; i < 256; i++)
2075 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2076 }
2077
2078 #ifdef emacs
2079
2080 void
2081 syms_of_charset ()
2082 {
2083 DEFSYM (Qcharsetp, "charsetp");
2084
2085 DEFSYM (Qascii, "ascii");
2086 DEFSYM (Qunicode, "unicode");
2087 DEFSYM (Qeight_bit, "eight-bit");
2088 DEFSYM (Qiso_8859_1, "iso-8859-1");
2089
2090 DEFSYM (Qgl, "gl");
2091 DEFSYM (Qgr, "gr");
2092
2093 staticpro (&Vcharset_ordered_list);
2094 Vcharset_ordered_list = Qnil;
2095
2096 staticpro (&Viso_2022_charset_list);
2097 Viso_2022_charset_list = Qnil;
2098
2099 staticpro (&Vemacs_mule_charset_list);
2100 Vemacs_mule_charset_list = Qnil;
2101
2102 staticpro (&Vcharset_hash_table);
2103 {
2104 Lisp_Object args[2];
2105 args[0] = QCtest;
2106 args[1] = Qeq;
2107 Vcharset_hash_table = Fmake_hash_table (2, args);
2108 }
2109
2110 charset_table_size = 128;
2111 charset_table = ((struct charset *)
2112 xmalloc (sizeof (struct charset) * charset_table_size));
2113 charset_table_used = 0;
2114
2115 staticpro (&Vchar_unified_charset_table);
2116 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2117
2118 defsubr (&Scharsetp);
2119 defsubr (&Smap_charset_chars);
2120 defsubr (&Sdefine_charset_internal);
2121 defsubr (&Sdefine_charset_alias);
2122 defsubr (&Sunibyte_charset);
2123 defsubr (&Sset_unibyte_charset);
2124 defsubr (&Scharset_plist);
2125 defsubr (&Sset_charset_plist);
2126 defsubr (&Sunify_charset);
2127 defsubr (&Sget_unused_iso_final_char);
2128 defsubr (&Sdeclare_equiv_charset);
2129 defsubr (&Sfind_charset_region);
2130 defsubr (&Sfind_charset_string);
2131 defsubr (&Sdecode_char);
2132 defsubr (&Sencode_char);
2133 defsubr (&Ssplit_char);
2134 defsubr (&Smake_char);
2135 defsubr (&Schar_charset);
2136 defsubr (&Scharset_after);
2137 defsubr (&Siso_charset);
2138 defsubr (&Sclear_charset_maps);
2139 defsubr (&Scharset_priority_list);
2140 defsubr (&Sset_charset_priority);
2141 defsubr (&Scharset_id_internal);
2142
2143 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2144 doc: /* *Lisp of directories to search for charset map files. */);
2145 Vcharset_map_path = Qnil;
2146
2147 DEFVAR_LISP ("charset-list", &Vcharset_list,
2148 doc: /* List of all charsets ever defined. */);
2149 Vcharset_list = Qnil;
2150
2151 charset_ascii
2152 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2153 0, 127, 'B', -1, 0, 1, 0, 0);
2154 charset_iso_8859_1
2155 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2156 0, 255, -1, -1, -1, 1, 0, 0);
2157 charset_unicode
2158 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2159 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2160 charset_eight_bit
2161 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2162 128, 255, -1, 0, -1, 0, 0,
2163 MAX_5_BYTE_CHAR + 1);
2164 }
2165
2166 #endif /* emacs */
2167
2168 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2169 (do not change this comment) */