]> code.delx.au - gnu-emacs/blob - src/charset.c
Revision: emacs@sv.gnu.org/emacs--unicode--0--patch-34
[gnu-emacs] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005,
3 2006 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
7 Copyright (C) 2003, 2004
8 National Institute of Advanced Industrial Science and Technology (AIST)
9 Registration Number H13PRO009
10
11 This file is part of GNU Emacs.
12
13 GNU Emacs is free software; you can redistribute it and/or modify
14 it under the terms of the GNU General Public License as published by
15 the Free Software Foundation; either version 2, or (at your option)
16 any later version.
17
18 GNU Emacs is distributed in the hope that it will be useful,
19 but WITHOUT ANY WARRANTY; without even the implied warranty of
20 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 GNU General Public License for more details.
22
23 You should have received a copy of the GNU General Public License
24 along with GNU Emacs; see the file COPYING. If not, write to
25 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
26 Boston, MA 02110-1301, USA. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
40
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
48
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
53
54 */
55
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
59
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
63
64 /* Table of struct charset. */
65 struct charset *charset_table;
66
67 static int charset_table_size;
68 static int charset_table_used;
69
70 Lisp_Object Qcharsetp;
71
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77
78 /* The corresponding charsets. */
79 int charset_ascii;
80 int charset_eight_bit;
81 int charset_iso_8859_1;
82 int charset_unicode;
83
84 /* The other special charsets. */
85 int charset_jisx0201_roman;
86 int charset_jisx0208_1978;
87 int charset_jisx0208;
88
89 /* Value of charset attribute `charset-iso-plane'. */
90 Lisp_Object Qgl, Qgr;
91
92 /* Charset of unibyte characters. */
93 int charset_unibyte;
94
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list;
97
98 /* Incremented everytime we change Vcharset_ordered_list. This is
99 unsigned short so that it fits in Lisp_Int and never matches
100 -1. */
101 unsigned short charset_ordered_list_tick;
102
103 /* List of iso-2022 charsets. */
104 Lisp_Object Viso_2022_charset_list;
105
106 /* List of emacs-mule charsets. */
107 Lisp_Object Vemacs_mule_charset_list;
108
109 struct charset *emacs_mule_charset[256];
110
111 /* Mapping table from ISO2022's charset (specified by DIMENSION,
112 CHARS, and FINAL-CHAR) to Emacs' charset. */
113 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
114
115 Lisp_Object Vcharset_map_path;
116
117 Lisp_Object Vchar_unified_charset_table;
118
119 /* Defined in chartab.c */
120 extern void
121 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
122 Lisp_Object function, Lisp_Object table,
123 Lisp_Object arg, struct charset *charset,
124 unsigned from, unsigned to));
125
126 #define CODE_POINT_TO_INDEX(charset, code) \
127 ((charset)->code_linear_p \
128 ? (code) - (charset)->min_code \
129 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
130 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
131 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
132 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
133 ? (((((code) >> 24) - (charset)->code_space[12]) \
134 * (charset)->code_space[11]) \
135 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
136 * (charset)->code_space[7]) \
137 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
138 * (charset)->code_space[3]) \
139 + (((code) & 0xFF) - (charset)->code_space[0]) \
140 - ((charset)->char_index_offset)) \
141 : -1)
142
143
144 /* Convert the character index IDX to code-point CODE for CHARSET.
145 It is assumed that IDX is in a valid range. */
146
147 #define INDEX_TO_CODE_POINT(charset, idx) \
148 ((charset)->code_linear_p \
149 ? (idx) + (charset)->min_code \
150 : (idx += (charset)->char_index_offset, \
151 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
152 | (((charset)->code_space[4] \
153 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
154 << 8) \
155 | (((charset)->code_space[8] \
156 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
157 << 16) \
158 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
159 << 24))))
160
161
162 \f
163
164 /* Set to 1 to warn that a charset map is loaded and thus a buffer
165 text and a string data may be relocated. */
166 int charset_map_loaded;
167
168 struct charset_map_entries
169 {
170 struct {
171 unsigned from, to;
172 int c;
173 } entry[0x10000];
174 struct charset_map_entries *next;
175 };
176
177 /* Load the mapping information for CHARSET from ENTRIES.
178
179 If CONTROL_FLAG is 0, setup CHARSET->min_char and CHARSET->max_char.
180
181 If CONTROL_FLAG is 1, setup CHARSET->min_char, CHARSET->max_char,
182 CHARSET->decoder, and CHARSET->encoder.
183
184 If CONTROL_FLAG is 2, setup CHARSET->deunifier and
185 Vchar_unify_table. If Vchar_unified_charset_table is non-nil,
186 setup it too. */
187
188 static void
189 load_charset_map (charset, entries, n_entries, control_flag)
190 struct charset *charset;
191 struct charset_map_entries *entries;
192 int n_entries;
193 int control_flag;
194 {
195 Lisp_Object vec, table;
196 unsigned max_code = CHARSET_MAX_CODE (charset);
197 int ascii_compatible_p = charset->ascii_compatible_p;
198 int min_char, max_char, nonascii_min_char;
199 int i;
200 unsigned char *fast_map = charset->fast_map;
201
202 if (n_entries <= 0)
203 return;
204
205 if (control_flag > 0)
206 {
207 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
208
209 table = Fmake_char_table (Qnil, Qnil);
210 if (control_flag == 1)
211 vec = Fmake_vector (make_number (n), make_number (-1));
212 else if (! CHAR_TABLE_P (Vchar_unify_table))
213 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
214
215 charset_map_loaded = 1;
216 }
217
218 min_char = max_char = entries->entry[0].c;
219 nonascii_min_char = MAX_CHAR;
220 for (i = 0; i < n_entries; i++)
221 {
222 unsigned from, to;
223 int from_index, to_index;
224 int from_c, to_c;
225 int idx = i % 0x10000;
226
227 if (i > 0 && idx == 0)
228 entries = entries->next;
229 from = entries->entry[idx].from;
230 to = entries->entry[idx].to;
231 from_c = entries->entry[idx].c;
232 from_index = CODE_POINT_TO_INDEX (charset, from);
233 if (from == to)
234 {
235 to_index = from_index;
236 to_c = from_c;
237 }
238 else
239 {
240 to_index = CODE_POINT_TO_INDEX (charset, to);
241 to_c = from_c + (to_index - from_index);
242 }
243 if (from_index < 0 || to_index < 0)
244 continue;
245
246 if (control_flag < 2)
247 {
248 int c;
249
250 if (to_c > max_char)
251 max_char = to_c;
252 else if (from_c < min_char)
253 min_char = from_c;
254 if (ascii_compatible_p)
255 {
256 if (! ASCII_BYTE_P (from_c))
257 {
258 if (from_c < nonascii_min_char)
259 nonascii_min_char = from_c;
260 }
261 else if (! ASCII_BYTE_P (to_c))
262 {
263 nonascii_min_char = 0x80;
264 }
265 }
266
267 for (c = from_c; c <= to_c; c++)
268 CHARSET_FAST_MAP_SET (c, fast_map);
269
270 if (control_flag == 1)
271 {
272 unsigned code = from;
273
274 if (CHARSET_COMPACT_CODES_P (charset))
275 while (1)
276 {
277 ASET (vec, from_index, make_number (from_c));
278 if (NILP (CHAR_TABLE_REF (table, from_c)))
279 CHAR_TABLE_SET (table, from_c, make_number (code));
280 if (from_index == to_index)
281 break;
282 from_index++, from_c++;
283 code = INDEX_TO_CODE_POINT (charset, from_index);
284 }
285 else
286 for (; from_index <= to_index; from_index++, from_c++)
287 {
288 ASET (vec, from_index, make_number (from_c));
289 if (NILP (CHAR_TABLE_REF (table, from_c)))
290 CHAR_TABLE_SET (table, from_c, make_number (from_index));
291 }
292 }
293 }
294 else
295 {
296 unsigned code = from;
297
298 while (1)
299 {
300 int c1 = DECODE_CHAR (charset, code);
301
302 if (c1 >= 0)
303 {
304 CHAR_TABLE_SET (table, from_c, make_number (c1));
305 CHAR_TABLE_SET (Vchar_unify_table, c1, make_number (from_c));
306 if (CHAR_TABLE_P (Vchar_unified_charset_table))
307 CHAR_TABLE_SET (Vchar_unified_charset_table, c1,
308 CHARSET_NAME (charset));
309 }
310 if (from_index == to_index)
311 break;
312 from_index++, from_c++;
313 code = INDEX_TO_CODE_POINT (charset, from_index);
314 }
315 }
316 }
317
318 if (control_flag < 2)
319 {
320 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
321 ? nonascii_min_char : min_char);
322 CHARSET_MAX_CHAR (charset) = max_char;
323 if (control_flag == 1)
324 {
325 CHARSET_DECODER (charset) = vec;
326 CHARSET_ENCODER (charset) = table;
327 }
328 }
329 else
330 CHARSET_DEUNIFIER (charset) = table;
331 }
332
333
334 /* Read a hexadecimal number (preceded by "0x") from the file FP while
335 paying attention to comment charcter '#'. */
336
337 static INLINE unsigned
338 read_hex (fp, eof)
339 FILE *fp;
340 int *eof;
341 {
342 int c;
343 unsigned n;
344
345 while ((c = getc (fp)) != EOF)
346 {
347 if (c == '#')
348 {
349 while ((c = getc (fp)) != EOF && c != '\n');
350 }
351 else if (c == '0')
352 {
353 if ((c = getc (fp)) == EOF || c == 'x')
354 break;
355 }
356 }
357 if (c == EOF)
358 {
359 *eof = 1;
360 return 0;
361 }
362 *eof = 0;
363 n = 0;
364 if (c == 'x')
365 while ((c = getc (fp)) != EOF && isxdigit (c))
366 n = ((n << 4)
367 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
368 else
369 while ((c = getc (fp)) != EOF && isdigit (c))
370 n = (n * 10) + c - '0';
371 if (c != EOF)
372 ungetc (c, fp);
373 return n;
374 }
375
376
377 /* Return a mapping vector for CHARSET loaded from MAPFILE.
378 Each line of MAPFILE has this form
379 0xAAAA 0xCCCC
380 where 0xAAAA is a code-point and 0xCCCC is the corresponding
381 character code, or this form
382 0xAAAA-0xBBBB 0xCCCC
383 where 0xAAAA and 0xBBBB are code-points specifying a range, and
384 0xCCCC is the first character code of the range.
385
386 The returned vector has this form:
387 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
388 where CODE1 is a code-point or a cons of code-points specifying a
389 range. */
390
391 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
392
393 static void
394 load_charset_map_from_file (charset, mapfile, control_flag)
395 struct charset *charset;
396 Lisp_Object mapfile;
397 int control_flag;
398 {
399 unsigned min_code = CHARSET_MIN_CODE (charset);
400 unsigned max_code = CHARSET_MAX_CODE (charset);
401 int fd;
402 FILE *fp;
403 int eof;
404 Lisp_Object suffixes;
405 struct charset_map_entries *head, *entries;
406 int n_entries;
407
408 suffixes = Fcons (build_string (".map"),
409 Fcons (build_string (".TXT"), Qnil));
410
411 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
412 if (fd < 0
413 || ! (fp = fdopen (fd, "r")))
414 {
415 add_to_log ("Failure in loading charset map: %S", mapfile, Qnil);
416 return;
417 }
418
419 head = entries = ((struct charset_map_entries *)
420 alloca (sizeof (struct charset_map_entries)));
421 n_entries = 0;
422 eof = 0;
423 while (1)
424 {
425 unsigned from, to;
426 int c;
427 int idx;
428
429 from = read_hex (fp, &eof);
430 if (eof)
431 break;
432 if (getc (fp) == '-')
433 to = read_hex (fp, &eof);
434 else
435 to = from;
436 c = (int) read_hex (fp, &eof);
437
438 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
439 continue;
440
441 if (n_entries > 0 && (n_entries % 0x10000) == 0)
442 {
443 entries->next = ((struct charset_map_entries *)
444 alloca (sizeof (struct charset_map_entries)));
445 entries = entries->next;
446 }
447 idx = n_entries % 0x10000;
448 entries->entry[idx].from = from;
449 entries->entry[idx].to = to;
450 entries->entry[idx].c = c;
451 n_entries++;
452 }
453 fclose (fp);
454 close (fd);
455
456 load_charset_map (charset, head, n_entries, control_flag);
457 }
458
459 static void
460 load_charset_map_from_vector (charset, vec, control_flag)
461 struct charset *charset;
462 Lisp_Object vec;
463 int control_flag;
464 {
465 unsigned min_code = CHARSET_MIN_CODE (charset);
466 unsigned max_code = CHARSET_MAX_CODE (charset);
467 struct charset_map_entries *head, *entries;
468 int n_entries;
469 int len = ASIZE (vec);
470 int i;
471
472 if (len % 2 == 1)
473 {
474 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
475 return;
476 }
477
478 head = entries = ((struct charset_map_entries *)
479 alloca (sizeof (struct charset_map_entries)));
480 n_entries = 0;
481 for (i = 0; i < len; i += 2)
482 {
483 Lisp_Object val, val2;
484 unsigned from, to;
485 int c;
486 int idx;
487
488 val = AREF (vec, i);
489 if (CONSP (val))
490 {
491 val2 = XCDR (val);
492 val = XCAR (val);
493 CHECK_NATNUM (val);
494 CHECK_NATNUM (val2);
495 from = XFASTINT (val);
496 to = XFASTINT (val2);
497 }
498 else
499 {
500 CHECK_NATNUM (val);
501 from = to = XFASTINT (val);
502 }
503 val = AREF (vec, i + 1);
504 CHECK_NATNUM (val);
505 c = XFASTINT (val);
506
507 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
508 continue;
509
510 if (n_entries > 0 && (n_entries % 0x10000) == 0)
511 {
512 entries->next = ((struct charset_map_entries *)
513 alloca (sizeof (struct charset_map_entries)));
514 entries = entries->next;
515 }
516 idx = n_entries % 0x10000;
517 entries->entry[idx].from = from;
518 entries->entry[idx].to = to;
519 entries->entry[idx].c = c;
520 n_entries++;
521 }
522
523 load_charset_map (charset, head, n_entries, control_flag);
524 }
525
526 static void
527 load_charset (charset)
528 struct charset *charset;
529 {
530 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
531 {
532 Lisp_Object map;
533
534 map = CHARSET_MAP (charset);
535 if (STRINGP (map))
536 load_charset_map_from_file (charset, map, 1);
537 else
538 load_charset_map_from_vector (charset, map, 1);
539 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP;
540 }
541 }
542
543
544 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
545 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
546 (object)
547 Lisp_Object object;
548 {
549 return (CHARSETP (object) ? Qt : Qnil);
550 }
551
552
553 void
554 map_charset_chars (c_function, function, arg,
555 charset, from, to)
556 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
557 Lisp_Object function, arg;
558 struct charset *charset;
559 unsigned from, to;
560 {
561 Lisp_Object range;
562 int partial;
563
564 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP_DEFERRED)
565 load_charset (charset);
566
567 partial = (from > CHARSET_MIN_CODE (charset)
568 || to < CHARSET_MAX_CODE (charset));
569
570 if (CHARSET_UNIFIED_P (charset)
571 && CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
572 {
573 map_char_table_for_charset (c_function, function,
574 CHARSET_DEUNIFIER (charset), arg,
575 partial ? charset : NULL, from, to);
576 }
577
578 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
579 {
580 int from_idx = CODE_POINT_TO_INDEX (charset, from);
581 int to_idx = CODE_POINT_TO_INDEX (charset, to);
582 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
583 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
584
585 range = Fcons (make_number (from_c), make_number (to_c));
586 if (NILP (function))
587 (*c_function) (arg, range);
588 else
589 call2 (function, range, arg);
590 }
591 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
592 {
593 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
594 return;
595 map_char_table_for_charset (c_function, function,
596 CHARSET_ENCODER (charset), arg,
597 partial ? charset : NULL, from, to);
598 }
599 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
600 {
601 Lisp_Object subset_info;
602 int offset;
603
604 subset_info = CHARSET_SUBSET (charset);
605 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
606 offset = XINT (AREF (subset_info, 3));
607 from -= offset;
608 if (from < XFASTINT (AREF (subset_info, 1)))
609 from = XFASTINT (AREF (subset_info, 1));
610 to -= offset;
611 if (to > XFASTINT (AREF (subset_info, 2)))
612 to = XFASTINT (AREF (subset_info, 2));
613 map_charset_chars (c_function, function, arg, charset, from, to);
614 }
615 else /* i.e. CHARSET_METHOD_SUPERSET */
616 {
617 Lisp_Object parents;
618
619 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
620 parents = XCDR (parents))
621 {
622 int offset;
623 unsigned this_from, this_to;
624
625 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
626 offset = XINT (XCDR (XCAR (parents)));
627 this_from = from - offset;
628 this_to = to - offset;
629 if (this_from < CHARSET_MIN_CODE (charset))
630 this_from = CHARSET_MIN_CODE (charset);
631 if (this_to > CHARSET_MAX_CODE (charset))
632 this_to = CHARSET_MAX_CODE (charset);
633 map_charset_chars (c_function, function, arg, charset,
634 this_from, this_to);
635 }
636 }
637 }
638
639 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
640 doc: /* Call FUNCTION for all characters in CHARSET.
641 FUNCTION is called with an argument RANGE and the optional 3rd
642 argument ARG.
643
644 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
645 characters contained in CHARSET.
646
647 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
648 range of code points of target characters. */)
649 (function, charset, arg, from_code, to_code)
650 Lisp_Object function, charset, arg, from_code, to_code;
651 {
652 struct charset *cs;
653 unsigned from, to;
654
655 CHECK_CHARSET_GET_CHARSET (charset, cs);
656 if (NILP (from_code))
657 from = CHARSET_MIN_CODE (cs);
658 else
659 {
660 CHECK_NATNUM (from_code);
661 from = XINT (from_code);
662 if (from < CHARSET_MIN_CODE (cs))
663 from = CHARSET_MIN_CODE (cs);
664 }
665 if (NILP (to_code))
666 to = CHARSET_MAX_CODE (cs);
667 else
668 {
669 CHECK_NATNUM (to_code);
670 to = XINT (to_code);
671 if (to > CHARSET_MAX_CODE (cs))
672 to = CHARSET_MAX_CODE (cs);
673 }
674 map_charset_chars (NULL, function, arg, cs, from, to);
675 return Qnil;
676 }
677
678
679 /* Define a charset according to the arguments. The Nth argument is
680 the Nth attribute of the charset (the last attribute `charset-id'
681 is not included). See the docstring of `define-charset' for the
682 detail. */
683
684 DEFUN ("define-charset-internal", Fdefine_charset_internal,
685 Sdefine_charset_internal, charset_arg_max, MANY, 0,
686 doc: /* For internal use only.
687 usage: (define-charset-internal ...) */)
688 (nargs, args)
689 int nargs;
690 Lisp_Object *args;
691 {
692 /* Charset attr vector. */
693 Lisp_Object attrs;
694 Lisp_Object val;
695 unsigned hash_code;
696 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
697 int i, j;
698 struct charset charset;
699 int id;
700 int dimension;
701 int new_definition_p;
702 int nchars;
703
704 if (nargs != charset_arg_max)
705 return Fsignal (Qwrong_number_of_arguments,
706 Fcons (intern ("define-charset-internal"),
707 make_number (nargs)));
708
709 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
710
711 CHECK_SYMBOL (args[charset_arg_name]);
712 ASET (attrs, charset_name, args[charset_arg_name]);
713
714 val = args[charset_arg_code_space];
715 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
716 {
717 int min_byte, max_byte;
718
719 min_byte = XINT (Faref (val, make_number (i * 2)));
720 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
721 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
722 error ("Invalid :code-space value");
723 charset.code_space[i * 4] = min_byte;
724 charset.code_space[i * 4 + 1] = max_byte;
725 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
726 nchars *= charset.code_space[i * 4 + 2];
727 charset.code_space[i * 4 + 3] = nchars;
728 if (max_byte > 0)
729 dimension = i + 1;
730 }
731
732 val = args[charset_arg_dimension];
733 if (NILP (val))
734 charset.dimension = dimension;
735 else
736 {
737 CHECK_NATNUM (val);
738 charset.dimension = XINT (val);
739 if (charset.dimension < 1 || charset.dimension > 4)
740 args_out_of_range_3 (val, make_number (1), make_number (4));
741 }
742
743 charset.code_linear_p
744 = (charset.dimension == 1
745 || (charset.code_space[2] == 256
746 && (charset.dimension == 2
747 || (charset.code_space[6] == 256
748 && (charset.dimension == 3
749 || charset.code_space[10] == 256)))));
750
751 if (! charset.code_linear_p)
752 {
753 charset.code_space_mask = (unsigned char *) xmalloc (256);
754 bzero (charset.code_space_mask, 256);
755 for (i = 0; i < 4; i++)
756 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
757 j++)
758 charset.code_space_mask[j] |= (1 << i);
759 }
760
761 charset.iso_chars_96 = charset.code_space[2] == 96;
762
763 charset.min_code = (charset.code_space[0]
764 | (charset.code_space[4] << 8)
765 | (charset.code_space[8] << 16)
766 | (charset.code_space[12] << 24));
767 charset.max_code = (charset.code_space[1]
768 | (charset.code_space[5] << 8)
769 | (charset.code_space[9] << 16)
770 | (charset.code_space[13] << 24));
771 charset.char_index_offset = 0;
772
773 val = args[charset_arg_min_code];
774 if (! NILP (val))
775 {
776 unsigned code;
777
778 if (INTEGERP (val))
779 code = XINT (val);
780 else
781 {
782 CHECK_CONS (val);
783 CHECK_NUMBER_CAR (val);
784 CHECK_NUMBER_CDR (val);
785 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
786 }
787 if (code < charset.min_code
788 || code > charset.max_code)
789 args_out_of_range_3 (make_number (charset.min_code),
790 make_number (charset.max_code), val);
791 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
792 charset.min_code = code;
793 }
794
795 val = args[charset_arg_max_code];
796 if (! NILP (val))
797 {
798 unsigned code;
799
800 if (INTEGERP (val))
801 code = XINT (val);
802 else
803 {
804 CHECK_CONS (val);
805 CHECK_NUMBER_CAR (val);
806 CHECK_NUMBER_CDR (val);
807 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
808 }
809 if (code < charset.min_code
810 || code > charset.max_code)
811 args_out_of_range_3 (make_number (charset.min_code),
812 make_number (charset.max_code), val);
813 charset.max_code = code;
814 }
815
816 charset.compact_codes_p = charset.max_code < 0x1000000;
817
818 val = args[charset_arg_invalid_code];
819 if (NILP (val))
820 {
821 if (charset.min_code > 0)
822 charset.invalid_code = 0;
823 else
824 {
825 XSETINT (val, charset.max_code + 1);
826 if (XINT (val) == charset.max_code + 1)
827 charset.invalid_code = charset.max_code + 1;
828 else
829 error ("Attribute :invalid-code must be specified");
830 }
831 }
832 else
833 {
834 CHECK_NATNUM (val);
835 charset.invalid_code = XFASTINT (val);
836 }
837
838 val = args[charset_arg_iso_final];
839 if (NILP (val))
840 charset.iso_final = -1;
841 else
842 {
843 CHECK_NUMBER (val);
844 if (XINT (val) < '0' || XINT (val) > 127)
845 error ("Invalid iso-final-char: %d", XINT (val));
846 charset.iso_final = XINT (val);
847 }
848
849 val = args[charset_arg_iso_revision];
850 if (NILP (val))
851 charset.iso_revision = -1;
852 else
853 {
854 CHECK_NUMBER (val);
855 if (XINT (val) > 63)
856 args_out_of_range (make_number (63), val);
857 charset.iso_revision = XINT (val);
858 }
859
860 val = args[charset_arg_emacs_mule_id];
861 if (NILP (val))
862 charset.emacs_mule_id = -1;
863 else
864 {
865 CHECK_NATNUM (val);
866 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
867 error ("Invalid emacs-mule-id: %d", XINT (val));
868 charset.emacs_mule_id = XINT (val);
869 }
870
871 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
872
873 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
874
875 charset.unified_p = 0;
876
877 bzero (charset.fast_map, sizeof (charset.fast_map));
878
879 if (! NILP (args[charset_arg_code_offset]))
880 {
881 val = args[charset_arg_code_offset];
882 CHECK_NUMBER (val);
883
884 charset.method = CHARSET_METHOD_OFFSET;
885 charset.code_offset = XINT (val);
886
887 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
888 charset.min_char = i + charset.code_offset;
889 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
890 charset.max_char = i + charset.code_offset;
891 if (charset.max_char > MAX_CHAR)
892 error ("Unsupported max char: %d", charset.max_char);
893
894 i = (charset.min_char >> 7) << 7;
895 for (; i < 0x10000 && i <= charset.max_char; i += 128)
896 CHARSET_FAST_MAP_SET (i, charset.fast_map);
897 i = (i >> 12) << 12;
898 for (; i <= charset.max_char; i += 0x1000)
899 CHARSET_FAST_MAP_SET (i, charset.fast_map);
900 }
901 else if (! NILP (args[charset_arg_map]))
902 {
903 val = args[charset_arg_map];
904 ASET (attrs, charset_map, val);
905 if (STRINGP (val))
906 load_charset_map_from_file (&charset, val, 0);
907 else
908 load_charset_map_from_vector (&charset, val, 0);
909 charset.method = CHARSET_METHOD_MAP_DEFERRED;
910 }
911 else if (! NILP (args[charset_arg_subset]))
912 {
913 Lisp_Object parent;
914 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
915 struct charset *parent_charset;
916
917 val = args[charset_arg_subset];
918 parent = Fcar (val);
919 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
920 parent_min_code = Fnth (make_number (1), val);
921 CHECK_NATNUM (parent_min_code);
922 parent_max_code = Fnth (make_number (2), val);
923 CHECK_NATNUM (parent_max_code);
924 parent_code_offset = Fnth (make_number (3), val);
925 CHECK_NUMBER (parent_code_offset);
926 val = Fmake_vector (make_number (4), Qnil);
927 ASET (val, 0, make_number (parent_charset->id));
928 ASET (val, 1, parent_min_code);
929 ASET (val, 2, parent_max_code);
930 ASET (val, 3, parent_code_offset);
931 ASET (attrs, charset_subset, val);
932
933 charset.method = CHARSET_METHOD_SUBSET;
934 /* Here, we just copy the parent's fast_map. It's not accurate,
935 but at least it works for quickly detecting which character
936 DOESN'T belong to this charset. */
937 for (i = 0; i < 190; i++)
938 charset.fast_map[i] = parent_charset->fast_map[i];
939
940 /* We also copy these for parents. */
941 charset.min_char = parent_charset->min_char;
942 charset.max_char = parent_charset->max_char;
943 }
944 else if (! NILP (args[charset_arg_superset]))
945 {
946 val = args[charset_arg_superset];
947 charset.method = CHARSET_METHOD_SUPERSET;
948 val = Fcopy_sequence (val);
949 ASET (attrs, charset_superset, val);
950
951 charset.min_char = MAX_CHAR;
952 charset.max_char = 0;
953 for (; ! NILP (val); val = Fcdr (val))
954 {
955 Lisp_Object elt, car_part, cdr_part;
956 int this_id, offset;
957 struct charset *this_charset;
958
959 elt = Fcar (val);
960 if (CONSP (elt))
961 {
962 car_part = XCAR (elt);
963 cdr_part = XCDR (elt);
964 CHECK_CHARSET_GET_ID (car_part, this_id);
965 CHECK_NUMBER (cdr_part);
966 offset = XINT (cdr_part);
967 }
968 else
969 {
970 CHECK_CHARSET_GET_ID (elt, this_id);
971 offset = 0;
972 }
973 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
974
975 this_charset = CHARSET_FROM_ID (this_id);
976 if (charset.min_char > this_charset->min_char)
977 charset.min_char = this_charset->min_char;
978 if (charset.max_char < this_charset->max_char)
979 charset.max_char = this_charset->max_char;
980 for (i = 0; i < 190; i++)
981 charset.fast_map[i] |= this_charset->fast_map[i];
982 }
983 }
984 else
985 error ("None of :code-offset, :map, :parents are specified");
986
987 val = args[charset_arg_unify_map];
988 if (! NILP (val) && !STRINGP (val))
989 CHECK_VECTOR (val);
990 ASET (attrs, charset_unify_map, val);
991
992 CHECK_LIST (args[charset_arg_plist]);
993 ASET (attrs, charset_plist, args[charset_arg_plist]);
994
995 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
996 &hash_code);
997 if (charset.hash_index >= 0)
998 {
999 new_definition_p = 0;
1000 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1001 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1002 }
1003 else
1004 {
1005 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1006 hash_code);
1007 if (charset_table_used == charset_table_size)
1008 {
1009 struct charset *new_table
1010 = (struct charset *) xmalloc (sizeof (struct charset)
1011 * (charset_table_size + 16));
1012 bcopy (charset_table, new_table,
1013 sizeof (struct charset) * charset_table_size);
1014 charset_table_size += 16;
1015 charset_table = new_table;
1016 }
1017 id = charset_table_used++;
1018 new_definition_p = 1;
1019 }
1020
1021 ASET (attrs, charset_id, make_number (id));
1022 charset.id = id;
1023 charset_table[id] = charset;
1024
1025 if (charset.iso_final >= 0)
1026 {
1027 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1028 charset.iso_final) = id;
1029 if (new_definition_p)
1030 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1031 Fcons (make_number (id), Qnil));
1032 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1033 charset_jisx0201_roman = id;
1034 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1035 charset_jisx0208_1978 = id;
1036 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1037 charset_jisx0208 = id;
1038 }
1039
1040 if (charset.emacs_mule_id >= 0)
1041 {
1042 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1043 if (charset.emacs_mule_id < 0xA0)
1044 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1045 if (new_definition_p)
1046 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1047 Fcons (make_number (id), Qnil));
1048 }
1049
1050 if (new_definition_p)
1051 {
1052 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1053 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1054 Fcons (make_number (id), Qnil));
1055 charset_ordered_list_tick++;
1056 }
1057
1058 return Qnil;
1059 }
1060
1061
1062 /* Same as Fdefine_charset_internal but arguments are more convenient
1063 to call from C (typically in syms_of_charset). This can define a
1064 charset of `offset' method only. Return the ID of the new
1065 charset. */
1066
1067 static int
1068 define_charset_internal (name, dimension, code_space, min_code, max_code,
1069 iso_final, iso_revision, emacs_mule_id,
1070 ascii_compatible, supplementary,
1071 code_offset)
1072 Lisp_Object name;
1073 int dimension;
1074 unsigned char *code_space;
1075 unsigned min_code, max_code;
1076 int iso_final, iso_revision, emacs_mule_id;
1077 int ascii_compatible, supplementary;
1078 int code_offset;
1079 {
1080 Lisp_Object args[charset_arg_max];
1081 Lisp_Object plist[14];
1082 Lisp_Object val;
1083 int i;
1084
1085 args[charset_arg_name] = name;
1086 args[charset_arg_dimension] = make_number (dimension);
1087 val = Fmake_vector (make_number (8), make_number (0));
1088 for (i = 0; i < 8; i++)
1089 ASET (val, i, make_number (code_space[i]));
1090 args[charset_arg_code_space] = val;
1091 args[charset_arg_min_code] = make_number (min_code);
1092 args[charset_arg_max_code] = make_number (max_code);
1093 args[charset_arg_iso_final]
1094 = (iso_final < 0 ? Qnil : make_number (iso_final));
1095 args[charset_arg_iso_revision] = make_number (iso_revision);
1096 args[charset_arg_emacs_mule_id]
1097 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1098 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1099 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1100 args[charset_arg_invalid_code] = Qnil;
1101 args[charset_arg_code_offset] = make_number (code_offset);
1102 args[charset_arg_map] = Qnil;
1103 args[charset_arg_subset] = Qnil;
1104 args[charset_arg_superset] = Qnil;
1105 args[charset_arg_unify_map] = Qnil;
1106
1107 plist[0] = intern (":name");
1108 plist[1] = args[charset_arg_name];
1109 plist[2] = intern (":dimension");
1110 plist[3] = args[charset_arg_dimension];
1111 plist[4] = intern (":code-space");
1112 plist[5] = args[charset_arg_code_space];
1113 plist[6] = intern (":iso-final-char");
1114 plist[7] = args[charset_arg_iso_final];
1115 plist[8] = intern (":emacs-mule-id");
1116 plist[9] = args[charset_arg_emacs_mule_id];
1117 plist[10] = intern (":ascii-compatible-p");
1118 plist[11] = args[charset_arg_ascii_compatible_p];
1119 plist[12] = intern (":code-offset");
1120 plist[13] = args[charset_arg_code_offset];
1121
1122 args[charset_arg_plist] = Flist (14, plist);
1123 Fdefine_charset_internal (charset_arg_max, args);
1124
1125 return XINT (CHARSET_SYMBOL_ID (name));
1126 }
1127
1128
1129 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1130 Sdefine_charset_alias, 2, 2, 0,
1131 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1132 (alias, charset)
1133 Lisp_Object alias, charset;
1134 {
1135 Lisp_Object attr;
1136
1137 CHECK_CHARSET_GET_ATTR (charset, attr);
1138 Fputhash (alias, attr, Vcharset_hash_table);
1139 Vcharset_list = Fcons (alias, Vcharset_list);
1140 return Qnil;
1141 }
1142
1143
1144 DEFUN ("unibyte-charset", Funibyte_charset, Sunibyte_charset, 0, 0, 0,
1145 doc: /* Return the unibyte charset (set by `set-unibyte-charset'). */)
1146 ()
1147 {
1148 return CHARSET_NAME (CHARSET_FROM_ID (charset_unibyte));
1149 }
1150
1151
1152 DEFUN ("set-unibyte-charset", Fset_unibyte_charset, Sset_unibyte_charset,
1153 1, 1, 0,
1154 doc: /* Set the unibyte charset to CHARSET.
1155 This determines how unibyte/multibyte conversion is done. See also
1156 function `unibyte-charset'. */)
1157 (charset)
1158 Lisp_Object charset;
1159 {
1160 struct charset *cs;
1161 int i, c;
1162
1163 CHECK_CHARSET_GET_CHARSET (charset, cs);
1164 if (! cs->ascii_compatible_p
1165 || cs->dimension != 1)
1166 error ("Inappropriate unibyte charset: %s", SDATA (SYMBOL_NAME (charset)));
1167 charset_unibyte = cs->id;
1168 memset (unibyte_has_multibyte_table, 1, 128);
1169 for (i = 128; i < 256; i++)
1170 {
1171 c = DECODE_CHAR (cs, i);
1172 unibyte_to_multibyte_table[i] = (c < 0 ? BYTE8_TO_CHAR (i) : c);
1173 unibyte_has_multibyte_table[i] = c >= 0;
1174 }
1175
1176 return Qnil;
1177 }
1178
1179
1180 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1181 doc: /* Return the property list of CHARSET. */)
1182 (charset)
1183 Lisp_Object charset;
1184 {
1185 Lisp_Object attrs;
1186
1187 CHECK_CHARSET_GET_ATTR (charset, attrs);
1188 return CHARSET_ATTR_PLIST (attrs);
1189 }
1190
1191
1192 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1193 doc: /* Set CHARSET's property list to PLIST. */)
1194 (charset, plist)
1195 Lisp_Object charset, plist;
1196 {
1197 Lisp_Object attrs;
1198
1199 CHECK_CHARSET_GET_ATTR (charset, attrs);
1200 CHARSET_ATTR_PLIST (attrs) = plist;
1201 return plist;
1202 }
1203
1204
1205 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1206 doc: /* Unify characters of CHARSET with Unicode.
1207 This means reading the relevant file and installing the table defined
1208 by CHARSET's `:unify-map' property.
1209
1210 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1211 the same meaning as the `:unify-map' attribute in the function
1212 `define-charset' (which see).
1213
1214 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1215 (charset, unify_map, deunify)
1216 Lisp_Object charset, unify_map, deunify;
1217 {
1218 int id;
1219 struct charset *cs;
1220
1221 CHECK_CHARSET_GET_ID (charset, id);
1222 cs = CHARSET_FROM_ID (id);
1223 if (CHARSET_METHOD (cs) == CHARSET_METHOD_MAP_DEFERRED)
1224 load_charset (cs);
1225 if (NILP (deunify)
1226 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1227 : ! CHARSET_UNIFIED_P (cs))
1228 return Qnil;
1229
1230 CHARSET_UNIFIED_P (cs) = 0;
1231 if (NILP (deunify))
1232 {
1233 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET)
1234 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1235 if (NILP (unify_map))
1236 unify_map = CHARSET_UNIFY_MAP (cs);
1237 if (STRINGP (unify_map))
1238 load_charset_map_from_file (cs, unify_map, 2);
1239 else if (VECTORP (unify_map))
1240 load_charset_map_from_vector (cs, unify_map, 2);
1241 else if (NILP (unify_map))
1242 error ("No unify-map for charset");
1243 else
1244 error ("Bad unify-map arg");
1245 CHARSET_UNIFIED_P (cs) = 1;
1246 }
1247 else if (CHAR_TABLE_P (Vchar_unify_table))
1248 {
1249 int min_code = CHARSET_MIN_CODE (cs);
1250 int max_code = CHARSET_MAX_CODE (cs);
1251 int min_char = DECODE_CHAR (cs, min_code);
1252 int max_char = DECODE_CHAR (cs, max_code);
1253
1254 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1255 }
1256
1257 return Qnil;
1258 }
1259
1260 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1261 Sget_unused_iso_final_char, 2, 2, 0,
1262 doc: /*
1263 Return an unused ISO final char for a charset of DIMENISION and CHARS.
1264 DIMENSION is the number of bytes to represent a character: 1 or 2.
1265 CHARS is the number of characters in a dimension: 94 or 96.
1266
1267 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1268 If there's no unused final char for the specified kind of charset,
1269 return nil. */)
1270 (dimension, chars)
1271 Lisp_Object dimension, chars;
1272 {
1273 int final_char;
1274
1275 CHECK_NUMBER (dimension);
1276 CHECK_NUMBER (chars);
1277 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1278 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1279 if (XINT (chars) != 94 && XINT (chars) != 96)
1280 args_out_of_range_3 (chars, make_number (94), make_number (96));
1281 for (final_char = '0'; final_char <= '?'; final_char++)
1282 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1283 break;
1284 return (final_char <= '?' ? make_number (final_char) : Qnil);
1285 }
1286
1287 static void
1288 check_iso_charset_parameter (dimension, chars, final_char)
1289 Lisp_Object dimension, chars, final_char;
1290 {
1291 CHECK_NATNUM (dimension);
1292 CHECK_NATNUM (chars);
1293 CHECK_NATNUM (final_char);
1294
1295 if (XINT (dimension) > 3)
1296 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1297 if (XINT (chars) != 94 && XINT (chars) != 96)
1298 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1299 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1300 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1301 }
1302
1303
1304 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1305 4, 4, 0,
1306 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1307
1308 On decoding by an ISO-2022 base coding system, when a charset
1309 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1310 if CHARSET is designated instead. */)
1311 (dimension, chars, final_char, charset)
1312 Lisp_Object dimension, chars, final_char, charset;
1313 {
1314 int id;
1315 int chars_flag;
1316
1317 CHECK_CHARSET_GET_ID (charset, id);
1318 check_iso_charset_parameter (dimension, chars, final_char);
1319 chars_flag = XINT (chars) == 96;
1320 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1321 return Qnil;
1322 }
1323
1324
1325 /* Return information about charsets in the text at PTR of NBYTES
1326 bytes, which are NCHARS characters. The value is:
1327
1328 0: Each character is represented by one byte. This is always
1329 true for a unibyte string. For a multibyte string, true if
1330 it contains only ASCII characters.
1331
1332 1: No charsets other than ascii, control-1, and latin-1 are
1333 found.
1334
1335 2: Otherwise.
1336 */
1337
1338 int
1339 string_xstring_p (string)
1340 Lisp_Object string;
1341 {
1342 const unsigned char *p = SDATA (string);
1343 const unsigned char *endp = p + SBYTES (string);
1344
1345 if (SCHARS (string) == SBYTES (string))
1346 return 0;
1347
1348 while (p < endp)
1349 {
1350 int c = STRING_CHAR_ADVANCE (p);
1351
1352 if (c >= 0x100)
1353 return 2;
1354 }
1355 return 1;
1356 }
1357
1358
1359 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1360
1361 CHARSETS is a vector. If Nth element is non-nil, it means the
1362 charset whose id is N is already found.
1363
1364 It may lookup a translation table TABLE if supplied. */
1365
1366 static void
1367 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1368 const unsigned char *ptr;
1369 EMACS_INT nchars, nbytes;
1370 Lisp_Object charsets, table;
1371 int multibyte;
1372 {
1373 const unsigned char *pend = ptr + nbytes;
1374
1375 if (nchars == nbytes)
1376 {
1377 if (multibyte)
1378 ASET (charsets, charset_ascii, Qt);
1379 else
1380 while (ptr < pend)
1381 {
1382 int c = *ptr++;
1383
1384 if (!NILP (table))
1385 c = translate_char (table, c);
1386 if (ASCII_BYTE_P (c))
1387 ASET (charsets, charset_ascii, Qt);
1388 else
1389 ASET (charsets, charset_eight_bit, Qt);
1390 }
1391 }
1392 else
1393 {
1394 while (ptr < pend)
1395 {
1396 int c = STRING_CHAR_ADVANCE (ptr);
1397 struct charset *charset;
1398
1399 if (!NILP (table))
1400 c = translate_char (table, c);
1401 charset = CHAR_CHARSET (c);
1402 ASET (charsets, CHARSET_ID (charset), Qt);
1403 }
1404 }
1405 }
1406
1407 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1408 2, 3, 0,
1409 doc: /* Return a list of charsets in the region between BEG and END.
1410 BEG and END are buffer positions.
1411 Optional arg TABLE if non-nil is a translation table to look up.
1412
1413 If the current buffer is unibyte, the returned list may contain
1414 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1415 (beg, end, table)
1416 Lisp_Object beg, end, table;
1417 {
1418 Lisp_Object charsets;
1419 EMACS_INT from, from_byte, to, stop, stop_byte;
1420 int i;
1421 Lisp_Object val;
1422 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1423
1424 validate_region (&beg, &end);
1425 from = XFASTINT (beg);
1426 stop = to = XFASTINT (end);
1427
1428 if (from < GPT && GPT < to)
1429 {
1430 stop = GPT;
1431 stop_byte = GPT_BYTE;
1432 }
1433 else
1434 stop_byte = CHAR_TO_BYTE (stop);
1435
1436 from_byte = CHAR_TO_BYTE (from);
1437
1438 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1439 while (1)
1440 {
1441 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1442 stop_byte - from_byte, charsets, table,
1443 multibyte);
1444 if (stop < to)
1445 {
1446 from = stop, from_byte = stop_byte;
1447 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1448 }
1449 else
1450 break;
1451 }
1452
1453 val = Qnil;
1454 for (i = charset_table_used - 1; i >= 0; i--)
1455 if (!NILP (AREF (charsets, i)))
1456 val = Fcons (CHARSET_NAME (charset_table + i), val);
1457 return val;
1458 }
1459
1460 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1461 1, 2, 0,
1462 doc: /* Return a list of charsets in STR.
1463 Optional arg TABLE if non-nil is a translation table to look up.
1464
1465 If STR is unibyte, the returned list may contain
1466 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1467 (str, table)
1468 Lisp_Object str, table;
1469 {
1470 Lisp_Object charsets;
1471 int i;
1472 Lisp_Object val;
1473
1474 CHECK_STRING (str);
1475
1476 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1477 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1478 charsets, table,
1479 STRING_MULTIBYTE (str));
1480 val = Qnil;
1481 for (i = charset_table_used - 1; i >= 0; i--)
1482 if (!NILP (AREF (charsets, i)))
1483 val = Fcons (CHARSET_NAME (charset_table + i), val);
1484 return val;
1485 }
1486
1487 \f
1488
1489 /* Return a character correponding to the code-point CODE of
1490 CHARSET. */
1491
1492 int
1493 decode_char (charset, code)
1494 struct charset *charset;
1495 unsigned code;
1496 {
1497 int c, char_index;
1498 enum charset_method method = CHARSET_METHOD (charset);
1499
1500 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1501 return -1;
1502
1503 if (method == CHARSET_METHOD_MAP_DEFERRED)
1504 {
1505 load_charset (charset);
1506 method = CHARSET_METHOD (charset);
1507 }
1508
1509 if (method == CHARSET_METHOD_SUBSET)
1510 {
1511 Lisp_Object subset_info;
1512
1513 subset_info = CHARSET_SUBSET (charset);
1514 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1515 code -= XINT (AREF (subset_info, 3));
1516 if (code < XFASTINT (AREF (subset_info, 1))
1517 || code > XFASTINT (AREF (subset_info, 2)))
1518 c = -1;
1519 else
1520 c = DECODE_CHAR (charset, code);
1521 }
1522 else if (method == CHARSET_METHOD_SUPERSET)
1523 {
1524 Lisp_Object parents;
1525
1526 parents = CHARSET_SUPERSET (charset);
1527 c = -1;
1528 for (; CONSP (parents); parents = XCDR (parents))
1529 {
1530 int id = XINT (XCAR (XCAR (parents)));
1531 int code_offset = XINT (XCDR (XCAR (parents)));
1532 unsigned this_code = code - code_offset;
1533
1534 charset = CHARSET_FROM_ID (id);
1535 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1536 break;
1537 }
1538 }
1539 else
1540 {
1541 char_index = CODE_POINT_TO_INDEX (charset, code);
1542 if (char_index < 0)
1543 return -1;
1544
1545 if (method == CHARSET_METHOD_MAP)
1546 {
1547 Lisp_Object decoder;
1548
1549 decoder = CHARSET_DECODER (charset);
1550 if (! VECTORP (decoder))
1551 return -1;
1552 c = XINT (AREF (decoder, char_index));
1553 }
1554 else
1555 {
1556 c = char_index + CHARSET_CODE_OFFSET (charset);
1557 }
1558 }
1559
1560 if (CHARSET_UNIFIED_P (charset)
1561 && c >= 0)
1562 {
1563 MAYBE_UNIFY_CHAR (c);
1564 }
1565
1566 return c;
1567 }
1568
1569 /* Variable used temporarily by the macro ENCODE_CHAR. */
1570 Lisp_Object charset_work;
1571
1572 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1573 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1574 use CHARSET's strict_max_char instead of max_char. */
1575
1576 unsigned
1577 encode_char (charset, c)
1578 struct charset *charset;
1579 int c;
1580 {
1581 unsigned code;
1582 enum charset_method method = CHARSET_METHOD (charset);
1583
1584 if (CHARSET_UNIFIED_P (charset))
1585 {
1586 Lisp_Object deunifier, deunified;
1587
1588 deunifier = CHARSET_DEUNIFIER (charset);
1589 if (! CHAR_TABLE_P (deunifier))
1590 {
1591 Funify_charset (CHARSET_NAME (charset), Qnil, Qnil);
1592 deunifier = CHARSET_DEUNIFIER (charset);
1593 }
1594 deunified = CHAR_TABLE_REF (deunifier, c);
1595 if (! NILP (deunified))
1596 c = XINT (deunified);
1597 }
1598
1599 if (method == CHARSET_METHOD_SUBSET)
1600 {
1601 Lisp_Object subset_info;
1602 struct charset *this_charset;
1603
1604 subset_info = CHARSET_SUBSET (charset);
1605 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1606 code = ENCODE_CHAR (this_charset, c);
1607 if (code == CHARSET_INVALID_CODE (this_charset)
1608 || code < XFASTINT (AREF (subset_info, 1))
1609 || code > XFASTINT (AREF (subset_info, 2)))
1610 return CHARSET_INVALID_CODE (charset);
1611 code += XINT (AREF (subset_info, 3));
1612 return code;
1613 }
1614
1615 if (method == CHARSET_METHOD_SUPERSET)
1616 {
1617 Lisp_Object parents;
1618
1619 parents = CHARSET_SUPERSET (charset);
1620 for (; CONSP (parents); parents = XCDR (parents))
1621 {
1622 int id = XINT (XCAR (XCAR (parents)));
1623 int code_offset = XINT (XCDR (XCAR (parents)));
1624 struct charset *this_charset = CHARSET_FROM_ID (id);
1625
1626 code = ENCODE_CHAR (this_charset, c);
1627 if (code != CHARSET_INVALID_CODE (this_charset))
1628 return code + code_offset;
1629 }
1630 return CHARSET_INVALID_CODE (charset);
1631 }
1632
1633 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1634 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1635 return CHARSET_INVALID_CODE (charset);
1636
1637 if (method == CHARSET_METHOD_MAP_DEFERRED)
1638 {
1639 load_charset (charset);
1640 method = CHARSET_METHOD (charset);
1641 }
1642
1643 if (method == CHARSET_METHOD_MAP)
1644 {
1645 Lisp_Object encoder;
1646 Lisp_Object val;
1647
1648 encoder = CHARSET_ENCODER (charset);
1649 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1650 return CHARSET_INVALID_CODE (charset);
1651 val = CHAR_TABLE_REF (encoder, c);
1652 if (NILP (val))
1653 return CHARSET_INVALID_CODE (charset);
1654 code = XINT (val);
1655 if (! CHARSET_COMPACT_CODES_P (charset))
1656 code = INDEX_TO_CODE_POINT (charset, code);
1657 }
1658 else /* method == CHARSET_METHOD_OFFSET */
1659 {
1660 code = c - CHARSET_CODE_OFFSET (charset);
1661 code = INDEX_TO_CODE_POINT (charset, code);
1662 }
1663
1664 return code;
1665 }
1666
1667
1668 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1669 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1670 Return nil if CODE-POINT is not valid in CHARSET.
1671
1672 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1673
1674 Optional argument RESTRICTION specifies a way to map the pair of CCS
1675 and CODE-POINT to a chracter. Currently not supported and just ignored. */)
1676 (charset, code_point, restriction)
1677 Lisp_Object charset, code_point, restriction;
1678 {
1679 int c, id;
1680 unsigned code;
1681 struct charset *charsetp;
1682
1683 CHECK_CHARSET_GET_ID (charset, id);
1684 if (CONSP (code_point))
1685 {
1686 CHECK_NATNUM_CAR (code_point);
1687 CHECK_NATNUM_CDR (code_point);
1688 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1689 }
1690 else
1691 {
1692 CHECK_NATNUM (code_point);
1693 code = XINT (code_point);
1694 }
1695 charsetp = CHARSET_FROM_ID (id);
1696 c = DECODE_CHAR (charsetp, code);
1697 return (c >= 0 ? make_number (c) : Qnil);
1698 }
1699
1700
1701 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1702 doc: /* Encode the character CH into a code-point of CHARSET.
1703 Return nil if CHARSET doesn't include CH.
1704
1705 Optional argument RESTRICTION specifies a way to map CHAR to a
1706 code-point in CCS. Currently not supported and just ignored. */)
1707 (ch, charset, restriction)
1708 Lisp_Object ch, charset, restriction;
1709 {
1710 int id;
1711 unsigned code;
1712 struct charset *charsetp;
1713
1714 CHECK_CHARSET_GET_ID (charset, id);
1715 CHECK_NATNUM (ch);
1716 charsetp = CHARSET_FROM_ID (id);
1717 code = ENCODE_CHAR (charsetp, XINT (ch));
1718 if (code == CHARSET_INVALID_CODE (charsetp))
1719 return Qnil;
1720 if (code > 0x7FFFFFF)
1721 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1722 return make_number (code);
1723 }
1724
1725
1726 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1727 doc:
1728 /* Return a character of CHARSET whose position codes are CODEn.
1729
1730 CODE1 through CODE4 are optional, but if you don't supply sufficient
1731 position codes, it is assumed that the minimum code in each dimension
1732 is specified. */)
1733 (charset, code1, code2, code3, code4)
1734 Lisp_Object charset, code1, code2, code3, code4;
1735 {
1736 int id, dimension;
1737 struct charset *charsetp;
1738 unsigned code;
1739 int c;
1740
1741 CHECK_CHARSET_GET_ID (charset, id);
1742 charsetp = CHARSET_FROM_ID (id);
1743
1744 dimension = CHARSET_DIMENSION (charsetp);
1745 if (NILP (code1))
1746 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1747 ? 0 : CHARSET_MIN_CODE (charsetp));
1748 else
1749 {
1750 CHECK_NATNUM (code1);
1751 if (XFASTINT (code1) >= 0x100)
1752 args_out_of_range (make_number (0xFF), code1);
1753 code = XFASTINT (code1);
1754
1755 if (dimension > 1)
1756 {
1757 code <<= 8;
1758 if (NILP (code2))
1759 code |= charsetp->code_space[(dimension - 2) * 4];
1760 else
1761 {
1762 CHECK_NATNUM (code2);
1763 if (XFASTINT (code2) >= 0x100)
1764 args_out_of_range (make_number (0xFF), code2);
1765 code |= XFASTINT (code2);
1766 }
1767
1768 if (dimension > 2)
1769 {
1770 code <<= 8;
1771 if (NILP (code3))
1772 code |= charsetp->code_space[(dimension - 3) * 4];
1773 else
1774 {
1775 CHECK_NATNUM (code3);
1776 if (XFASTINT (code3) >= 0x100)
1777 args_out_of_range (make_number (0xFF), code3);
1778 code |= XFASTINT (code3);
1779 }
1780
1781 if (dimension > 3)
1782 {
1783 code <<= 8;
1784 if (NILP (code4))
1785 code |= charsetp->code_space[0];
1786 else
1787 {
1788 CHECK_NATNUM (code4);
1789 if (XFASTINT (code4) >= 0x100)
1790 args_out_of_range (make_number (0xFF), code4);
1791 code |= XFASTINT (code4);
1792 }
1793 }
1794 }
1795 }
1796 }
1797
1798 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1799 code &= 0x7F7F7F7F;
1800 c = DECODE_CHAR (charsetp, code);
1801 if (c < 0)
1802 error ("Invalid code(s)");
1803 return make_number (c);
1804 }
1805
1806
1807 /* Return the first charset in CHARSET_LIST that contains C.
1808 CHARSET_LIST is a list of charset IDs. If it is nil, use
1809 Vcharset_ordered_list. */
1810
1811 struct charset *
1812 char_charset (c, charset_list, code_return)
1813 int c;
1814 Lisp_Object charset_list;
1815 unsigned *code_return;
1816 {
1817 if (NILP (charset_list))
1818 charset_list = Vcharset_ordered_list;
1819
1820 while (CONSP (charset_list))
1821 {
1822 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1823 unsigned code = ENCODE_CHAR (charset, c);
1824
1825 if (code != CHARSET_INVALID_CODE (charset))
1826 {
1827 if (code_return)
1828 *code_return = code;
1829 return charset;
1830 }
1831 charset_list = XCDR (charset_list);
1832 }
1833 return NULL;
1834 }
1835
1836
1837 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1838 doc:
1839 /*Return list of charset and one to four position-codes of CHAR.
1840 The charset is decided by the current priority order of charsets.
1841 A position-code is a byte value of each dimension of the code-point of
1842 CHAR in the charset. */)
1843 (ch)
1844 Lisp_Object ch;
1845 {
1846 struct charset *charset;
1847 int c, dimension;
1848 unsigned code;
1849 Lisp_Object val;
1850
1851 CHECK_CHARACTER (ch);
1852 c = XFASTINT (ch);
1853 charset = CHAR_CHARSET (c);
1854 if (! charset)
1855 abort ();
1856 code = ENCODE_CHAR (charset, c);
1857 if (code == CHARSET_INVALID_CODE (charset))
1858 abort ();
1859 dimension = CHARSET_DIMENSION (charset);
1860 for (val = Qnil; dimension > 0; dimension--)
1861 {
1862 val = Fcons (make_number (code & 0xFF), val);
1863 code >>= 8;
1864 }
1865 return Fcons (CHARSET_NAME (charset), val);
1866 }
1867
1868
1869 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1870 doc: /* Return the charset of highest priority that contains CH. */)
1871 (ch)
1872 Lisp_Object ch;
1873 {
1874 struct charset *charset;
1875
1876 CHECK_CHARACTER (ch);
1877 charset = CHAR_CHARSET (XINT (ch));
1878 return (CHARSET_NAME (charset));
1879 }
1880
1881
1882 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1883 doc: /*
1884 Return charset of a character in the current buffer at position POS.
1885 If POS is nil, it defauls to the current point.
1886 If POS is out of range, the value is nil. */)
1887 (pos)
1888 Lisp_Object pos;
1889 {
1890 Lisp_Object ch;
1891 struct charset *charset;
1892
1893 ch = Fchar_after (pos);
1894 if (! INTEGERP (ch))
1895 return ch;
1896 charset = CHAR_CHARSET (XINT (ch));
1897 return (CHARSET_NAME (charset));
1898 }
1899
1900
1901 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1902 doc: /*
1903 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1904
1905 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1906 by their DIMENSION, CHARS, and FINAL-CHAR,
1907 where as Emacs distinguishes them by charset symbol.
1908 See the documentation of the function `charset-info' for the meanings of
1909 DIMENSION, CHARS, and FINAL-CHAR. */)
1910 (dimension, chars, final_char)
1911 Lisp_Object dimension, chars, final_char;
1912 {
1913 int id;
1914 int chars_flag;
1915
1916 check_iso_charset_parameter (dimension, chars, final_char);
1917 chars_flag = XFASTINT (chars) == 96;
1918 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
1919 XFASTINT (final_char));
1920 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
1921 }
1922
1923
1924 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
1925 0, 0, 0,
1926 doc: /*
1927 Clear encoder and decoder of charsets that are loaded from mapfiles. */)
1928 ()
1929 {
1930 int i;
1931 struct charset *charset;
1932 Lisp_Object attrs;
1933
1934 for (i = 0; i < charset_table_used; i++)
1935 {
1936 charset = CHARSET_FROM_ID (i);
1937 attrs = CHARSET_ATTRIBUTES (charset);
1938
1939 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
1940 {
1941 CHARSET_ATTR_DECODER (attrs) = Qnil;
1942 CHARSET_ATTR_ENCODER (attrs) = Qnil;
1943 CHARSET_METHOD (charset) = CHARSET_METHOD_MAP_DEFERRED;
1944 }
1945
1946 if (CHARSET_UNIFIED_P (charset))
1947 CHARSET_ATTR_DEUNIFIER (attrs) = Qnil;
1948 }
1949
1950 if (CHAR_TABLE_P (Vchar_unified_charset_table))
1951 {
1952 Foptimize_char_table (Vchar_unified_charset_table);
1953 Vchar_unify_table = Vchar_unified_charset_table;
1954 Vchar_unified_charset_table = Qnil;
1955 }
1956
1957 return Qnil;
1958 }
1959
1960 DEFUN ("charset-priority-list", Fcharset_priority_list,
1961 Scharset_priority_list, 0, 1, 0,
1962 doc: /* Return the list of charsets ordered by priority.
1963 HIGHESTP non-nil means just return the highest priority one. */)
1964 (highestp)
1965 Lisp_Object highestp;
1966 {
1967 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
1968
1969 if (!NILP (highestp))
1970 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
1971
1972 while (!NILP (list))
1973 {
1974 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
1975 list = XCDR (list);
1976 }
1977 return Fnreverse (val);
1978 }
1979
1980 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
1981 1, MANY, 0,
1982 doc: /* Assign higher priority to the charsets given as arguments.
1983 usage: (set-charset-priority &rest charsets) */)
1984 (nargs, args)
1985 int nargs;
1986 Lisp_Object *args;
1987 {
1988 Lisp_Object new_head, old_list, arglist[2];
1989 Lisp_Object list_2022, list_emacs_mule;
1990 int i, id;
1991
1992 old_list = Fcopy_sequence (Vcharset_ordered_list);
1993 new_head = Qnil;
1994 for (i = 0; i < nargs; i++)
1995 {
1996 CHECK_CHARSET_GET_ID (args[i], id);
1997 if (! NILP (Fmemq (make_number (id), old_list)))
1998 {
1999 old_list = Fdelq (make_number (id), old_list);
2000 new_head = Fcons (make_number (id), new_head);
2001 }
2002 }
2003 arglist[0] = Fnreverse (new_head);
2004 arglist[1] = old_list;
2005 Vcharset_ordered_list = Fnconc (2, arglist);
2006 charset_ordered_list_tick++;
2007
2008 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2009 CONSP (old_list); old_list = XCDR (old_list))
2010 {
2011 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2012 list_2022 = Fcons (XCAR (old_list), list_2022);
2013 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2014 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2015 }
2016 Viso_2022_charset_list = Fnreverse (list_2022);
2017 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2018
2019 return Qnil;
2020 }
2021
2022 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2023 0, 1, 0,
2024 doc: /* Internal use only.
2025 Return charset identification number of CHARSET. */)
2026 (charset)
2027 Lisp_Object charset;
2028 {
2029 int id;
2030
2031 CHECK_CHARSET_GET_ID (charset, id);
2032 return make_number (id);
2033 }
2034
2035 \f
2036 void
2037 init_charset ()
2038 {
2039 Vcharset_map_path
2040 = Fcons (Fexpand_file_name (build_string ("charsets"), Vdata_directory),
2041 Qnil);
2042 }
2043
2044
2045 void
2046 init_charset_once ()
2047 {
2048 int i, j, k;
2049
2050 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2051 for (j = 0; j < ISO_MAX_CHARS; j++)
2052 for (k = 0; k < ISO_MAX_FINAL; k++)
2053 iso_charset_table[i][j][k] = -1;
2054
2055 for (i = 0; i < 256; i++)
2056 emacs_mule_charset[i] = NULL;
2057
2058 charset_jisx0201_roman = -1;
2059 charset_jisx0208_1978 = -1;
2060 charset_jisx0208 = -1;
2061
2062 for (i = 0; i < 128; i++)
2063 unibyte_to_multibyte_table[i] = i;
2064 for (; i < 256; i++)
2065 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2066 }
2067
2068 #ifdef emacs
2069
2070 void
2071 syms_of_charset ()
2072 {
2073 DEFSYM (Qcharsetp, "charsetp");
2074
2075 DEFSYM (Qascii, "ascii");
2076 DEFSYM (Qunicode, "unicode");
2077 DEFSYM (Qeight_bit, "eight-bit");
2078 DEFSYM (Qiso_8859_1, "iso-8859-1");
2079
2080 DEFSYM (Qgl, "gl");
2081 DEFSYM (Qgr, "gr");
2082
2083 staticpro (&Vcharset_ordered_list);
2084 Vcharset_ordered_list = Qnil;
2085
2086 staticpro (&Viso_2022_charset_list);
2087 Viso_2022_charset_list = Qnil;
2088
2089 staticpro (&Vemacs_mule_charset_list);
2090 Vemacs_mule_charset_list = Qnil;
2091
2092 staticpro (&Vcharset_hash_table);
2093 {
2094 Lisp_Object args[2];
2095 args[0] = QCtest;
2096 args[1] = Qeq;
2097 Vcharset_hash_table = Fmake_hash_table (2, args);
2098 }
2099
2100 charset_table_size = 128;
2101 charset_table = ((struct charset *)
2102 xmalloc (sizeof (struct charset) * charset_table_size));
2103 charset_table_used = 0;
2104
2105 staticpro (&Vchar_unified_charset_table);
2106 Vchar_unified_charset_table = Fmake_char_table (Qnil, make_number (-1));
2107
2108 defsubr (&Scharsetp);
2109 defsubr (&Smap_charset_chars);
2110 defsubr (&Sdefine_charset_internal);
2111 defsubr (&Sdefine_charset_alias);
2112 defsubr (&Sunibyte_charset);
2113 defsubr (&Sset_unibyte_charset);
2114 defsubr (&Scharset_plist);
2115 defsubr (&Sset_charset_plist);
2116 defsubr (&Sunify_charset);
2117 defsubr (&Sget_unused_iso_final_char);
2118 defsubr (&Sdeclare_equiv_charset);
2119 defsubr (&Sfind_charset_region);
2120 defsubr (&Sfind_charset_string);
2121 defsubr (&Sdecode_char);
2122 defsubr (&Sencode_char);
2123 defsubr (&Ssplit_char);
2124 defsubr (&Smake_char);
2125 defsubr (&Schar_charset);
2126 defsubr (&Scharset_after);
2127 defsubr (&Siso_charset);
2128 defsubr (&Sclear_charset_maps);
2129 defsubr (&Scharset_priority_list);
2130 defsubr (&Sset_charset_priority);
2131 defsubr (&Scharset_id_internal);
2132
2133 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2134 doc: /* *Lisp of directories to search for charset map files. */);
2135 Vcharset_map_path = Qnil;
2136
2137 DEFVAR_LISP ("charset-list", &Vcharset_list,
2138 doc: /* List of all charsets ever defined. */);
2139 Vcharset_list = Qnil;
2140
2141 charset_ascii
2142 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2143 0, 127, 'B', -1, 0, 1, 0, 0);
2144 charset_iso_8859_1
2145 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2146 0, 255, -1, -1, -1, 1, 0, 0);
2147 charset_unicode
2148 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2149 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2150 charset_eight_bit
2151 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2152 128, 255, -1, 0, -1, 0, 0,
2153 MAX_5_BYTE_CHAR + 1);
2154 }
2155
2156 #endif /* emacs */
2157
2158 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2159 (do not change this comment) */