]> code.delx.au - gnu-emacs/blob - src/charset.c
* charset.h (charset_iso_8859_1): Remove decl.
[gnu-emacs] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001-2011 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
4 2005, 2006, 2007, 2008, 2009, 2010, 2011
5 National Institute of Advanced Industrial Science and Technology (AIST)
6 Registration Number H14PRO021
7
8 Copyright (C) 2003, 2004
9 National Institute of Advanced Industrial Science and Technology (AIST)
10 Registration Number H13PRO009
11
12 This file is part of GNU Emacs.
13
14 GNU Emacs is free software: you can redistribute it and/or modify
15 it under the terms of the GNU General Public License as published by
16 the Free Software Foundation, either version 3 of the License, or
17 (at your option) any later version.
18
19 GNU Emacs is distributed in the hope that it will be useful,
20 but WITHOUT ANY WARRANTY; without even the implied warranty of
21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 GNU General Public License for more details.
23
24 You should have received a copy of the GNU General Public License
25 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
26
27 #include <config.h>
28
29 #include <stdio.h>
30 #include <unistd.h>
31 #include <ctype.h>
32 #include <limits.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
41
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
49
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
54
55 */
56
57 /* Hash table that contains attributes of each charset. Keys are
58 charset symbols, and values are vectors of charset attributes. */
59 Lisp_Object Vcharset_hash_table;
60
61 /* Table of struct charset. */
62 struct charset *charset_table;
63
64 static int charset_table_size;
65 static int charset_table_used;
66
67 Lisp_Object Qcharsetp;
68
69 /* Special charset symbols. */
70 Lisp_Object Qascii;
71 static Lisp_Object Qeight_bit;
72 static Lisp_Object Qiso_8859_1;
73 static Lisp_Object Qunicode;
74 static Lisp_Object Qemacs;
75
76 /* The corresponding charsets. */
77 int charset_ascii;
78 int charset_eight_bit;
79 static int charset_iso_8859_1;
80 int charset_unicode;
81 static int charset_emacs;
82
83 /* The other special charsets. */
84 int charset_jisx0201_roman;
85 int charset_jisx0208_1978;
86 int charset_jisx0208;
87 int charset_ksc5601;
88
89 /* Value of charset attribute `charset-iso-plane'. */
90 static Lisp_Object Qgl, Qgr;
91
92 /* Charset of unibyte characters. */
93 int charset_unibyte;
94
95 /* List of charsets ordered by the priority. */
96 Lisp_Object Vcharset_ordered_list;
97
98 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
99 charsets. */
100 Lisp_Object Vcharset_non_preferred_head;
101
102 /* Incremented everytime we change Vcharset_ordered_list. This is
103 unsigned short so that it fits in Lisp_Int and never matches
104 -1. */
105 unsigned short charset_ordered_list_tick;
106
107 /* List of iso-2022 charsets. */
108 Lisp_Object Viso_2022_charset_list;
109
110 /* List of emacs-mule charsets. */
111 Lisp_Object Vemacs_mule_charset_list;
112
113 int emacs_mule_charset[256];
114
115 /* Mapping table from ISO2022's charset (specified by DIMENSION,
116 CHARS, and FINAL-CHAR) to Emacs' charset. */
117 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
118
119 #define CODE_POINT_TO_INDEX(charset, code) \
120 ((charset)->code_linear_p \
121 ? (code) - (charset)->min_code \
122 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
123 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
124 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
125 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
126 ? (((((code) >> 24) - (charset)->code_space[12]) \
127 * (charset)->code_space[11]) \
128 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
129 * (charset)->code_space[7]) \
130 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
131 * (charset)->code_space[3]) \
132 + (((code) & 0xFF) - (charset)->code_space[0]) \
133 - ((charset)->char_index_offset)) \
134 : -1)
135
136
137 /* Convert the character index IDX to code-point CODE for CHARSET.
138 It is assumed that IDX is in a valid range. */
139
140 #define INDEX_TO_CODE_POINT(charset, idx) \
141 ((charset)->code_linear_p \
142 ? (idx) + (charset)->min_code \
143 : (idx += (charset)->char_index_offset, \
144 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
145 | (((charset)->code_space[4] \
146 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
147 << 8) \
148 | (((charset)->code_space[8] \
149 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
150 << 16) \
151 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
152 << 24))))
153
154 /* Structure to hold mapping tables for a charset. Used by temacs
155 invoked for dumping. */
156
157 static struct
158 {
159 /* The current charset for which the following tables are setup. */
160 struct charset *current;
161
162 /* 1 iff the following table is used for encoder. */
163 short for_encoder;
164
165 /* When the following table is used for encoding, mininum and
166 maxinum character of the current charset. */
167 int min_char, max_char;
168
169 /* A Unicode character correspoinding to the code indice 0 (i.e. the
170 minimum code-point) of the current charset, or -1 if the code
171 indice 0 is not a Unicode character. This is checked when
172 table.encoder[CHAR] is zero. */
173 int zero_index_char;
174
175 union {
176 /* Table mapping code-indices (not code-points) of the current
177 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
178 doesn't belong to the current charset. */
179 int decoder[0x10000];
180 /* Table mapping Unicode characters to code-indices of the current
181 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
182 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
183 (0x20000..0x2FFFF). Note that there is no charset map that
184 uses both SMP and SIP. */
185 unsigned short encoder[0x20000];
186 } table;
187 } *temp_charset_work;
188
189 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
190 do { \
191 if ((CODE) == 0) \
192 temp_charset_work->zero_index_char = (C); \
193 else if ((C) < 0x20000) \
194 temp_charset_work->table.encoder[(C)] = (CODE); \
195 else \
196 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
197 } while (0)
198
199 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
200 ((C) == temp_charset_work->zero_index_char ? 0 \
201 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
202 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
203 : temp_charset_work->table.encoder[(C) - 0x10000] \
204 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
205
206 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
207 (temp_charset_work->table.decoder[(CODE)] = (C))
208
209 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
210 (temp_charset_work->table.decoder[(CODE)])
211 \f
212
213 /* Set to 1 to warn that a charset map is loaded and thus a buffer
214 text and a string data may be relocated. */
215 int charset_map_loaded;
216
217 struct charset_map_entries
218 {
219 struct {
220 unsigned from, to;
221 int c;
222 } entry[0x10000];
223 struct charset_map_entries *next;
224 };
225
226 /* Load the mapping information of CHARSET from ENTRIES for
227 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
228 encoding (CONTROL_FLAG == 2).
229
230 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
231 and CHARSET->fast_map.
232
233 If CONTROL_FLAG is 1, setup the following tables according to
234 CHARSET->method and inhibit_load_charset_map.
235
236 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
237 ----------------------+--------------------+---------------------------
238 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
239 ----------------------+--------------------+---------------------------
240 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
241
242 If CONTROL_FLAG is 2, setup the following tables.
243
244 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
245 ----------------------+--------------------+---------------------------
246 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
247 ----------------------+--------------------+--------------------------
248 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
249 */
250
251 static void
252 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
253 {
254 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
255 unsigned max_code = CHARSET_MAX_CODE (charset);
256 int ascii_compatible_p = charset->ascii_compatible_p;
257 int min_char, max_char, nonascii_min_char;
258 int i;
259 unsigned char *fast_map = charset->fast_map;
260
261 if (n_entries <= 0)
262 return;
263
264 if (control_flag)
265 {
266 if (! inhibit_load_charset_map)
267 {
268 if (control_flag == 1)
269 {
270 if (charset->method == CHARSET_METHOD_MAP)
271 {
272 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
273
274 vec = CHARSET_DECODER (charset)
275 = Fmake_vector (make_number (n), make_number (-1));
276 }
277 else
278 {
279 char_table_set_range (Vchar_unify_table,
280 charset->min_char, charset->max_char,
281 Qnil);
282 }
283 }
284 else
285 {
286 table = Fmake_char_table (Qnil, Qnil);
287 if (charset->method == CHARSET_METHOD_MAP)
288 CHARSET_ENCODER (charset) = table;
289 else
290 CHARSET_DEUNIFIER (charset) = table;
291 }
292 }
293 else
294 {
295 if (! temp_charset_work)
296 temp_charset_work = xmalloc (sizeof (*temp_charset_work));
297 if (control_flag == 1)
298 {
299 memset (temp_charset_work->table.decoder, -1,
300 sizeof (int) * 0x10000);
301 }
302 else
303 {
304 memset (temp_charset_work->table.encoder, 0,
305 sizeof (unsigned short) * 0x20000);
306 temp_charset_work->zero_index_char = -1;
307 }
308 temp_charset_work->current = charset;
309 temp_charset_work->for_encoder = (control_flag == 2);
310 control_flag += 2;
311 }
312 charset_map_loaded = 1;
313 }
314
315 min_char = max_char = entries->entry[0].c;
316 nonascii_min_char = MAX_CHAR;
317 for (i = 0; i < n_entries; i++)
318 {
319 unsigned from, to;
320 int from_index, to_index, lim_index;
321 int from_c, to_c;
322 int idx = i % 0x10000;
323
324 if (i > 0 && idx == 0)
325 entries = entries->next;
326 from = entries->entry[idx].from;
327 to = entries->entry[idx].to;
328 from_c = entries->entry[idx].c;
329 from_index = CODE_POINT_TO_INDEX (charset, from);
330 if (from == to)
331 {
332 to_index = from_index;
333 to_c = from_c;
334 }
335 else
336 {
337 to_index = CODE_POINT_TO_INDEX (charset, to);
338 to_c = from_c + (to_index - from_index);
339 }
340 if (from_index < 0 || to_index < 0)
341 continue;
342 lim_index = to_index + 1;
343
344 if (to_c > max_char)
345 max_char = to_c;
346 else if (from_c < min_char)
347 min_char = from_c;
348
349 if (control_flag == 1)
350 {
351 if (charset->method == CHARSET_METHOD_MAP)
352 for (; from_index < lim_index; from_index++, from_c++)
353 ASET (vec, from_index, make_number (from_c));
354 else
355 for (; from_index < lim_index; from_index++, from_c++)
356 CHAR_TABLE_SET (Vchar_unify_table,
357 CHARSET_CODE_OFFSET (charset) + from_index,
358 make_number (from_c));
359 }
360 else if (control_flag == 2)
361 {
362 if (charset->method == CHARSET_METHOD_MAP
363 && CHARSET_COMPACT_CODES_P (charset))
364 for (; from_index < lim_index; from_index++, from_c++)
365 {
366 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
367
368 if (NILP (CHAR_TABLE_REF (table, from_c)))
369 CHAR_TABLE_SET (table, from_c, make_number (code));
370 }
371 else
372 for (; from_index < lim_index; from_index++, from_c++)
373 {
374 if (NILP (CHAR_TABLE_REF (table, from_c)))
375 CHAR_TABLE_SET (table, from_c, make_number (from_index));
376 }
377 }
378 else if (control_flag == 3)
379 for (; from_index < lim_index; from_index++, from_c++)
380 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
381 else if (control_flag == 4)
382 for (; from_index < lim_index; from_index++, from_c++)
383 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
384 else /* control_flag == 0 */
385 {
386 if (ascii_compatible_p)
387 {
388 if (! ASCII_BYTE_P (from_c))
389 {
390 if (from_c < nonascii_min_char)
391 nonascii_min_char = from_c;
392 }
393 else if (! ASCII_BYTE_P (to_c))
394 {
395 nonascii_min_char = 0x80;
396 }
397 }
398
399 for (; from_c <= to_c; from_c++)
400 CHARSET_FAST_MAP_SET (from_c, fast_map);
401 }
402 }
403
404 if (control_flag == 0)
405 {
406 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
407 ? nonascii_min_char : min_char);
408 CHARSET_MAX_CHAR (charset) = max_char;
409 }
410 else if (control_flag == 4)
411 {
412 temp_charset_work->min_char = min_char;
413 temp_charset_work->max_char = max_char;
414 }
415 }
416
417
418 /* Read a hexadecimal number (preceded by "0x") from the file FP while
419 paying attention to comment character '#'. */
420
421 static INLINE unsigned
422 read_hex (FILE *fp, int *eof)
423 {
424 int c;
425 unsigned n;
426
427 while ((c = getc (fp)) != EOF)
428 {
429 if (c == '#')
430 {
431 while ((c = getc (fp)) != EOF && c != '\n');
432 }
433 else if (c == '0')
434 {
435 if ((c = getc (fp)) == EOF || c == 'x')
436 break;
437 }
438 }
439 if (c == EOF)
440 {
441 *eof = 1;
442 return 0;
443 }
444 *eof = 0;
445 n = 0;
446 if (c == 'x')
447 while ((c = getc (fp)) != EOF && isxdigit (c))
448 n = ((n << 4)
449 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
450 else
451 while ((c = getc (fp)) != EOF && isdigit (c))
452 n = (n * 10) + c - '0';
453 if (c != EOF)
454 ungetc (c, fp);
455 return n;
456 }
457
458 /* Return a mapping vector for CHARSET loaded from MAPFILE.
459 Each line of MAPFILE has this form
460 0xAAAA 0xCCCC
461 where 0xAAAA is a code-point and 0xCCCC is the corresponding
462 character code, or this form
463 0xAAAA-0xBBBB 0xCCCC
464 where 0xAAAA and 0xBBBB are code-points specifying a range, and
465 0xCCCC is the first character code of the range.
466
467 The returned vector has this form:
468 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
469 where CODE1 is a code-point or a cons of code-points specifying a
470 range.
471
472 Note that this function uses `openp' to open MAPFILE but ignores
473 `file-name-handler-alist' to avoid running any Lisp code. */
474
475 static void
476 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
477 {
478 unsigned min_code = CHARSET_MIN_CODE (charset);
479 unsigned max_code = CHARSET_MAX_CODE (charset);
480 int fd;
481 FILE *fp;
482 int eof;
483 Lisp_Object suffixes;
484 struct charset_map_entries *head, *entries;
485 int n_entries, count;
486 USE_SAFE_ALLOCA;
487
488 suffixes = Fcons (build_string (".map"),
489 Fcons (build_string (".TXT"), Qnil));
490
491 count = SPECPDL_INDEX ();
492 specbind (Qfile_name_handler_alist, Qnil);
493 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
494 unbind_to (count, Qnil);
495 if (fd < 0
496 || ! (fp = fdopen (fd, "r")))
497 error ("Failure in loading charset map: %s", SDATA (mapfile));
498
499 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
500 large (larger than MAX_ALLOCA). */
501 SAFE_ALLOCA (head, struct charset_map_entries *,
502 sizeof (struct charset_map_entries));
503 entries = head;
504 memset (entries, 0, sizeof (struct charset_map_entries));
505
506 n_entries = 0;
507 eof = 0;
508 while (1)
509 {
510 unsigned from, to;
511 int c;
512 int idx;
513
514 from = read_hex (fp, &eof);
515 if (eof)
516 break;
517 if (getc (fp) == '-')
518 to = read_hex (fp, &eof);
519 else
520 to = from;
521 c = (int) read_hex (fp, &eof);
522
523 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
524 continue;
525
526 if (n_entries > 0 && (n_entries % 0x10000) == 0)
527 {
528 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
529 sizeof (struct charset_map_entries));
530 entries = entries->next;
531 memset (entries, 0, sizeof (struct charset_map_entries));
532 }
533 idx = n_entries % 0x10000;
534 entries->entry[idx].from = from;
535 entries->entry[idx].to = to;
536 entries->entry[idx].c = c;
537 n_entries++;
538 }
539 fclose (fp);
540
541 load_charset_map (charset, head, n_entries, control_flag);
542 SAFE_FREE ();
543 }
544
545 static void
546 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
547 {
548 unsigned min_code = CHARSET_MIN_CODE (charset);
549 unsigned max_code = CHARSET_MAX_CODE (charset);
550 struct charset_map_entries *head, *entries;
551 int n_entries;
552 int len = ASIZE (vec);
553 int i;
554 USE_SAFE_ALLOCA;
555
556 if (len % 2 == 1)
557 {
558 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
559 return;
560 }
561
562 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
563 large (larger than MAX_ALLOCA). */
564 SAFE_ALLOCA (head, struct charset_map_entries *,
565 sizeof (struct charset_map_entries));
566 entries = head;
567 memset (entries, 0, sizeof (struct charset_map_entries));
568
569 n_entries = 0;
570 for (i = 0; i < len; i += 2)
571 {
572 Lisp_Object val, val2;
573 unsigned from, to;
574 int c;
575 int idx;
576
577 val = AREF (vec, i);
578 if (CONSP (val))
579 {
580 val2 = XCDR (val);
581 val = XCAR (val);
582 CHECK_NATNUM (val);
583 CHECK_NATNUM (val2);
584 from = XFASTINT (val);
585 to = XFASTINT (val2);
586 }
587 else
588 {
589 CHECK_NATNUM (val);
590 from = to = XFASTINT (val);
591 }
592 val = AREF (vec, i + 1);
593 CHECK_NATNUM (val);
594 c = XFASTINT (val);
595
596 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
597 continue;
598
599 if (n_entries > 0 && (n_entries % 0x10000) == 0)
600 {
601 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
602 sizeof (struct charset_map_entries));
603 entries = entries->next;
604 memset (entries, 0, sizeof (struct charset_map_entries));
605 }
606 idx = n_entries % 0x10000;
607 entries->entry[idx].from = from;
608 entries->entry[idx].to = to;
609 entries->entry[idx].c = c;
610 n_entries++;
611 }
612
613 load_charset_map (charset, head, n_entries, control_flag);
614 SAFE_FREE ();
615 }
616
617
618 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
619 map it is (see the comment of load_charset_map for the detail). */
620
621 static void
622 load_charset (struct charset *charset, int control_flag)
623 {
624 Lisp_Object map;
625
626 if (inhibit_load_charset_map
627 && temp_charset_work
628 && charset == temp_charset_work->current
629 && ((control_flag == 2) == temp_charset_work->for_encoder))
630 return;
631
632 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
633 map = CHARSET_MAP (charset);
634 else
635 {
636 if (! CHARSET_UNIFIED_P (charset))
637 abort ();
638 map = CHARSET_UNIFY_MAP (charset);
639 }
640 if (STRINGP (map))
641 load_charset_map_from_file (charset, map, control_flag);
642 else
643 load_charset_map_from_vector (charset, map, control_flag);
644 }
645
646
647 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
648 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
649 (Lisp_Object object)
650 {
651 return (CHARSETP (object) ? Qt : Qnil);
652 }
653
654
655 static void
656 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
657 Lisp_Object function, Lisp_Object arg,
658 unsigned int from, unsigned int to)
659 {
660 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
661 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
662 Lisp_Object range;
663 int c, stop;
664 struct gcpro gcpro1;
665
666 range = Fcons (Qnil, Qnil);
667 GCPRO1 (range);
668
669 c = temp_charset_work->min_char;
670 stop = (temp_charset_work->max_char < 0x20000
671 ? temp_charset_work->max_char : 0xFFFF);
672
673 while (1)
674 {
675 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
676
677 if (idx >= from_idx && idx <= to_idx)
678 {
679 if (NILP (XCAR (range)))
680 XSETCAR (range, make_number (c));
681 }
682 else if (! NILP (XCAR (range)))
683 {
684 XSETCDR (range, make_number (c - 1));
685 if (c_function)
686 (*c_function) (arg, range);
687 else
688 call2 (function, range, arg);
689 XSETCAR (range, Qnil);
690 }
691 if (c == stop)
692 {
693 if (c == temp_charset_work->max_char)
694 {
695 if (! NILP (XCAR (range)))
696 {
697 XSETCDR (range, make_number (c));
698 if (c_function)
699 (*c_function) (arg, range);
700 else
701 call2 (function, range, arg);
702 }
703 break;
704 }
705 c = 0x1FFFF;
706 stop = temp_charset_work->max_char;
707 }
708 c++;
709 }
710 UNGCPRO;
711 }
712
713 void
714 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
715 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
716 {
717 Lisp_Object range;
718 int partial;
719
720 partial = (from > CHARSET_MIN_CODE (charset)
721 || to < CHARSET_MAX_CODE (charset));
722
723 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
724 {
725 int from_idx = CODE_POINT_TO_INDEX (charset, from);
726 int to_idx = CODE_POINT_TO_INDEX (charset, to);
727 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
728 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
729
730 if (CHARSET_UNIFIED_P (charset))
731 {
732 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
733 load_charset (charset, 2);
734 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
735 map_char_table_for_charset (c_function, function,
736 CHARSET_DEUNIFIER (charset), arg,
737 partial ? charset : NULL, from, to);
738 else
739 map_charset_for_dump (c_function, function, arg, from, to);
740 }
741
742 range = Fcons (make_number (from_c), make_number (to_c));
743 if (NILP (function))
744 (*c_function) (arg, range);
745 else
746 call2 (function, range, arg);
747 }
748 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
749 {
750 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
751 load_charset (charset, 2);
752 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
753 map_char_table_for_charset (c_function, function,
754 CHARSET_ENCODER (charset), arg,
755 partial ? charset : NULL, from, to);
756 else
757 map_charset_for_dump (c_function, function, arg, from, to);
758 }
759 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
760 {
761 Lisp_Object subset_info;
762 int offset;
763
764 subset_info = CHARSET_SUBSET (charset);
765 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
766 offset = XINT (AREF (subset_info, 3));
767 from -= offset;
768 if (from < XFASTINT (AREF (subset_info, 1)))
769 from = XFASTINT (AREF (subset_info, 1));
770 to -= offset;
771 if (to > XFASTINT (AREF (subset_info, 2)))
772 to = XFASTINT (AREF (subset_info, 2));
773 map_charset_chars (c_function, function, arg, charset, from, to);
774 }
775 else /* i.e. CHARSET_METHOD_SUPERSET */
776 {
777 Lisp_Object parents;
778
779 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
780 parents = XCDR (parents))
781 {
782 int offset;
783 unsigned this_from, this_to;
784
785 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
786 offset = XINT (XCDR (XCAR (parents)));
787 this_from = from > offset ? from - offset : 0;
788 this_to = to > offset ? to - offset : 0;
789 if (this_from < CHARSET_MIN_CODE (charset))
790 this_from = CHARSET_MIN_CODE (charset);
791 if (this_to > CHARSET_MAX_CODE (charset))
792 this_to = CHARSET_MAX_CODE (charset);
793 map_charset_chars (c_function, function, arg, charset,
794 this_from, this_to);
795 }
796 }
797 }
798
799 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
800 doc: /* Call FUNCTION for all characters in CHARSET.
801 FUNCTION is called with an argument RANGE and the optional 3rd
802 argument ARG.
803
804 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
805 characters contained in CHARSET.
806
807 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
808 range of code points (in CHARSET) of target characters. */)
809 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
810 {
811 struct charset *cs;
812 unsigned from, to;
813
814 CHECK_CHARSET_GET_CHARSET (charset, cs);
815 if (NILP (from_code))
816 from = CHARSET_MIN_CODE (cs);
817 else
818 {
819 CHECK_NATNUM (from_code);
820 from = XINT (from_code);
821 if (from < CHARSET_MIN_CODE (cs))
822 from = CHARSET_MIN_CODE (cs);
823 }
824 if (NILP (to_code))
825 to = CHARSET_MAX_CODE (cs);
826 else
827 {
828 CHECK_NATNUM (to_code);
829 to = XINT (to_code);
830 if (to > CHARSET_MAX_CODE (cs))
831 to = CHARSET_MAX_CODE (cs);
832 }
833 map_charset_chars (NULL, function, arg, cs, from, to);
834 return Qnil;
835 }
836
837
838 /* Define a charset according to the arguments. The Nth argument is
839 the Nth attribute of the charset (the last attribute `charset-id'
840 is not included). See the docstring of `define-charset' for the
841 detail. */
842
843 DEFUN ("define-charset-internal", Fdefine_charset_internal,
844 Sdefine_charset_internal, charset_arg_max, MANY, 0,
845 doc: /* For internal use only.
846 usage: (define-charset-internal ...) */)
847 (size_t nargs, Lisp_Object *args)
848 {
849 /* Charset attr vector. */
850 Lisp_Object attrs;
851 Lisp_Object val;
852 unsigned hash_code;
853 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
854 int i, j;
855 struct charset charset;
856 int id;
857 int dimension;
858 int new_definition_p;
859 int nchars;
860
861 if (nargs != charset_arg_max)
862 return Fsignal (Qwrong_number_of_arguments,
863 Fcons (intern ("define-charset-internal"),
864 make_number (nargs)));
865
866 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
867
868 CHECK_SYMBOL (args[charset_arg_name]);
869 ASET (attrs, charset_name, args[charset_arg_name]);
870
871 val = args[charset_arg_code_space];
872 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
873 {
874 int min_byte, max_byte;
875
876 min_byte = XINT (Faref (val, make_number (i * 2)));
877 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
878 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
879 error ("Invalid :code-space value");
880 charset.code_space[i * 4] = min_byte;
881 charset.code_space[i * 4 + 1] = max_byte;
882 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
883 nchars *= charset.code_space[i * 4 + 2];
884 charset.code_space[i * 4 + 3] = nchars;
885 if (max_byte > 0)
886 dimension = i + 1;
887 }
888
889 val = args[charset_arg_dimension];
890 if (NILP (val))
891 charset.dimension = dimension;
892 else
893 {
894 CHECK_NATNUM (val);
895 charset.dimension = XINT (val);
896 if (charset.dimension < 1 || charset.dimension > 4)
897 args_out_of_range_3 (val, make_number (1), make_number (4));
898 }
899
900 charset.code_linear_p
901 = (charset.dimension == 1
902 || (charset.code_space[2] == 256
903 && (charset.dimension == 2
904 || (charset.code_space[6] == 256
905 && (charset.dimension == 3
906 || charset.code_space[10] == 256)))));
907
908 if (! charset.code_linear_p)
909 {
910 charset.code_space_mask = (unsigned char *) xmalloc (256);
911 memset (charset.code_space_mask, 0, 256);
912 for (i = 0; i < 4; i++)
913 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
914 j++)
915 charset.code_space_mask[j] |= (1 << i);
916 }
917
918 charset.iso_chars_96 = charset.code_space[2] == 96;
919
920 charset.min_code = (charset.code_space[0]
921 | (charset.code_space[4] << 8)
922 | (charset.code_space[8] << 16)
923 | (charset.code_space[12] << 24));
924 charset.max_code = (charset.code_space[1]
925 | (charset.code_space[5] << 8)
926 | (charset.code_space[9] << 16)
927 | (charset.code_space[13] << 24));
928 charset.char_index_offset = 0;
929
930 val = args[charset_arg_min_code];
931 if (! NILP (val))
932 {
933 unsigned code;
934
935 if (INTEGERP (val))
936 code = XINT (val);
937 else
938 {
939 CHECK_CONS (val);
940 CHECK_NUMBER_CAR (val);
941 CHECK_NUMBER_CDR (val);
942 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
943 }
944 if (code < charset.min_code
945 || code > charset.max_code)
946 args_out_of_range_3 (make_number (charset.min_code),
947 make_number (charset.max_code), val);
948 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
949 charset.min_code = code;
950 }
951
952 val = args[charset_arg_max_code];
953 if (! NILP (val))
954 {
955 unsigned code;
956
957 if (INTEGERP (val))
958 code = XINT (val);
959 else
960 {
961 CHECK_CONS (val);
962 CHECK_NUMBER_CAR (val);
963 CHECK_NUMBER_CDR (val);
964 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
965 }
966 if (code < charset.min_code
967 || code > charset.max_code)
968 args_out_of_range_3 (make_number (charset.min_code),
969 make_number (charset.max_code), val);
970 charset.max_code = code;
971 }
972
973 charset.compact_codes_p = charset.max_code < 0x10000;
974
975 val = args[charset_arg_invalid_code];
976 if (NILP (val))
977 {
978 if (charset.min_code > 0)
979 charset.invalid_code = 0;
980 else
981 {
982 XSETINT (val, charset.max_code + 1);
983 if (XINT (val) == charset.max_code + 1)
984 charset.invalid_code = charset.max_code + 1;
985 else
986 error ("Attribute :invalid-code must be specified");
987 }
988 }
989 else
990 {
991 CHECK_NATNUM (val);
992 charset.invalid_code = XFASTINT (val);
993 }
994
995 val = args[charset_arg_iso_final];
996 if (NILP (val))
997 charset.iso_final = -1;
998 else
999 {
1000 CHECK_NUMBER (val);
1001 if (XINT (val) < '0' || XINT (val) > 127)
1002 error ("Invalid iso-final-char: %"pEd, XINT (val));
1003 charset.iso_final = XINT (val);
1004 }
1005
1006 val = args[charset_arg_iso_revision];
1007 if (NILP (val))
1008 charset.iso_revision = -1;
1009 else
1010 {
1011 CHECK_NUMBER (val);
1012 if (XINT (val) > 63)
1013 args_out_of_range (make_number (63), val);
1014 charset.iso_revision = XINT (val);
1015 }
1016
1017 val = args[charset_arg_emacs_mule_id];
1018 if (NILP (val))
1019 charset.emacs_mule_id = -1;
1020 else
1021 {
1022 CHECK_NATNUM (val);
1023 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1024 error ("Invalid emacs-mule-id: %"pEd, XINT (val));
1025 charset.emacs_mule_id = XINT (val);
1026 }
1027
1028 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1029
1030 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1031
1032 charset.unified_p = 0;
1033
1034 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1035
1036 if (! NILP (args[charset_arg_code_offset]))
1037 {
1038 val = args[charset_arg_code_offset];
1039 CHECK_NUMBER (val);
1040
1041 charset.method = CHARSET_METHOD_OFFSET;
1042 charset.code_offset = XINT (val);
1043
1044 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1045 charset.min_char = i + charset.code_offset;
1046 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1047 charset.max_char = i + charset.code_offset;
1048 if (charset.max_char > MAX_CHAR)
1049 error ("Unsupported max char: %d", charset.max_char);
1050
1051 i = (charset.min_char >> 7) << 7;
1052 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1053 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1054 i = (i >> 12) << 12;
1055 for (; i <= charset.max_char; i += 0x1000)
1056 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1057 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1058 charset.ascii_compatible_p = 1;
1059 }
1060 else if (! NILP (args[charset_arg_map]))
1061 {
1062 val = args[charset_arg_map];
1063 ASET (attrs, charset_map, val);
1064 charset.method = CHARSET_METHOD_MAP;
1065 }
1066 else if (! NILP (args[charset_arg_subset]))
1067 {
1068 Lisp_Object parent;
1069 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1070 struct charset *parent_charset;
1071
1072 val = args[charset_arg_subset];
1073 parent = Fcar (val);
1074 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1075 parent_min_code = Fnth (make_number (1), val);
1076 CHECK_NATNUM (parent_min_code);
1077 parent_max_code = Fnth (make_number (2), val);
1078 CHECK_NATNUM (parent_max_code);
1079 parent_code_offset = Fnth (make_number (3), val);
1080 CHECK_NUMBER (parent_code_offset);
1081 val = Fmake_vector (make_number (4), Qnil);
1082 ASET (val, 0, make_number (parent_charset->id));
1083 ASET (val, 1, parent_min_code);
1084 ASET (val, 2, parent_max_code);
1085 ASET (val, 3, parent_code_offset);
1086 ASET (attrs, charset_subset, val);
1087
1088 charset.method = CHARSET_METHOD_SUBSET;
1089 /* Here, we just copy the parent's fast_map. It's not accurate,
1090 but at least it works for quickly detecting which character
1091 DOESN'T belong to this charset. */
1092 for (i = 0; i < 190; i++)
1093 charset.fast_map[i] = parent_charset->fast_map[i];
1094
1095 /* We also copy these for parents. */
1096 charset.min_char = parent_charset->min_char;
1097 charset.max_char = parent_charset->max_char;
1098 }
1099 else if (! NILP (args[charset_arg_superset]))
1100 {
1101 val = args[charset_arg_superset];
1102 charset.method = CHARSET_METHOD_SUPERSET;
1103 val = Fcopy_sequence (val);
1104 ASET (attrs, charset_superset, val);
1105
1106 charset.min_char = MAX_CHAR;
1107 charset.max_char = 0;
1108 for (; ! NILP (val); val = Fcdr (val))
1109 {
1110 Lisp_Object elt, car_part, cdr_part;
1111 int this_id, offset;
1112 struct charset *this_charset;
1113
1114 elt = Fcar (val);
1115 if (CONSP (elt))
1116 {
1117 car_part = XCAR (elt);
1118 cdr_part = XCDR (elt);
1119 CHECK_CHARSET_GET_ID (car_part, this_id);
1120 CHECK_NUMBER (cdr_part);
1121 offset = XINT (cdr_part);
1122 }
1123 else
1124 {
1125 CHECK_CHARSET_GET_ID (elt, this_id);
1126 offset = 0;
1127 }
1128 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1129
1130 this_charset = CHARSET_FROM_ID (this_id);
1131 if (charset.min_char > this_charset->min_char)
1132 charset.min_char = this_charset->min_char;
1133 if (charset.max_char < this_charset->max_char)
1134 charset.max_char = this_charset->max_char;
1135 for (i = 0; i < 190; i++)
1136 charset.fast_map[i] |= this_charset->fast_map[i];
1137 }
1138 }
1139 else
1140 error ("None of :code-offset, :map, :parents are specified");
1141
1142 val = args[charset_arg_unify_map];
1143 if (! NILP (val) && !STRINGP (val))
1144 CHECK_VECTOR (val);
1145 ASET (attrs, charset_unify_map, val);
1146
1147 CHECK_LIST (args[charset_arg_plist]);
1148 ASET (attrs, charset_plist, args[charset_arg_plist]);
1149
1150 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1151 &hash_code);
1152 if (charset.hash_index >= 0)
1153 {
1154 new_definition_p = 0;
1155 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1156 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1157 }
1158 else
1159 {
1160 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1161 hash_code);
1162 if (charset_table_used == charset_table_size)
1163 {
1164 struct charset *new_table
1165 = (struct charset *) xmalloc (sizeof (struct charset)
1166 * (charset_table_size + 16));
1167 memcpy (new_table, charset_table,
1168 sizeof (struct charset) * charset_table_size);
1169 charset_table_size += 16;
1170 charset_table = new_table;
1171 }
1172 id = charset_table_used++;
1173 new_definition_p = 1;
1174 }
1175
1176 ASET (attrs, charset_id, make_number (id));
1177 charset.id = id;
1178 charset_table[id] = charset;
1179
1180 if (charset.method == CHARSET_METHOD_MAP)
1181 {
1182 load_charset (&charset, 0);
1183 charset_table[id] = charset;
1184 }
1185
1186 if (charset.iso_final >= 0)
1187 {
1188 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1189 charset.iso_final) = id;
1190 if (new_definition_p)
1191 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1192 Fcons (make_number (id), Qnil));
1193 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1194 charset_jisx0201_roman = id;
1195 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1196 charset_jisx0208_1978 = id;
1197 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1198 charset_jisx0208 = id;
1199 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1200 charset_ksc5601 = id;
1201 }
1202
1203 if (charset.emacs_mule_id >= 0)
1204 {
1205 emacs_mule_charset[charset.emacs_mule_id] = id;
1206 if (charset.emacs_mule_id < 0xA0)
1207 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1208 else
1209 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1210 if (new_definition_p)
1211 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1212 Fcons (make_number (id), Qnil));
1213 }
1214
1215 if (new_definition_p)
1216 {
1217 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1218 if (charset.supplementary_p)
1219 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1220 Fcons (make_number (id), Qnil));
1221 else
1222 {
1223 Lisp_Object tail;
1224
1225 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1226 {
1227 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1228
1229 if (cs->supplementary_p)
1230 break;
1231 }
1232 if (EQ (tail, Vcharset_ordered_list))
1233 Vcharset_ordered_list = Fcons (make_number (id),
1234 Vcharset_ordered_list);
1235 else if (NILP (tail))
1236 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1237 Fcons (make_number (id), Qnil));
1238 else
1239 {
1240 val = Fcons (XCAR (tail), XCDR (tail));
1241 XSETCDR (tail, val);
1242 XSETCAR (tail, make_number (id));
1243 }
1244 }
1245 charset_ordered_list_tick++;
1246 }
1247
1248 return Qnil;
1249 }
1250
1251
1252 /* Same as Fdefine_charset_internal but arguments are more convenient
1253 to call from C (typically in syms_of_charset). This can define a
1254 charset of `offset' method only. Return the ID of the new
1255 charset. */
1256
1257 static int
1258 define_charset_internal (Lisp_Object name,
1259 int dimension,
1260 const char *code_space_chars,
1261 unsigned min_code, unsigned max_code,
1262 int iso_final, int iso_revision, int emacs_mule_id,
1263 int ascii_compatible, int supplementary,
1264 int code_offset)
1265 {
1266 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1267 Lisp_Object args[charset_arg_max];
1268 Lisp_Object plist[14];
1269 Lisp_Object val;
1270 int i;
1271
1272 args[charset_arg_name] = name;
1273 args[charset_arg_dimension] = make_number (dimension);
1274 val = Fmake_vector (make_number (8), make_number (0));
1275 for (i = 0; i < 8; i++)
1276 ASET (val, i, make_number (code_space[i]));
1277 args[charset_arg_code_space] = val;
1278 args[charset_arg_min_code] = make_number (min_code);
1279 args[charset_arg_max_code] = make_number (max_code);
1280 args[charset_arg_iso_final]
1281 = (iso_final < 0 ? Qnil : make_number (iso_final));
1282 args[charset_arg_iso_revision] = make_number (iso_revision);
1283 args[charset_arg_emacs_mule_id]
1284 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1285 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1286 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1287 args[charset_arg_invalid_code] = Qnil;
1288 args[charset_arg_code_offset] = make_number (code_offset);
1289 args[charset_arg_map] = Qnil;
1290 args[charset_arg_subset] = Qnil;
1291 args[charset_arg_superset] = Qnil;
1292 args[charset_arg_unify_map] = Qnil;
1293
1294 plist[0] = intern_c_string (":name");
1295 plist[1] = args[charset_arg_name];
1296 plist[2] = intern_c_string (":dimension");
1297 plist[3] = args[charset_arg_dimension];
1298 plist[4] = intern_c_string (":code-space");
1299 plist[5] = args[charset_arg_code_space];
1300 plist[6] = intern_c_string (":iso-final-char");
1301 plist[7] = args[charset_arg_iso_final];
1302 plist[8] = intern_c_string (":emacs-mule-id");
1303 plist[9] = args[charset_arg_emacs_mule_id];
1304 plist[10] = intern_c_string (":ascii-compatible-p");
1305 plist[11] = args[charset_arg_ascii_compatible_p];
1306 plist[12] = intern_c_string (":code-offset");
1307 plist[13] = args[charset_arg_code_offset];
1308
1309 args[charset_arg_plist] = Flist (14, plist);
1310 Fdefine_charset_internal (charset_arg_max, args);
1311
1312 return XINT (CHARSET_SYMBOL_ID (name));
1313 }
1314
1315
1316 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1317 Sdefine_charset_alias, 2, 2, 0,
1318 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1319 (Lisp_Object alias, Lisp_Object charset)
1320 {
1321 Lisp_Object attr;
1322
1323 CHECK_CHARSET_GET_ATTR (charset, attr);
1324 Fputhash (alias, attr, Vcharset_hash_table);
1325 Vcharset_list = Fcons (alias, Vcharset_list);
1326 return Qnil;
1327 }
1328
1329
1330 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1331 doc: /* Return the property list of CHARSET. */)
1332 (Lisp_Object charset)
1333 {
1334 Lisp_Object attrs;
1335
1336 CHECK_CHARSET_GET_ATTR (charset, attrs);
1337 return CHARSET_ATTR_PLIST (attrs);
1338 }
1339
1340
1341 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1342 doc: /* Set CHARSET's property list to PLIST. */)
1343 (Lisp_Object charset, Lisp_Object plist)
1344 {
1345 Lisp_Object attrs;
1346
1347 CHECK_CHARSET_GET_ATTR (charset, attrs);
1348 CHARSET_ATTR_PLIST (attrs) = plist;
1349 return plist;
1350 }
1351
1352
1353 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1354 doc: /* Unify characters of CHARSET with Unicode.
1355 This means reading the relevant file and installing the table defined
1356 by CHARSET's `:unify-map' property.
1357
1358 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1359 the same meaning as the `:unify-map' attribute in the function
1360 `define-charset' (which see).
1361
1362 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1363 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1364 {
1365 int id;
1366 struct charset *cs;
1367
1368 CHECK_CHARSET_GET_ID (charset, id);
1369 cs = CHARSET_FROM_ID (id);
1370 if (NILP (deunify)
1371 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1372 : ! CHARSET_UNIFIED_P (cs))
1373 return Qnil;
1374
1375 CHARSET_UNIFIED_P (cs) = 0;
1376 if (NILP (deunify))
1377 {
1378 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1379 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1380 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1381 if (NILP (unify_map))
1382 unify_map = CHARSET_UNIFY_MAP (cs);
1383 else
1384 {
1385 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1386 signal_error ("Bad unify-map", unify_map);
1387 CHARSET_UNIFY_MAP (cs) = unify_map;
1388 }
1389 if (NILP (Vchar_unify_table))
1390 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1391 char_table_set_range (Vchar_unify_table,
1392 cs->min_char, cs->max_char, charset);
1393 CHARSET_UNIFIED_P (cs) = 1;
1394 }
1395 else if (CHAR_TABLE_P (Vchar_unify_table))
1396 {
1397 int min_code = CHARSET_MIN_CODE (cs);
1398 int max_code = CHARSET_MAX_CODE (cs);
1399 int min_char = DECODE_CHAR (cs, min_code);
1400 int max_char = DECODE_CHAR (cs, max_code);
1401
1402 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1403 }
1404
1405 return Qnil;
1406 }
1407
1408 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1409 Sget_unused_iso_final_char, 2, 2, 0,
1410 doc: /*
1411 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1412 DIMENSION is the number of bytes to represent a character: 1 or 2.
1413 CHARS is the number of characters in a dimension: 94 or 96.
1414
1415 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1416 If there's no unused final char for the specified kind of charset,
1417 return nil. */)
1418 (Lisp_Object dimension, Lisp_Object chars)
1419 {
1420 int final_char;
1421
1422 CHECK_NUMBER (dimension);
1423 CHECK_NUMBER (chars);
1424 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1425 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1426 if (XINT (chars) != 94 && XINT (chars) != 96)
1427 args_out_of_range_3 (chars, make_number (94), make_number (96));
1428 for (final_char = '0'; final_char <= '?'; final_char++)
1429 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1430 break;
1431 return (final_char <= '?' ? make_number (final_char) : Qnil);
1432 }
1433
1434 static void
1435 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1436 {
1437 CHECK_NATNUM (dimension);
1438 CHECK_NATNUM (chars);
1439 CHECK_NATNUM (final_char);
1440
1441 if (XINT (dimension) > 3)
1442 error ("Invalid DIMENSION %"pEd", it should be 1, 2, or 3",
1443 XINT (dimension));
1444 if (XINT (chars) != 94 && XINT (chars) != 96)
1445 error ("Invalid CHARS %"pEd", it should be 94 or 96", XINT (chars));
1446 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1447 {
1448 unsigned char str[MAX_MULTIBYTE_LENGTH + 1];
1449 int len = CHAR_STRING (XINT (chars), str);
1450 str[len] = '\0';
1451 error ("Invalid FINAL-CHAR %s, it should be `0'..`~'", str);
1452 }
1453 }
1454
1455
1456 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1457 4, 4, 0,
1458 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1459
1460 On decoding by an ISO-2022 base coding system, when a charset
1461 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1462 if CHARSET is designated instead. */)
1463 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1464 {
1465 int id;
1466 int chars_flag;
1467
1468 CHECK_CHARSET_GET_ID (charset, id);
1469 check_iso_charset_parameter (dimension, chars, final_char);
1470 chars_flag = XINT (chars) == 96;
1471 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1472 return Qnil;
1473 }
1474
1475
1476 /* Return information about charsets in the text at PTR of NBYTES
1477 bytes, which are NCHARS characters. The value is:
1478
1479 0: Each character is represented by one byte. This is always
1480 true for a unibyte string. For a multibyte string, true if
1481 it contains only ASCII characters.
1482
1483 1: No charsets other than ascii, control-1, and latin-1 are
1484 found.
1485
1486 2: Otherwise.
1487 */
1488
1489 int
1490 string_xstring_p (Lisp_Object string)
1491 {
1492 const unsigned char *p = SDATA (string);
1493 const unsigned char *endp = p + SBYTES (string);
1494
1495 if (SCHARS (string) == SBYTES (string))
1496 return 0;
1497
1498 while (p < endp)
1499 {
1500 int c = STRING_CHAR_ADVANCE (p);
1501
1502 if (c >= 0x100)
1503 return 2;
1504 }
1505 return 1;
1506 }
1507
1508
1509 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1510
1511 CHARSETS is a vector. If Nth element is non-nil, it means the
1512 charset whose id is N is already found.
1513
1514 It may lookup a translation table TABLE if supplied. */
1515
1516 static void
1517 find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
1518 {
1519 const unsigned char *pend = ptr + nbytes;
1520
1521 if (nchars == nbytes)
1522 {
1523 if (multibyte)
1524 ASET (charsets, charset_ascii, Qt);
1525 else
1526 while (ptr < pend)
1527 {
1528 int c = *ptr++;
1529
1530 if (!NILP (table))
1531 c = translate_char (table, c);
1532 if (ASCII_BYTE_P (c))
1533 ASET (charsets, charset_ascii, Qt);
1534 else
1535 ASET (charsets, charset_eight_bit, Qt);
1536 }
1537 }
1538 else
1539 {
1540 while (ptr < pend)
1541 {
1542 int c = STRING_CHAR_ADVANCE (ptr);
1543 struct charset *charset;
1544
1545 if (!NILP (table))
1546 c = translate_char (table, c);
1547 charset = CHAR_CHARSET (c);
1548 ASET (charsets, CHARSET_ID (charset), Qt);
1549 }
1550 }
1551 }
1552
1553 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1554 2, 3, 0,
1555 doc: /* Return a list of charsets in the region between BEG and END.
1556 BEG and END are buffer positions.
1557 Optional arg TABLE if non-nil is a translation table to look up.
1558
1559 If the current buffer is unibyte, the returned list may contain
1560 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1561 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1562 {
1563 Lisp_Object charsets;
1564 EMACS_INT from, from_byte, to, stop, stop_byte;
1565 int i;
1566 Lisp_Object val;
1567 int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1568
1569 validate_region (&beg, &end);
1570 from = XFASTINT (beg);
1571 stop = to = XFASTINT (end);
1572
1573 if (from < GPT && GPT < to)
1574 {
1575 stop = GPT;
1576 stop_byte = GPT_BYTE;
1577 }
1578 else
1579 stop_byte = CHAR_TO_BYTE (stop);
1580
1581 from_byte = CHAR_TO_BYTE (from);
1582
1583 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1584 while (1)
1585 {
1586 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1587 stop_byte - from_byte, charsets, table,
1588 multibyte);
1589 if (stop < to)
1590 {
1591 from = stop, from_byte = stop_byte;
1592 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1593 }
1594 else
1595 break;
1596 }
1597
1598 val = Qnil;
1599 for (i = charset_table_used - 1; i >= 0; i--)
1600 if (!NILP (AREF (charsets, i)))
1601 val = Fcons (CHARSET_NAME (charset_table + i), val);
1602 return val;
1603 }
1604
1605 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1606 1, 2, 0,
1607 doc: /* Return a list of charsets in STR.
1608 Optional arg TABLE if non-nil is a translation table to look up.
1609
1610 If STR is unibyte, the returned list may contain
1611 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1612 (Lisp_Object str, Lisp_Object table)
1613 {
1614 Lisp_Object charsets;
1615 int i;
1616 Lisp_Object val;
1617
1618 CHECK_STRING (str);
1619
1620 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1621 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1622 charsets, table,
1623 STRING_MULTIBYTE (str));
1624 val = Qnil;
1625 for (i = charset_table_used - 1; i >= 0; i--)
1626 if (!NILP (AREF (charsets, i)))
1627 val = Fcons (CHARSET_NAME (charset_table + i), val);
1628 return val;
1629 }
1630
1631 \f
1632
1633 /* Return a unified character code for C (>= 0x110000). VAL is a
1634 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1635 charset symbol. */
1636 int
1637 maybe_unify_char (int c, Lisp_Object val)
1638 {
1639 struct charset *charset;
1640
1641 if (INTEGERP (val))
1642 return XINT (val);
1643 if (NILP (val))
1644 return c;
1645
1646 CHECK_CHARSET_GET_CHARSET (val, charset);
1647 load_charset (charset, 1);
1648 if (! inhibit_load_charset_map)
1649 {
1650 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1651 if (! NILP (val))
1652 c = XINT (val);
1653 }
1654 else
1655 {
1656 int code_index = c - CHARSET_CODE_OFFSET (charset);
1657 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1658
1659 if (unified > 0)
1660 c = unified;
1661 }
1662 return c;
1663 }
1664
1665
1666 /* Return a character correponding to the code-point CODE of
1667 CHARSET. */
1668
1669 int
1670 decode_char (struct charset *charset, unsigned int code)
1671 {
1672 int c, char_index;
1673 enum charset_method method = CHARSET_METHOD (charset);
1674
1675 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1676 return -1;
1677
1678 if (method == CHARSET_METHOD_SUBSET)
1679 {
1680 Lisp_Object subset_info;
1681
1682 subset_info = CHARSET_SUBSET (charset);
1683 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1684 code -= XINT (AREF (subset_info, 3));
1685 if (code < XFASTINT (AREF (subset_info, 1))
1686 || code > XFASTINT (AREF (subset_info, 2)))
1687 c = -1;
1688 else
1689 c = DECODE_CHAR (charset, code);
1690 }
1691 else if (method == CHARSET_METHOD_SUPERSET)
1692 {
1693 Lisp_Object parents;
1694
1695 parents = CHARSET_SUPERSET (charset);
1696 c = -1;
1697 for (; CONSP (parents); parents = XCDR (parents))
1698 {
1699 int id = XINT (XCAR (XCAR (parents)));
1700 int code_offset = XINT (XCDR (XCAR (parents)));
1701 unsigned this_code = code - code_offset;
1702
1703 charset = CHARSET_FROM_ID (id);
1704 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1705 break;
1706 }
1707 }
1708 else
1709 {
1710 char_index = CODE_POINT_TO_INDEX (charset, code);
1711 if (char_index < 0)
1712 return -1;
1713
1714 if (method == CHARSET_METHOD_MAP)
1715 {
1716 Lisp_Object decoder;
1717
1718 decoder = CHARSET_DECODER (charset);
1719 if (! VECTORP (decoder))
1720 {
1721 load_charset (charset, 1);
1722 decoder = CHARSET_DECODER (charset);
1723 }
1724 if (VECTORP (decoder))
1725 c = XINT (AREF (decoder, char_index));
1726 else
1727 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1728 }
1729 else /* method == CHARSET_METHOD_OFFSET */
1730 {
1731 c = char_index + CHARSET_CODE_OFFSET (charset);
1732 if (CHARSET_UNIFIED_P (charset)
1733 && c > MAX_UNICODE_CHAR)
1734 MAYBE_UNIFY_CHAR (c);
1735 }
1736 }
1737
1738 return c;
1739 }
1740
1741 /* Variable used temporarily by the macro ENCODE_CHAR. */
1742 Lisp_Object charset_work;
1743
1744 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1745 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1746 use CHARSET's strict_max_char instead of max_char. */
1747
1748 unsigned
1749 encode_char (struct charset *charset, int c)
1750 {
1751 unsigned code;
1752 enum charset_method method = CHARSET_METHOD (charset);
1753
1754 if (CHARSET_UNIFIED_P (charset))
1755 {
1756 Lisp_Object deunifier;
1757 int code_index = -1;
1758
1759 deunifier = CHARSET_DEUNIFIER (charset);
1760 if (! CHAR_TABLE_P (deunifier))
1761 {
1762 load_charset (charset, 2);
1763 deunifier = CHARSET_DEUNIFIER (charset);
1764 }
1765 if (CHAR_TABLE_P (deunifier))
1766 {
1767 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1768
1769 if (INTEGERP (deunified))
1770 code_index = XINT (deunified);
1771 }
1772 else
1773 {
1774 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1775 }
1776 if (code_index >= 0)
1777 c = CHARSET_CODE_OFFSET (charset) + code_index;
1778 }
1779
1780 if (method == CHARSET_METHOD_SUBSET)
1781 {
1782 Lisp_Object subset_info;
1783 struct charset *this_charset;
1784
1785 subset_info = CHARSET_SUBSET (charset);
1786 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1787 code = ENCODE_CHAR (this_charset, c);
1788 if (code == CHARSET_INVALID_CODE (this_charset)
1789 || code < XFASTINT (AREF (subset_info, 1))
1790 || code > XFASTINT (AREF (subset_info, 2)))
1791 return CHARSET_INVALID_CODE (charset);
1792 code += XINT (AREF (subset_info, 3));
1793 return code;
1794 }
1795
1796 if (method == CHARSET_METHOD_SUPERSET)
1797 {
1798 Lisp_Object parents;
1799
1800 parents = CHARSET_SUPERSET (charset);
1801 for (; CONSP (parents); parents = XCDR (parents))
1802 {
1803 int id = XINT (XCAR (XCAR (parents)));
1804 int code_offset = XINT (XCDR (XCAR (parents)));
1805 struct charset *this_charset = CHARSET_FROM_ID (id);
1806
1807 code = ENCODE_CHAR (this_charset, c);
1808 if (code != CHARSET_INVALID_CODE (this_charset))
1809 return code + code_offset;
1810 }
1811 return CHARSET_INVALID_CODE (charset);
1812 }
1813
1814 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1815 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1816 return CHARSET_INVALID_CODE (charset);
1817
1818 if (method == CHARSET_METHOD_MAP)
1819 {
1820 Lisp_Object encoder;
1821 Lisp_Object val;
1822
1823 encoder = CHARSET_ENCODER (charset);
1824 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1825 {
1826 load_charset (charset, 2);
1827 encoder = CHARSET_ENCODER (charset);
1828 }
1829 if (CHAR_TABLE_P (encoder))
1830 {
1831 val = CHAR_TABLE_REF (encoder, c);
1832 if (NILP (val))
1833 return CHARSET_INVALID_CODE (charset);
1834 code = XINT (val);
1835 if (! CHARSET_COMPACT_CODES_P (charset))
1836 code = INDEX_TO_CODE_POINT (charset, code);
1837 }
1838 else
1839 {
1840 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1841 code = INDEX_TO_CODE_POINT (charset, code);
1842 }
1843 }
1844 else /* method == CHARSET_METHOD_OFFSET */
1845 {
1846 int code_index = c - CHARSET_CODE_OFFSET (charset);
1847
1848 code = INDEX_TO_CODE_POINT (charset, code_index);
1849 }
1850
1851 return code;
1852 }
1853
1854
1855 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1856 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1857 Return nil if CODE-POINT is not valid in CHARSET.
1858
1859 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1860
1861 Optional argument RESTRICTION specifies a way to map the pair of CCS
1862 and CODE-POINT to a character. Currently not supported and just ignored. */)
1863 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1864 {
1865 int c, id;
1866 unsigned code;
1867 struct charset *charsetp;
1868
1869 CHECK_CHARSET_GET_ID (charset, id);
1870 if (CONSP (code_point))
1871 {
1872 CHECK_NATNUM_CAR (code_point);
1873 CHECK_NATNUM_CDR (code_point);
1874 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1875 }
1876 else
1877 {
1878 CHECK_NATNUM (code_point);
1879 code = XINT (code_point);
1880 }
1881 charsetp = CHARSET_FROM_ID (id);
1882 c = DECODE_CHAR (charsetp, code);
1883 return (c >= 0 ? make_number (c) : Qnil);
1884 }
1885
1886
1887 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1888 doc: /* Encode the character CH into a code-point of CHARSET.
1889 Return nil if CHARSET doesn't include CH.
1890
1891 Optional argument RESTRICTION specifies a way to map CH to a
1892 code-point in CCS. Currently not supported and just ignored. */)
1893 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1894 {
1895 int id;
1896 unsigned code;
1897 struct charset *charsetp;
1898
1899 CHECK_CHARSET_GET_ID (charset, id);
1900 CHECK_NATNUM (ch);
1901 charsetp = CHARSET_FROM_ID (id);
1902 code = ENCODE_CHAR (charsetp, XINT (ch));
1903 if (code == CHARSET_INVALID_CODE (charsetp))
1904 return Qnil;
1905 if (code > 0x7FFFFFF)
1906 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1907 return make_number (code);
1908 }
1909
1910
1911 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1912 doc:
1913 /* Return a character of CHARSET whose position codes are CODEn.
1914
1915 CODE1 through CODE4 are optional, but if you don't supply sufficient
1916 position codes, it is assumed that the minimum code in each dimension
1917 is specified. */)
1918 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1919 {
1920 int id, dimension;
1921 struct charset *charsetp;
1922 unsigned code;
1923 int c;
1924
1925 CHECK_CHARSET_GET_ID (charset, id);
1926 charsetp = CHARSET_FROM_ID (id);
1927
1928 dimension = CHARSET_DIMENSION (charsetp);
1929 if (NILP (code1))
1930 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1931 ? 0 : CHARSET_MIN_CODE (charsetp));
1932 else
1933 {
1934 CHECK_NATNUM (code1);
1935 if (XFASTINT (code1) >= 0x100)
1936 args_out_of_range (make_number (0xFF), code1);
1937 code = XFASTINT (code1);
1938
1939 if (dimension > 1)
1940 {
1941 code <<= 8;
1942 if (NILP (code2))
1943 code |= charsetp->code_space[(dimension - 2) * 4];
1944 else
1945 {
1946 CHECK_NATNUM (code2);
1947 if (XFASTINT (code2) >= 0x100)
1948 args_out_of_range (make_number (0xFF), code2);
1949 code |= XFASTINT (code2);
1950 }
1951
1952 if (dimension > 2)
1953 {
1954 code <<= 8;
1955 if (NILP (code3))
1956 code |= charsetp->code_space[(dimension - 3) * 4];
1957 else
1958 {
1959 CHECK_NATNUM (code3);
1960 if (XFASTINT (code3) >= 0x100)
1961 args_out_of_range (make_number (0xFF), code3);
1962 code |= XFASTINT (code3);
1963 }
1964
1965 if (dimension > 3)
1966 {
1967 code <<= 8;
1968 if (NILP (code4))
1969 code |= charsetp->code_space[0];
1970 else
1971 {
1972 CHECK_NATNUM (code4);
1973 if (XFASTINT (code4) >= 0x100)
1974 args_out_of_range (make_number (0xFF), code4);
1975 code |= XFASTINT (code4);
1976 }
1977 }
1978 }
1979 }
1980 }
1981
1982 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1983 code &= 0x7F7F7F7F;
1984 c = DECODE_CHAR (charsetp, code);
1985 if (c < 0)
1986 error ("Invalid code(s)");
1987 return make_number (c);
1988 }
1989
1990
1991 /* Return the first charset in CHARSET_LIST that contains C.
1992 CHARSET_LIST is a list of charset IDs. If it is nil, use
1993 Vcharset_ordered_list. */
1994
1995 struct charset *
1996 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1997 {
1998 int maybe_null = 0;
1999
2000 if (NILP (charset_list))
2001 charset_list = Vcharset_ordered_list;
2002 else
2003 maybe_null = 1;
2004
2005 while (CONSP (charset_list))
2006 {
2007 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2008 unsigned code = ENCODE_CHAR (charset, c);
2009
2010 if (code != CHARSET_INVALID_CODE (charset))
2011 {
2012 if (code_return)
2013 *code_return = code;
2014 return charset;
2015 }
2016 charset_list = XCDR (charset_list);
2017 if (! maybe_null
2018 && c <= MAX_UNICODE_CHAR
2019 && EQ (charset_list, Vcharset_non_preferred_head))
2020 return CHARSET_FROM_ID (charset_unicode);
2021 }
2022 return (maybe_null ? NULL
2023 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2024 : CHARSET_FROM_ID (charset_eight_bit));
2025 }
2026
2027
2028 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2029 doc:
2030 /*Return list of charset and one to four position-codes of CH.
2031 The charset is decided by the current priority order of charsets.
2032 A position-code is a byte value of each dimension of the code-point of
2033 CH in the charset. */)
2034 (Lisp_Object ch)
2035 {
2036 struct charset *charset;
2037 int c, dimension;
2038 unsigned code;
2039 Lisp_Object val;
2040
2041 CHECK_CHARACTER (ch);
2042 c = XFASTINT (ch);
2043 charset = CHAR_CHARSET (c);
2044 if (! charset)
2045 abort ();
2046 code = ENCODE_CHAR (charset, c);
2047 if (code == CHARSET_INVALID_CODE (charset))
2048 abort ();
2049 dimension = CHARSET_DIMENSION (charset);
2050 for (val = Qnil; dimension > 0; dimension--)
2051 {
2052 val = Fcons (make_number (code & 0xFF), val);
2053 code >>= 8;
2054 }
2055 return Fcons (CHARSET_NAME (charset), val);
2056 }
2057
2058
2059 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2060 doc: /* Return the charset of highest priority that contains CH.
2061 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2062 from which to find the charset. It may also be a coding system. In
2063 that case, find the charset from what supported by that coding system. */)
2064 (Lisp_Object ch, Lisp_Object restriction)
2065 {
2066 struct charset *charset;
2067
2068 CHECK_CHARACTER (ch);
2069 if (NILP (restriction))
2070 charset = CHAR_CHARSET (XINT (ch));
2071 else
2072 {
2073 if (CONSP (restriction))
2074 {
2075 int c = XFASTINT (ch);
2076
2077 for (; CONSP (restriction); restriction = XCDR (restriction))
2078 {
2079 struct charset *rcharset;
2080
2081 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2082 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2083 return XCAR (restriction);
2084 }
2085 return Qnil;
2086 }
2087 restriction = coding_system_charset_list (restriction);
2088 charset = char_charset (XINT (ch), restriction, NULL);
2089 if (! charset)
2090 return Qnil;
2091 }
2092 return (CHARSET_NAME (charset));
2093 }
2094
2095
2096 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2097 doc: /*
2098 Return charset of a character in the current buffer at position POS.
2099 If POS is nil, it defauls to the current point.
2100 If POS is out of range, the value is nil. */)
2101 (Lisp_Object pos)
2102 {
2103 Lisp_Object ch;
2104 struct charset *charset;
2105
2106 ch = Fchar_after (pos);
2107 if (! INTEGERP (ch))
2108 return ch;
2109 charset = CHAR_CHARSET (XINT (ch));
2110 return (CHARSET_NAME (charset));
2111 }
2112
2113
2114 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2115 doc: /*
2116 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2117
2118 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2119 by their DIMENSION, CHARS, and FINAL-CHAR,
2120 whereas Emacs distinguishes them by charset symbol.
2121 See the documentation of the function `charset-info' for the meanings of
2122 DIMENSION, CHARS, and FINAL-CHAR. */)
2123 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2124 {
2125 int id;
2126 int chars_flag;
2127
2128 check_iso_charset_parameter (dimension, chars, final_char);
2129 chars_flag = XFASTINT (chars) == 96;
2130 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2131 XFASTINT (final_char));
2132 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2133 }
2134
2135
2136 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2137 0, 0, 0,
2138 doc: /*
2139 Internal use only.
2140 Clear temporary charset mapping tables.
2141 It should be called only from temacs invoked for dumping. */)
2142 (void)
2143 {
2144 if (temp_charset_work)
2145 {
2146 free (temp_charset_work);
2147 temp_charset_work = NULL;
2148 }
2149
2150 if (CHAR_TABLE_P (Vchar_unify_table))
2151 Foptimize_char_table (Vchar_unify_table, Qnil);
2152
2153 return Qnil;
2154 }
2155
2156 DEFUN ("charset-priority-list", Fcharset_priority_list,
2157 Scharset_priority_list, 0, 1, 0,
2158 doc: /* Return the list of charsets ordered by priority.
2159 HIGHESTP non-nil means just return the highest priority one. */)
2160 (Lisp_Object highestp)
2161 {
2162 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2163
2164 if (!NILP (highestp))
2165 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2166
2167 while (!NILP (list))
2168 {
2169 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2170 list = XCDR (list);
2171 }
2172 return Fnreverse (val);
2173 }
2174
2175 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2176 1, MANY, 0,
2177 doc: /* Assign higher priority to the charsets given as arguments.
2178 usage: (set-charset-priority &rest charsets) */)
2179 (size_t nargs, Lisp_Object *args)
2180 {
2181 Lisp_Object new_head, old_list, arglist[2];
2182 Lisp_Object list_2022, list_emacs_mule;
2183 size_t i;
2184 int id;
2185
2186 old_list = Fcopy_sequence (Vcharset_ordered_list);
2187 new_head = Qnil;
2188 for (i = 0; i < nargs; i++)
2189 {
2190 CHECK_CHARSET_GET_ID (args[i], id);
2191 if (! NILP (Fmemq (make_number (id), old_list)))
2192 {
2193 old_list = Fdelq (make_number (id), old_list);
2194 new_head = Fcons (make_number (id), new_head);
2195 }
2196 }
2197 arglist[0] = Fnreverse (new_head);
2198 arglist[1] = Vcharset_non_preferred_head = old_list;
2199 Vcharset_ordered_list = Fnconc (2, arglist);
2200 charset_ordered_list_tick++;
2201
2202 charset_unibyte = -1;
2203 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2204 CONSP (old_list); old_list = XCDR (old_list))
2205 {
2206 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2207 list_2022 = Fcons (XCAR (old_list), list_2022);
2208 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2209 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2210 if (charset_unibyte < 0)
2211 {
2212 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2213
2214 if (CHARSET_DIMENSION (charset) == 1
2215 && CHARSET_ASCII_COMPATIBLE_P (charset)
2216 && CHARSET_MAX_CHAR (charset) >= 0x80)
2217 charset_unibyte = CHARSET_ID (charset);
2218 }
2219 }
2220 Viso_2022_charset_list = Fnreverse (list_2022);
2221 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2222 if (charset_unibyte < 0)
2223 charset_unibyte = charset_iso_8859_1;
2224
2225 return Qnil;
2226 }
2227
2228 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2229 0, 1, 0,
2230 doc: /* Internal use only.
2231 Return charset identification number of CHARSET. */)
2232 (Lisp_Object charset)
2233 {
2234 int id;
2235
2236 CHECK_CHARSET_GET_ID (charset, id);
2237 return make_number (id);
2238 }
2239
2240 struct charset_sort_data
2241 {
2242 Lisp_Object charset;
2243 int id;
2244 int priority;
2245 };
2246
2247 static int
2248 charset_compare (const void *d1, const void *d2)
2249 {
2250 const struct charset_sort_data *data1 = d1, *data2 = d2;
2251 return (data1->priority - data2->priority);
2252 }
2253
2254 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2255 doc: /* Sort charset list CHARSETS by a priority of each charset.
2256 Return the sorted list. CHARSETS is modified by side effects.
2257 See also `charset-priority-list' and `set-charset-priority'. */)
2258 (Lisp_Object charsets)
2259 {
2260 Lisp_Object len = Flength (charsets);
2261 int n = XFASTINT (len), i, j, done;
2262 Lisp_Object tail, elt, attrs;
2263 struct charset_sort_data *sort_data;
2264 int id, min_id = INT_MAX, max_id = INT_MIN;
2265 USE_SAFE_ALLOCA;
2266
2267 if (n == 0)
2268 return Qnil;
2269 SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
2270 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2271 {
2272 elt = XCAR (tail);
2273 CHECK_CHARSET_GET_ATTR (elt, attrs);
2274 sort_data[i].charset = elt;
2275 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2276 if (id < min_id)
2277 min_id = id;
2278 if (id > max_id)
2279 max_id = id;
2280 }
2281 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2282 done < n && CONSP (tail); tail = XCDR (tail), i++)
2283 {
2284 elt = XCAR (tail);
2285 id = XFASTINT (elt);
2286 if (id >= min_id && id <= max_id)
2287 for (j = 0; j < n; j++)
2288 if (sort_data[j].id == id)
2289 {
2290 sort_data[j].priority = i;
2291 done++;
2292 }
2293 }
2294 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2295 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2296 XSETCAR (tail, sort_data[i].charset);
2297 SAFE_FREE ();
2298 return charsets;
2299 }
2300
2301 \f
2302 void
2303 init_charset (void)
2304 {
2305 Lisp_Object tempdir;
2306 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2307 if (access (SSDATA (tempdir), 0) < 0)
2308 {
2309 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2310 Emacs will not function correctly without the character map files.\n\
2311 Please check your installation!\n",
2312 tempdir);
2313 /* TODO should this be a fatal error? (Bug#909) */
2314 }
2315
2316 Vcharset_map_path = Fcons (tempdir, Qnil);
2317 }
2318
2319
2320 void
2321 init_charset_once (void)
2322 {
2323 int i, j, k;
2324
2325 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2326 for (j = 0; j < ISO_MAX_CHARS; j++)
2327 for (k = 0; k < ISO_MAX_FINAL; k++)
2328 iso_charset_table[i][j][k] = -1;
2329
2330 for (i = 0; i < 256; i++)
2331 emacs_mule_charset[i] = -1;
2332
2333 charset_jisx0201_roman = -1;
2334 charset_jisx0208_1978 = -1;
2335 charset_jisx0208 = -1;
2336 charset_ksc5601 = -1;
2337 }
2338
2339 #ifdef emacs
2340
2341 void
2342 syms_of_charset (void)
2343 {
2344 DEFSYM (Qcharsetp, "charsetp");
2345
2346 DEFSYM (Qascii, "ascii");
2347 DEFSYM (Qunicode, "unicode");
2348 DEFSYM (Qemacs, "emacs");
2349 DEFSYM (Qeight_bit, "eight-bit");
2350 DEFSYM (Qiso_8859_1, "iso-8859-1");
2351
2352 DEFSYM (Qgl, "gl");
2353 DEFSYM (Qgr, "gr");
2354
2355 staticpro (&Vcharset_ordered_list);
2356 Vcharset_ordered_list = Qnil;
2357
2358 staticpro (&Viso_2022_charset_list);
2359 Viso_2022_charset_list = Qnil;
2360
2361 staticpro (&Vemacs_mule_charset_list);
2362 Vemacs_mule_charset_list = Qnil;
2363
2364 /* Don't staticpro them here. It's done in syms_of_fns. */
2365 QCtest = intern_c_string (":test");
2366 Qeq = intern_c_string ("eq");
2367
2368 staticpro (&Vcharset_hash_table);
2369 {
2370 Lisp_Object args[2];
2371 args[0] = QCtest;
2372 args[1] = Qeq;
2373 Vcharset_hash_table = Fmake_hash_table (2, args);
2374 }
2375
2376 charset_table_size = 128;
2377 charset_table = ((struct charset *)
2378 xmalloc (sizeof (struct charset) * charset_table_size));
2379 charset_table_used = 0;
2380
2381 defsubr (&Scharsetp);
2382 defsubr (&Smap_charset_chars);
2383 defsubr (&Sdefine_charset_internal);
2384 defsubr (&Sdefine_charset_alias);
2385 defsubr (&Scharset_plist);
2386 defsubr (&Sset_charset_plist);
2387 defsubr (&Sunify_charset);
2388 defsubr (&Sget_unused_iso_final_char);
2389 defsubr (&Sdeclare_equiv_charset);
2390 defsubr (&Sfind_charset_region);
2391 defsubr (&Sfind_charset_string);
2392 defsubr (&Sdecode_char);
2393 defsubr (&Sencode_char);
2394 defsubr (&Ssplit_char);
2395 defsubr (&Smake_char);
2396 defsubr (&Schar_charset);
2397 defsubr (&Scharset_after);
2398 defsubr (&Siso_charset);
2399 defsubr (&Sclear_charset_maps);
2400 defsubr (&Scharset_priority_list);
2401 defsubr (&Sset_charset_priority);
2402 defsubr (&Scharset_id_internal);
2403 defsubr (&Ssort_charsets);
2404
2405 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2406 doc: /* *List of directories to search for charset map files. */);
2407 Vcharset_map_path = Qnil;
2408
2409 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2410 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2411 inhibit_load_charset_map = 0;
2412
2413 DEFVAR_LISP ("charset-list", Vcharset_list,
2414 doc: /* List of all charsets ever defined. */);
2415 Vcharset_list = Qnil;
2416
2417 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2418 doc: /* ISO639 language mnemonic symbol for the current language environment.
2419 If the current language environment is for multiple languages (e.g. "Latin-1"),
2420 the value may be a list of mnemonics. */);
2421 Vcurrent_iso639_language = Qnil;
2422
2423 charset_ascii
2424 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2425 0, 127, 'B', -1, 0, 1, 0, 0);
2426 charset_iso_8859_1
2427 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2428 0, 255, -1, -1, -1, 1, 0, 0);
2429 charset_unicode
2430 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2431 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2432 charset_emacs
2433 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2434 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2435 charset_eight_bit
2436 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2437 128, 255, -1, 0, -1, 0, 1,
2438 MAX_5_BYTE_CHAR + 1);
2439 charset_unibyte = charset_iso_8859_1;
2440 }
2441
2442 #endif /* emacs */