]> code.delx.au - gnu-emacs/blob - src/charset.c
* pcomplete.el (pcomplete-comint-setup): Check for
[gnu-emacs] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include "lisp.h"
35 #include "character.h"
36 #include "charset.h"
37 #include "coding.h"
38 #include "disptab.h"
39 #include "buffer.h"
40
41 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
42
43 A coded character set ("charset" hereafter) is a meaningful
44 collection (i.e. language, culture, functionality, etc.) of
45 characters. Emacs handles multiple charsets at once. In Emacs Lisp
46 code, a charset is represented by a symbol. In C code, a charset is
47 represented by its ID number or by a pointer to a struct charset.
48
49 The actual information about each charset is stored in two places.
50 Lispy information is stored in the hash table Vcharset_hash_table as
51 a vector (charset attributes). The other information is stored in
52 charset_table as a struct charset.
53
54 */
55
56 /* List of all charsets. This variable is used only from Emacs
57 Lisp. */
58 Lisp_Object Vcharset_list;
59
60 /* Hash table that contains attributes of each charset. Keys are
61 charset symbols, and values are vectors of charset attributes. */
62 Lisp_Object Vcharset_hash_table;
63
64 /* Table of struct charset. */
65 struct charset *charset_table;
66
67 static int charset_table_size;
68 static int charset_table_used;
69
70 Lisp_Object Qcharsetp;
71
72 /* Special charset symbols. */
73 Lisp_Object Qascii;
74 Lisp_Object Qeight_bit;
75 Lisp_Object Qiso_8859_1;
76 Lisp_Object Qunicode;
77 Lisp_Object Qemacs;
78
79 /* The corresponding charsets. */
80 int charset_ascii;
81 int charset_eight_bit;
82 int charset_iso_8859_1;
83 int charset_unicode;
84 int charset_emacs;
85
86 /* The other special charsets. */
87 int charset_jisx0201_roman;
88 int charset_jisx0208_1978;
89 int charset_jisx0208;
90 int charset_ksc5601;
91
92 /* Value of charset attribute `charset-iso-plane'. */
93 Lisp_Object Qgl, Qgr;
94
95 /* Charset of unibyte characters. */
96 int charset_unibyte;
97
98 /* List of charsets ordered by the priority. */
99 Lisp_Object Vcharset_ordered_list;
100
101 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
102 charsets. */
103 Lisp_Object Vcharset_non_preferred_head;
104
105 /* Incremented everytime we change Vcharset_ordered_list. This is
106 unsigned short so that it fits in Lisp_Int and never matches
107 -1. */
108 unsigned short charset_ordered_list_tick;
109
110 /* List of iso-2022 charsets. */
111 Lisp_Object Viso_2022_charset_list;
112
113 /* List of emacs-mule charsets. */
114 Lisp_Object Vemacs_mule_charset_list;
115
116 struct charset *emacs_mule_charset[256];
117
118 /* Mapping table from ISO2022's charset (specified by DIMENSION,
119 CHARS, and FINAL-CHAR) to Emacs' charset. */
120 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
121
122 Lisp_Object Vcharset_map_path;
123
124 /* If nonzero, don't load charset maps. */
125 int inhibit_load_charset_map;
126
127 Lisp_Object Vcurrent_iso639_language;
128
129 /* Defined in chartab.c */
130 extern void
131 map_char_table_for_charset P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
132 Lisp_Object function, Lisp_Object table,
133 Lisp_Object arg, struct charset *charset,
134 unsigned from, unsigned to));
135
136 #define CODE_POINT_TO_INDEX(charset, code) \
137 ((charset)->code_linear_p \
138 ? (code) - (charset)->min_code \
139 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
140 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
141 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
142 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
143 ? (((((code) >> 24) - (charset)->code_space[12]) \
144 * (charset)->code_space[11]) \
145 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
146 * (charset)->code_space[7]) \
147 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
148 * (charset)->code_space[3]) \
149 + (((code) & 0xFF) - (charset)->code_space[0]) \
150 - ((charset)->char_index_offset)) \
151 : -1)
152
153
154 /* Convert the character index IDX to code-point CODE for CHARSET.
155 It is assumed that IDX is in a valid range. */
156
157 #define INDEX_TO_CODE_POINT(charset, idx) \
158 ((charset)->code_linear_p \
159 ? (idx) + (charset)->min_code \
160 : (idx += (charset)->char_index_offset, \
161 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
162 | (((charset)->code_space[4] \
163 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
164 << 8) \
165 | (((charset)->code_space[8] \
166 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
167 << 16) \
168 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
169 << 24))))
170
171 /* Structure to hold mapping tables for a charset. Used by temacs
172 invoked for dumping. */
173
174 static struct
175 {
176 /* The current charset for which the following tables are setup. */
177 struct charset *current;
178
179 /* 1 iff the following table is used for encoder. */
180 short for_encoder;
181
182 /* When the following table is used for encoding, mininum and
183 maxinum character of the current charset. */
184 int min_char, max_char;
185
186 /* A Unicode character correspoinding to the code indice 0 (i.e. the
187 minimum code-point) of the current charset, or -1 if the code
188 indice 0 is not a Unicode character. This is checked when
189 table.encoder[CHAR] is zero. */
190 int zero_index_char;
191
192 union {
193 /* Table mapping code-indices (not code-points) of the current
194 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
195 doesn't belong to the current charset. */
196 int decoder[0x10000];
197 /* Table mapping Unicode characters to code-indices of the current
198 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
199 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
200 (0x20000..0x2FFFF). Note that there is no charset map that
201 uses both SMP and SIP. */
202 unsigned short encoder[0x20000];
203 } table;
204 } *temp_charset_work;
205
206 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
207 do { \
208 if ((CODE) == 0) \
209 temp_charset_work->zero_index_char = (C); \
210 else if ((C) < 0x20000) \
211 temp_charset_work->table.encoder[(C)] = (CODE); \
212 else \
213 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
214 } while (0)
215
216 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
217 ((C) == temp_charset_work->zero_index_char ? 0 \
218 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
219 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
220 : temp_charset_work->table.encoder[(C) - 0x10000] \
221 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
222
223 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
224 (temp_charset_work->table.decoder[(CODE)] = (C))
225
226 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
227 (temp_charset_work->table.decoder[(CODE)])
228 \f
229
230 /* Set to 1 to warn that a charset map is loaded and thus a buffer
231 text and a string data may be relocated. */
232 int charset_map_loaded;
233
234 struct charset_map_entries
235 {
236 struct {
237 unsigned from, to;
238 int c;
239 } entry[0x10000];
240 struct charset_map_entries *next;
241 };
242
243 /* Load the mapping information of CHARSET from ENTRIES for
244 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
245 encoding (CONTROL_FLAG == 2).
246
247 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
248 and CHARSET->fast_map.
249
250 If CONTROL_FLAG is 1, setup the following tables according to
251 CHARSET->method and inhibit_load_charset_map.
252
253 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
254 ----------------------+--------------------+---------------------------
255 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
256 ----------------------+--------------------+---------------------------
257 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
258
259 If CONTROL_FLAG is 2, setup the following tables.
260
261 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
262 ----------------------+--------------------+---------------------------
263 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
264 ----------------------+--------------------+--------------------------
265 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
266 */
267
268 static void
269 load_charset_map (charset, entries, n_entries, control_flag)
270 struct charset *charset;
271 struct charset_map_entries *entries;
272 int n_entries;
273 int control_flag;
274 {
275 Lisp_Object vec, table;
276 unsigned max_code = CHARSET_MAX_CODE (charset);
277 int ascii_compatible_p = charset->ascii_compatible_p;
278 int min_char, max_char, nonascii_min_char;
279 int i;
280 unsigned char *fast_map = charset->fast_map;
281
282 if (n_entries <= 0)
283 return;
284
285 if (control_flag)
286 {
287 if (! inhibit_load_charset_map)
288 {
289 if (control_flag == 1)
290 {
291 if (charset->method == CHARSET_METHOD_MAP)
292 {
293 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
294
295 vec = CHARSET_DECODER (charset)
296 = Fmake_vector (make_number (n), make_number (-1));
297 }
298 else
299 {
300 char_table_set_range (Vchar_unify_table,
301 charset->min_char, charset->max_char,
302 Qnil);
303 }
304 }
305 else
306 {
307 table = Fmake_char_table (Qnil, Qnil);
308 if (charset->method == CHARSET_METHOD_MAP)
309 CHARSET_ENCODER (charset) = table;
310 else
311 CHARSET_DEUNIFIER (charset) = table;
312 }
313 }
314 else
315 {
316 if (! temp_charset_work)
317 temp_charset_work = malloc (sizeof (*temp_charset_work));
318 if (control_flag == 1)
319 {
320 memset (temp_charset_work->table.decoder, -1,
321 sizeof (int) * 0x10000);
322 }
323 else
324 {
325 memset (temp_charset_work->table.encoder, 0,
326 sizeof (unsigned short) * 0x20000);
327 temp_charset_work->zero_index_char = -1;
328 }
329 temp_charset_work->current = charset;
330 temp_charset_work->for_encoder = (control_flag == 2);
331 control_flag += 2;
332 }
333 charset_map_loaded = 1;
334 }
335
336 min_char = max_char = entries->entry[0].c;
337 nonascii_min_char = MAX_CHAR;
338 for (i = 0; i < n_entries; i++)
339 {
340 unsigned from, to;
341 int from_index, to_index;
342 int from_c, to_c;
343 int idx = i % 0x10000;
344
345 if (i > 0 && idx == 0)
346 entries = entries->next;
347 from = entries->entry[idx].from;
348 to = entries->entry[idx].to;
349 from_c = entries->entry[idx].c;
350 from_index = CODE_POINT_TO_INDEX (charset, from);
351 if (from == to)
352 {
353 to_index = from_index;
354 to_c = from_c;
355 }
356 else
357 {
358 to_index = CODE_POINT_TO_INDEX (charset, to);
359 to_c = from_c + (to_index - from_index);
360 }
361 if (from_index < 0 || to_index < 0)
362 continue;
363
364 if (to_c > max_char)
365 max_char = to_c;
366 else if (from_c < min_char)
367 min_char = from_c;
368
369 if (control_flag == 1)
370 {
371 if (charset->method == CHARSET_METHOD_MAP)
372 for (; from_index <= to_index; from_index++, from_c++)
373 ASET (vec, from_index, make_number (from_c));
374 else
375 for (; from_index <= to_index; from_index++, from_c++)
376 CHAR_TABLE_SET (Vchar_unify_table,
377 CHARSET_CODE_OFFSET (charset) + from_index,
378 make_number (from_c));
379 }
380 else if (control_flag == 2)
381 {
382 if (charset->method == CHARSET_METHOD_MAP
383 && CHARSET_COMPACT_CODES_P (charset))
384 for (; from_index <= to_index; from_index++, from_c++)
385 {
386 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
387
388 if (NILP (CHAR_TABLE_REF (table, from_c)))
389 CHAR_TABLE_SET (table, from_c, make_number (code));
390 }
391 else
392 for (; from_index <= to_index; from_index++, from_c++)
393 {
394 if (NILP (CHAR_TABLE_REF (table, from_c)))
395 CHAR_TABLE_SET (table, from_c, make_number (from_index));
396 }
397 }
398 else if (control_flag == 3)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
401 else if (control_flag == 4)
402 for (; from_index <= to_index; from_index++, from_c++)
403 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
404 else /* control_flag == 0 */
405 {
406 if (ascii_compatible_p)
407 {
408 if (! ASCII_BYTE_P (from_c))
409 {
410 if (from_c < nonascii_min_char)
411 nonascii_min_char = from_c;
412 }
413 else if (! ASCII_BYTE_P (to_c))
414 {
415 nonascii_min_char = 0x80;
416 }
417 }
418
419 for (; from_c <= to_c; from_c++)
420 CHARSET_FAST_MAP_SET (from_c, fast_map);
421 }
422 }
423
424 if (control_flag == 0)
425 {
426 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
427 ? nonascii_min_char : min_char);
428 CHARSET_MAX_CHAR (charset) = max_char;
429 }
430 else if (control_flag == 4)
431 {
432 temp_charset_work->min_char = min_char;
433 temp_charset_work->max_char = max_char;
434 }
435 }
436
437
438 /* Read a hexadecimal number (preceded by "0x") from the file FP while
439 paying attention to comment charcter '#'. */
440
441 static INLINE unsigned
442 read_hex (fp, eof)
443 FILE *fp;
444 int *eof;
445 {
446 int c;
447 unsigned n;
448
449 while ((c = getc (fp)) != EOF)
450 {
451 if (c == '#')
452 {
453 while ((c = getc (fp)) != EOF && c != '\n');
454 }
455 else if (c == '0')
456 {
457 if ((c = getc (fp)) == EOF || c == 'x')
458 break;
459 }
460 }
461 if (c == EOF)
462 {
463 *eof = 1;
464 return 0;
465 }
466 *eof = 0;
467 n = 0;
468 if (c == 'x')
469 while ((c = getc (fp)) != EOF && isxdigit (c))
470 n = ((n << 4)
471 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
472 else
473 while ((c = getc (fp)) != EOF && isdigit (c))
474 n = (n * 10) + c - '0';
475 if (c != EOF)
476 ungetc (c, fp);
477 return n;
478 }
479
480 extern Lisp_Object Qfile_name_handler_alist;
481
482 /* Return a mapping vector for CHARSET loaded from MAPFILE.
483 Each line of MAPFILE has this form
484 0xAAAA 0xCCCC
485 where 0xAAAA is a code-point and 0xCCCC is the corresponding
486 character code, or this form
487 0xAAAA-0xBBBB 0xCCCC
488 where 0xAAAA and 0xBBBB are code-points specifying a range, and
489 0xCCCC is the first character code of the range.
490
491 The returned vector has this form:
492 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
493 where CODE1 is a code-point or a cons of code-points specifying a
494 range.
495
496 Note that this function uses `openp' to open MAPFILE but ignores
497 `file-name-handler-alist' to avoid running any Lisp code. */
498
499 extern void add_to_log P_ ((char *, Lisp_Object, Lisp_Object));
500
501 static void
502 load_charset_map_from_file (charset, mapfile, control_flag)
503 struct charset *charset;
504 Lisp_Object mapfile;
505 int control_flag;
506 {
507 unsigned min_code = CHARSET_MIN_CODE (charset);
508 unsigned max_code = CHARSET_MAX_CODE (charset);
509 int fd;
510 FILE *fp;
511 int eof;
512 Lisp_Object suffixes;
513 struct charset_map_entries *head, *entries;
514 int n_entries;
515 int count = SPECPDL_INDEX ();
516
517 suffixes = Fcons (build_string (".map"),
518 Fcons (build_string (".TXT"), Qnil));
519
520 specbind (Qfile_name_handler_alist, Qnil);
521 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
522 unbind_to (count, Qnil);
523 if (fd < 0
524 || ! (fp = fdopen (fd, "r")))
525 error ("Failure in loading charset map: %S", SDATA (mapfile));
526
527 head = entries = ((struct charset_map_entries *)
528 alloca (sizeof (struct charset_map_entries)));
529 n_entries = 0;
530 eof = 0;
531 while (1)
532 {
533 unsigned from, to;
534 int c;
535 int idx;
536
537 from = read_hex (fp, &eof);
538 if (eof)
539 break;
540 if (getc (fp) == '-')
541 to = read_hex (fp, &eof);
542 else
543 to = from;
544 c = (int) read_hex (fp, &eof);
545
546 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
547 continue;
548
549 if (n_entries > 0 && (n_entries % 0x10000) == 0)
550 {
551 entries->next = ((struct charset_map_entries *)
552 alloca (sizeof (struct charset_map_entries)));
553 entries = entries->next;
554 }
555 idx = n_entries % 0x10000;
556 entries->entry[idx].from = from;
557 entries->entry[idx].to = to;
558 entries->entry[idx].c = c;
559 n_entries++;
560 }
561 fclose (fp);
562 close (fd);
563
564 load_charset_map (charset, head, n_entries, control_flag);
565 }
566
567 static void
568 load_charset_map_from_vector (charset, vec, control_flag)
569 struct charset *charset;
570 Lisp_Object vec;
571 int control_flag;
572 {
573 unsigned min_code = CHARSET_MIN_CODE (charset);
574 unsigned max_code = CHARSET_MAX_CODE (charset);
575 struct charset_map_entries *head, *entries;
576 int n_entries;
577 int len = ASIZE (vec);
578 int i;
579
580 if (len % 2 == 1)
581 {
582 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
583 return;
584 }
585
586 head = entries = ((struct charset_map_entries *)
587 alloca (sizeof (struct charset_map_entries)));
588 n_entries = 0;
589 for (i = 0; i < len; i += 2)
590 {
591 Lisp_Object val, val2;
592 unsigned from, to;
593 int c;
594 int idx;
595
596 val = AREF (vec, i);
597 if (CONSP (val))
598 {
599 val2 = XCDR (val);
600 val = XCAR (val);
601 CHECK_NATNUM (val);
602 CHECK_NATNUM (val2);
603 from = XFASTINT (val);
604 to = XFASTINT (val2);
605 }
606 else
607 {
608 CHECK_NATNUM (val);
609 from = to = XFASTINT (val);
610 }
611 val = AREF (vec, i + 1);
612 CHECK_NATNUM (val);
613 c = XFASTINT (val);
614
615 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
616 continue;
617
618 if (n_entries > 0 && (n_entries % 0x10000) == 0)
619 {
620 entries->next = ((struct charset_map_entries *)
621 alloca (sizeof (struct charset_map_entries)));
622 entries = entries->next;
623 }
624 idx = n_entries % 0x10000;
625 entries->entry[idx].from = from;
626 entries->entry[idx].to = to;
627 entries->entry[idx].c = c;
628 n_entries++;
629 }
630
631 load_charset_map (charset, head, n_entries, control_flag);
632 }
633
634
635 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
636 map it is (see the comment of load_charset_map for the detail). */
637
638 static void
639 load_charset (charset, control_flag)
640 struct charset *charset;
641 int control_flag;
642 {
643 Lisp_Object map;
644
645 if (inhibit_load_charset_map
646 && temp_charset_work
647 && charset == temp_charset_work->current
648 && ((control_flag == 2) == temp_charset_work->for_encoder))
649 return;
650
651 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
652 map = CHARSET_MAP (charset);
653 else if (CHARSET_UNIFIED_P (charset))
654 map = CHARSET_UNIFY_MAP (charset);
655 if (STRINGP (map))
656 load_charset_map_from_file (charset, map, control_flag);
657 else
658 load_charset_map_from_vector (charset, map, control_flag);
659 }
660
661
662 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
663 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
664 (object)
665 Lisp_Object object;
666 {
667 return (CHARSETP (object) ? Qt : Qnil);
668 }
669
670
671 void map_charset_for_dump P_ ((void (*c_function) (Lisp_Object, Lisp_Object),
672 Lisp_Object function, Lisp_Object arg,
673 unsigned from, unsigned to));
674
675 void
676 map_charset_for_dump (c_function, function, arg, from, to)
677 void (*c_function) (Lisp_Object, Lisp_Object);
678 Lisp_Object function, arg;
679 unsigned from, to;
680 {
681 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
682 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
683 Lisp_Object range;
684 int c, stop;
685 struct gcpro gcpro1;
686
687 range = Fcons (Qnil, Qnil);
688 GCPRO1 (range);
689
690 c = temp_charset_work->min_char;
691 stop = (temp_charset_work->max_char < 0x20000
692 ? temp_charset_work->max_char : 0xFFFF);
693
694 while (1)
695 {
696 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
697
698 if (index >= from_idx && index <= to_idx)
699 {
700 if (NILP (XCAR (range)))
701 XSETCAR (range, make_number (c));
702 }
703 else if (! NILP (XCAR (range)))
704 {
705 XSETCDR (range, make_number (c - 1));
706 if (c_function)
707 (*c_function) (arg, range);
708 else
709 call2 (function, range, arg);
710 XSETCAR (range, Qnil);
711 }
712 if (c == stop)
713 {
714 if (c == temp_charset_work->max_char)
715 {
716 if (! NILP (XCAR (range)))
717 {
718 XSETCDR (range, make_number (c));
719 if (c_function)
720 (*c_function) (arg, range);
721 else
722 call2 (function, range, arg);
723 }
724 break;
725 }
726 c = 0x1FFFF;
727 stop = temp_charset_work->max_char;
728 }
729 c++;
730 }
731 UNGCPRO;
732 }
733
734 void
735 map_charset_chars (c_function, function, arg,
736 charset, from, to)
737 void (*c_function) P_ ((Lisp_Object, Lisp_Object));
738 Lisp_Object function, arg;
739 struct charset *charset;
740 unsigned from, to;
741 {
742 Lisp_Object range;
743 int partial;
744
745 partial = (from > CHARSET_MIN_CODE (charset)
746 || to < CHARSET_MAX_CODE (charset));
747
748 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
749 {
750 int from_idx = CODE_POINT_TO_INDEX (charset, from);
751 int to_idx = CODE_POINT_TO_INDEX (charset, to);
752 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
753 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
754
755 if (CHARSET_UNIFIED_P (charset))
756 {
757 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
758 load_charset (charset, 2);
759 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
760 map_char_table_for_charset (c_function, function,
761 CHARSET_DEUNIFIER (charset), arg,
762 partial ? charset : NULL, from, to);
763 else
764 map_charset_for_dump (c_function, function, arg, from, to);
765 }
766
767 range = Fcons (make_number (from_c), make_number (to_c));
768 if (NILP (function))
769 (*c_function) (arg, range);
770 else
771 call2 (function, range, arg);
772 }
773 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
774 {
775 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
776 load_charset (charset, 2);
777 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
778 map_char_table_for_charset (c_function, function,
779 CHARSET_ENCODER (charset), arg,
780 partial ? charset : NULL, from, to);
781 else
782 map_charset_for_dump (c_function, function, arg, from, to);
783 }
784 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
785 {
786 Lisp_Object subset_info;
787 int offset;
788
789 subset_info = CHARSET_SUBSET (charset);
790 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
791 offset = XINT (AREF (subset_info, 3));
792 from -= offset;
793 if (from < XFASTINT (AREF (subset_info, 1)))
794 from = XFASTINT (AREF (subset_info, 1));
795 to -= offset;
796 if (to > XFASTINT (AREF (subset_info, 2)))
797 to = XFASTINT (AREF (subset_info, 2));
798 map_charset_chars (c_function, function, arg, charset, from, to);
799 }
800 else /* i.e. CHARSET_METHOD_SUPERSET */
801 {
802 Lisp_Object parents;
803
804 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
805 parents = XCDR (parents))
806 {
807 int offset;
808 unsigned this_from, this_to;
809
810 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
811 offset = XINT (XCDR (XCAR (parents)));
812 this_from = from > offset ? from - offset : 0;
813 this_to = to > offset ? to - offset : 0;
814 if (this_from < CHARSET_MIN_CODE (charset))
815 this_from = CHARSET_MIN_CODE (charset);
816 if (this_to > CHARSET_MAX_CODE (charset))
817 this_to = CHARSET_MAX_CODE (charset);
818 map_charset_chars (c_function, function, arg, charset,
819 this_from, this_to);
820 }
821 }
822 }
823
824 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
825 doc: /* Call FUNCTION for all characters in CHARSET.
826 FUNCTION is called with an argument RANGE and the optional 3rd
827 argument ARG.
828
829 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
830 characters contained in CHARSET.
831
832 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
833 range of code points (in CHARSET) of target characters. */)
834 (function, charset, arg, from_code, to_code)
835 Lisp_Object function, charset, arg, from_code, to_code;
836 {
837 struct charset *cs;
838 unsigned from, to;
839
840 CHECK_CHARSET_GET_CHARSET (charset, cs);
841 if (NILP (from_code))
842 from = CHARSET_MIN_CODE (cs);
843 else
844 {
845 CHECK_NATNUM (from_code);
846 from = XINT (from_code);
847 if (from < CHARSET_MIN_CODE (cs))
848 from = CHARSET_MIN_CODE (cs);
849 }
850 if (NILP (to_code))
851 to = CHARSET_MAX_CODE (cs);
852 else
853 {
854 CHECK_NATNUM (to_code);
855 to = XINT (to_code);
856 if (to > CHARSET_MAX_CODE (cs))
857 to = CHARSET_MAX_CODE (cs);
858 }
859 map_charset_chars (NULL, function, arg, cs, from, to);
860 return Qnil;
861 }
862
863
864 /* Define a charset according to the arguments. The Nth argument is
865 the Nth attribute of the charset (the last attribute `charset-id'
866 is not included). See the docstring of `define-charset' for the
867 detail. */
868
869 DEFUN ("define-charset-internal", Fdefine_charset_internal,
870 Sdefine_charset_internal, charset_arg_max, MANY, 0,
871 doc: /* For internal use only.
872 usage: (define-charset-internal ...) */)
873 (nargs, args)
874 int nargs;
875 Lisp_Object *args;
876 {
877 /* Charset attr vector. */
878 Lisp_Object attrs;
879 Lisp_Object val;
880 unsigned hash_code;
881 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
882 int i, j;
883 struct charset charset;
884 int id;
885 int dimension;
886 int new_definition_p;
887 int nchars;
888
889 if (nargs != charset_arg_max)
890 return Fsignal (Qwrong_number_of_arguments,
891 Fcons (intern ("define-charset-internal"),
892 make_number (nargs)));
893
894 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
895
896 CHECK_SYMBOL (args[charset_arg_name]);
897 ASET (attrs, charset_name, args[charset_arg_name]);
898
899 val = args[charset_arg_code_space];
900 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
901 {
902 int min_byte, max_byte;
903
904 min_byte = XINT (Faref (val, make_number (i * 2)));
905 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
906 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
907 error ("Invalid :code-space value");
908 charset.code_space[i * 4] = min_byte;
909 charset.code_space[i * 4 + 1] = max_byte;
910 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
911 nchars *= charset.code_space[i * 4 + 2];
912 charset.code_space[i * 4 + 3] = nchars;
913 if (max_byte > 0)
914 dimension = i + 1;
915 }
916
917 val = args[charset_arg_dimension];
918 if (NILP (val))
919 charset.dimension = dimension;
920 else
921 {
922 CHECK_NATNUM (val);
923 charset.dimension = XINT (val);
924 if (charset.dimension < 1 || charset.dimension > 4)
925 args_out_of_range_3 (val, make_number (1), make_number (4));
926 }
927
928 charset.code_linear_p
929 = (charset.dimension == 1
930 || (charset.code_space[2] == 256
931 && (charset.dimension == 2
932 || (charset.code_space[6] == 256
933 && (charset.dimension == 3
934 || charset.code_space[10] == 256)))));
935
936 if (! charset.code_linear_p)
937 {
938 charset.code_space_mask = (unsigned char *) xmalloc (256);
939 bzero (charset.code_space_mask, 256);
940 for (i = 0; i < 4; i++)
941 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
942 j++)
943 charset.code_space_mask[j] |= (1 << i);
944 }
945
946 charset.iso_chars_96 = charset.code_space[2] == 96;
947
948 charset.min_code = (charset.code_space[0]
949 | (charset.code_space[4] << 8)
950 | (charset.code_space[8] << 16)
951 | (charset.code_space[12] << 24));
952 charset.max_code = (charset.code_space[1]
953 | (charset.code_space[5] << 8)
954 | (charset.code_space[9] << 16)
955 | (charset.code_space[13] << 24));
956 charset.char_index_offset = 0;
957
958 val = args[charset_arg_min_code];
959 if (! NILP (val))
960 {
961 unsigned code;
962
963 if (INTEGERP (val))
964 code = XINT (val);
965 else
966 {
967 CHECK_CONS (val);
968 CHECK_NUMBER_CAR (val);
969 CHECK_NUMBER_CDR (val);
970 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
971 }
972 if (code < charset.min_code
973 || code > charset.max_code)
974 args_out_of_range_3 (make_number (charset.min_code),
975 make_number (charset.max_code), val);
976 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
977 charset.min_code = code;
978 }
979
980 val = args[charset_arg_max_code];
981 if (! NILP (val))
982 {
983 unsigned code;
984
985 if (INTEGERP (val))
986 code = XINT (val);
987 else
988 {
989 CHECK_CONS (val);
990 CHECK_NUMBER_CAR (val);
991 CHECK_NUMBER_CDR (val);
992 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
993 }
994 if (code < charset.min_code
995 || code > charset.max_code)
996 args_out_of_range_3 (make_number (charset.min_code),
997 make_number (charset.max_code), val);
998 charset.max_code = code;
999 }
1000
1001 charset.compact_codes_p = charset.max_code < 0x10000;
1002
1003 val = args[charset_arg_invalid_code];
1004 if (NILP (val))
1005 {
1006 if (charset.min_code > 0)
1007 charset.invalid_code = 0;
1008 else
1009 {
1010 XSETINT (val, charset.max_code + 1);
1011 if (XINT (val) == charset.max_code + 1)
1012 charset.invalid_code = charset.max_code + 1;
1013 else
1014 error ("Attribute :invalid-code must be specified");
1015 }
1016 }
1017 else
1018 {
1019 CHECK_NATNUM (val);
1020 charset.invalid_code = XFASTINT (val);
1021 }
1022
1023 val = args[charset_arg_iso_final];
1024 if (NILP (val))
1025 charset.iso_final = -1;
1026 else
1027 {
1028 CHECK_NUMBER (val);
1029 if (XINT (val) < '0' || XINT (val) > 127)
1030 error ("Invalid iso-final-char: %d", XINT (val));
1031 charset.iso_final = XINT (val);
1032 }
1033
1034 val = args[charset_arg_iso_revision];
1035 if (NILP (val))
1036 charset.iso_revision = -1;
1037 else
1038 {
1039 CHECK_NUMBER (val);
1040 if (XINT (val) > 63)
1041 args_out_of_range (make_number (63), val);
1042 charset.iso_revision = XINT (val);
1043 }
1044
1045 val = args[charset_arg_emacs_mule_id];
1046 if (NILP (val))
1047 charset.emacs_mule_id = -1;
1048 else
1049 {
1050 CHECK_NATNUM (val);
1051 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1052 error ("Invalid emacs-mule-id: %d", XINT (val));
1053 charset.emacs_mule_id = XINT (val);
1054 }
1055
1056 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1057
1058 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1059
1060 charset.unified_p = 0;
1061
1062 bzero (charset.fast_map, sizeof (charset.fast_map));
1063
1064 if (! NILP (args[charset_arg_code_offset]))
1065 {
1066 val = args[charset_arg_code_offset];
1067 CHECK_NUMBER (val);
1068
1069 charset.method = CHARSET_METHOD_OFFSET;
1070 charset.code_offset = XINT (val);
1071
1072 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1073 charset.min_char = i + charset.code_offset;
1074 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1075 charset.max_char = i + charset.code_offset;
1076 if (charset.max_char > MAX_CHAR)
1077 error ("Unsupported max char: %d", charset.max_char);
1078
1079 i = (charset.min_char >> 7) << 7;
1080 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1081 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1082 i = (i >> 12) << 12;
1083 for (; i <= charset.max_char; i += 0x1000)
1084 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1085 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1086 charset.ascii_compatible_p = 1;
1087 }
1088 else if (! NILP (args[charset_arg_map]))
1089 {
1090 val = args[charset_arg_map];
1091 ASET (attrs, charset_map, val);
1092 charset.method = CHARSET_METHOD_MAP;
1093 }
1094 else if (! NILP (args[charset_arg_subset]))
1095 {
1096 Lisp_Object parent;
1097 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1098 struct charset *parent_charset;
1099
1100 val = args[charset_arg_subset];
1101 parent = Fcar (val);
1102 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1103 parent_min_code = Fnth (make_number (1), val);
1104 CHECK_NATNUM (parent_min_code);
1105 parent_max_code = Fnth (make_number (2), val);
1106 CHECK_NATNUM (parent_max_code);
1107 parent_code_offset = Fnth (make_number (3), val);
1108 CHECK_NUMBER (parent_code_offset);
1109 val = Fmake_vector (make_number (4), Qnil);
1110 ASET (val, 0, make_number (parent_charset->id));
1111 ASET (val, 1, parent_min_code);
1112 ASET (val, 2, parent_max_code);
1113 ASET (val, 3, parent_code_offset);
1114 ASET (attrs, charset_subset, val);
1115
1116 charset.method = CHARSET_METHOD_SUBSET;
1117 /* Here, we just copy the parent's fast_map. It's not accurate,
1118 but at least it works for quickly detecting which character
1119 DOESN'T belong to this charset. */
1120 for (i = 0; i < 190; i++)
1121 charset.fast_map[i] = parent_charset->fast_map[i];
1122
1123 /* We also copy these for parents. */
1124 charset.min_char = parent_charset->min_char;
1125 charset.max_char = parent_charset->max_char;
1126 }
1127 else if (! NILP (args[charset_arg_superset]))
1128 {
1129 val = args[charset_arg_superset];
1130 charset.method = CHARSET_METHOD_SUPERSET;
1131 val = Fcopy_sequence (val);
1132 ASET (attrs, charset_superset, val);
1133
1134 charset.min_char = MAX_CHAR;
1135 charset.max_char = 0;
1136 for (; ! NILP (val); val = Fcdr (val))
1137 {
1138 Lisp_Object elt, car_part, cdr_part;
1139 int this_id, offset;
1140 struct charset *this_charset;
1141
1142 elt = Fcar (val);
1143 if (CONSP (elt))
1144 {
1145 car_part = XCAR (elt);
1146 cdr_part = XCDR (elt);
1147 CHECK_CHARSET_GET_ID (car_part, this_id);
1148 CHECK_NUMBER (cdr_part);
1149 offset = XINT (cdr_part);
1150 }
1151 else
1152 {
1153 CHECK_CHARSET_GET_ID (elt, this_id);
1154 offset = 0;
1155 }
1156 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1157
1158 this_charset = CHARSET_FROM_ID (this_id);
1159 if (charset.min_char > this_charset->min_char)
1160 charset.min_char = this_charset->min_char;
1161 if (charset.max_char < this_charset->max_char)
1162 charset.max_char = this_charset->max_char;
1163 for (i = 0; i < 190; i++)
1164 charset.fast_map[i] |= this_charset->fast_map[i];
1165 }
1166 }
1167 else
1168 error ("None of :code-offset, :map, :parents are specified");
1169
1170 val = args[charset_arg_unify_map];
1171 if (! NILP (val) && !STRINGP (val))
1172 CHECK_VECTOR (val);
1173 ASET (attrs, charset_unify_map, val);
1174
1175 CHECK_LIST (args[charset_arg_plist]);
1176 ASET (attrs, charset_plist, args[charset_arg_plist]);
1177
1178 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1179 &hash_code);
1180 if (charset.hash_index >= 0)
1181 {
1182 new_definition_p = 0;
1183 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1184 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1185 }
1186 else
1187 {
1188 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1189 hash_code);
1190 if (charset_table_used == charset_table_size)
1191 {
1192 struct charset *new_table
1193 = (struct charset *) xmalloc (sizeof (struct charset)
1194 * (charset_table_size + 16));
1195 bcopy (charset_table, new_table,
1196 sizeof (struct charset) * charset_table_size);
1197 charset_table_size += 16;
1198 charset_table = new_table;
1199 }
1200 id = charset_table_used++;
1201 new_definition_p = 1;
1202 }
1203
1204 ASET (attrs, charset_id, make_number (id));
1205 charset.id = id;
1206 charset_table[id] = charset;
1207
1208 if (charset.method == CHARSET_METHOD_MAP)
1209 {
1210 load_charset (&charset, 0);
1211 charset_table[id] = charset;
1212 }
1213
1214 if (charset.iso_final >= 0)
1215 {
1216 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1217 charset.iso_final) = id;
1218 if (new_definition_p)
1219 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1220 Fcons (make_number (id), Qnil));
1221 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1222 charset_jisx0201_roman = id;
1223 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1224 charset_jisx0208_1978 = id;
1225 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1226 charset_jisx0208 = id;
1227 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1228 charset_ksc5601 = id;
1229 }
1230
1231 if (charset.emacs_mule_id >= 0)
1232 {
1233 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1234 if (charset.emacs_mule_id < 0xA0)
1235 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1236 else
1237 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1238 if (new_definition_p)
1239 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1240 Fcons (make_number (id), Qnil));
1241 }
1242
1243 if (new_definition_p)
1244 {
1245 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1246 if (charset.supplementary_p)
1247 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1248 Fcons (make_number (id), Qnil));
1249 else
1250 {
1251 Lisp_Object tail;
1252
1253 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1254 {
1255 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1256
1257 if (cs->supplementary_p)
1258 break;
1259 }
1260 if (EQ (tail, Vcharset_ordered_list))
1261 Vcharset_ordered_list = Fcons (make_number (id),
1262 Vcharset_ordered_list);
1263 else if (NILP (tail))
1264 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1265 Fcons (make_number (id), Qnil));
1266 else
1267 {
1268 val = Fcons (XCAR (tail), XCDR (tail));
1269 XSETCDR (tail, val);
1270 XSETCAR (tail, make_number (id));
1271 }
1272 }
1273 charset_ordered_list_tick++;
1274 }
1275
1276 return Qnil;
1277 }
1278
1279
1280 /* Same as Fdefine_charset_internal but arguments are more convenient
1281 to call from C (typically in syms_of_charset). This can define a
1282 charset of `offset' method only. Return the ID of the new
1283 charset. */
1284
1285 static int
1286 define_charset_internal (name, dimension, code_space, min_code, max_code,
1287 iso_final, iso_revision, emacs_mule_id,
1288 ascii_compatible, supplementary,
1289 code_offset)
1290 Lisp_Object name;
1291 int dimension;
1292 unsigned char *code_space;
1293 unsigned min_code, max_code;
1294 int iso_final, iso_revision, emacs_mule_id;
1295 int ascii_compatible, supplementary;
1296 int code_offset;
1297 {
1298 Lisp_Object args[charset_arg_max];
1299 Lisp_Object plist[14];
1300 Lisp_Object val;
1301 int i;
1302
1303 args[charset_arg_name] = name;
1304 args[charset_arg_dimension] = make_number (dimension);
1305 val = Fmake_vector (make_number (8), make_number (0));
1306 for (i = 0; i < 8; i++)
1307 ASET (val, i, make_number (code_space[i]));
1308 args[charset_arg_code_space] = val;
1309 args[charset_arg_min_code] = make_number (min_code);
1310 args[charset_arg_max_code] = make_number (max_code);
1311 args[charset_arg_iso_final]
1312 = (iso_final < 0 ? Qnil : make_number (iso_final));
1313 args[charset_arg_iso_revision] = make_number (iso_revision);
1314 args[charset_arg_emacs_mule_id]
1315 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1316 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1317 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1318 args[charset_arg_invalid_code] = Qnil;
1319 args[charset_arg_code_offset] = make_number (code_offset);
1320 args[charset_arg_map] = Qnil;
1321 args[charset_arg_subset] = Qnil;
1322 args[charset_arg_superset] = Qnil;
1323 args[charset_arg_unify_map] = Qnil;
1324
1325 plist[0] = intern (":name");
1326 plist[1] = args[charset_arg_name];
1327 plist[2] = intern (":dimension");
1328 plist[3] = args[charset_arg_dimension];
1329 plist[4] = intern (":code-space");
1330 plist[5] = args[charset_arg_code_space];
1331 plist[6] = intern (":iso-final-char");
1332 plist[7] = args[charset_arg_iso_final];
1333 plist[8] = intern (":emacs-mule-id");
1334 plist[9] = args[charset_arg_emacs_mule_id];
1335 plist[10] = intern (":ascii-compatible-p");
1336 plist[11] = args[charset_arg_ascii_compatible_p];
1337 plist[12] = intern (":code-offset");
1338 plist[13] = args[charset_arg_code_offset];
1339
1340 args[charset_arg_plist] = Flist (14, plist);
1341 Fdefine_charset_internal (charset_arg_max, args);
1342
1343 return XINT (CHARSET_SYMBOL_ID (name));
1344 }
1345
1346
1347 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1348 Sdefine_charset_alias, 2, 2, 0,
1349 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1350 (alias, charset)
1351 Lisp_Object alias, charset;
1352 {
1353 Lisp_Object attr;
1354
1355 CHECK_CHARSET_GET_ATTR (charset, attr);
1356 Fputhash (alias, attr, Vcharset_hash_table);
1357 Vcharset_list = Fcons (alias, Vcharset_list);
1358 return Qnil;
1359 }
1360
1361
1362 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1363 doc: /* Return the property list of CHARSET. */)
1364 (charset)
1365 Lisp_Object charset;
1366 {
1367 Lisp_Object attrs;
1368
1369 CHECK_CHARSET_GET_ATTR (charset, attrs);
1370 return CHARSET_ATTR_PLIST (attrs);
1371 }
1372
1373
1374 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1375 doc: /* Set CHARSET's property list to PLIST. */)
1376 (charset, plist)
1377 Lisp_Object charset, plist;
1378 {
1379 Lisp_Object attrs;
1380
1381 CHECK_CHARSET_GET_ATTR (charset, attrs);
1382 CHARSET_ATTR_PLIST (attrs) = plist;
1383 return plist;
1384 }
1385
1386
1387 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1388 doc: /* Unify characters of CHARSET with Unicode.
1389 This means reading the relevant file and installing the table defined
1390 by CHARSET's `:unify-map' property.
1391
1392 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1393 the same meaning as the `:unify-map' attribute in the function
1394 `define-charset' (which see).
1395
1396 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1397 (charset, unify_map, deunify)
1398 Lisp_Object charset, unify_map, deunify;
1399 {
1400 int id;
1401 struct charset *cs;
1402
1403 CHECK_CHARSET_GET_ID (charset, id);
1404 cs = CHARSET_FROM_ID (id);
1405 if (NILP (deunify)
1406 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1407 : ! CHARSET_UNIFIED_P (cs))
1408 return Qnil;
1409
1410 CHARSET_UNIFIED_P (cs) = 0;
1411 if (NILP (deunify))
1412 {
1413 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1414 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1415 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1416 if (NILP (unify_map))
1417 unify_map = CHARSET_UNIFY_MAP (cs);
1418 else
1419 {
1420 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1421 signal_error ("Bad unify-map", unify_map);
1422 CHARSET_UNIFY_MAP (cs) = unify_map;
1423 }
1424 if (NILP (Vchar_unify_table))
1425 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1426 char_table_set_range (Vchar_unify_table,
1427 cs->min_char, cs->max_char, charset);
1428 CHARSET_UNIFIED_P (cs) = 1;
1429 }
1430 else if (CHAR_TABLE_P (Vchar_unify_table))
1431 {
1432 int min_code = CHARSET_MIN_CODE (cs);
1433 int max_code = CHARSET_MAX_CODE (cs);
1434 int min_char = DECODE_CHAR (cs, min_code);
1435 int max_char = DECODE_CHAR (cs, max_code);
1436
1437 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1438 }
1439
1440 return Qnil;
1441 }
1442
1443 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1444 Sget_unused_iso_final_char, 2, 2, 0,
1445 doc: /*
1446 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1447 DIMENSION is the number of bytes to represent a character: 1 or 2.
1448 CHARS is the number of characters in a dimension: 94 or 96.
1449
1450 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1451 If there's no unused final char for the specified kind of charset,
1452 return nil. */)
1453 (dimension, chars)
1454 Lisp_Object dimension, chars;
1455 {
1456 int final_char;
1457
1458 CHECK_NUMBER (dimension);
1459 CHECK_NUMBER (chars);
1460 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1461 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1462 if (XINT (chars) != 94 && XINT (chars) != 96)
1463 args_out_of_range_3 (chars, make_number (94), make_number (96));
1464 for (final_char = '0'; final_char <= '?'; final_char++)
1465 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1466 break;
1467 return (final_char <= '?' ? make_number (final_char) : Qnil);
1468 }
1469
1470 static void
1471 check_iso_charset_parameter (dimension, chars, final_char)
1472 Lisp_Object dimension, chars, final_char;
1473 {
1474 CHECK_NATNUM (dimension);
1475 CHECK_NATNUM (chars);
1476 CHECK_NATNUM (final_char);
1477
1478 if (XINT (dimension) > 3)
1479 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1480 if (XINT (chars) != 94 && XINT (chars) != 96)
1481 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1482 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1483 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1484 }
1485
1486
1487 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1488 4, 4, 0,
1489 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1490
1491 On decoding by an ISO-2022 base coding system, when a charset
1492 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1493 if CHARSET is designated instead. */)
1494 (dimension, chars, final_char, charset)
1495 Lisp_Object dimension, chars, final_char, charset;
1496 {
1497 int id;
1498 int chars_flag;
1499
1500 CHECK_CHARSET_GET_ID (charset, id);
1501 check_iso_charset_parameter (dimension, chars, final_char);
1502 chars_flag = XINT (chars) == 96;
1503 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1504 return Qnil;
1505 }
1506
1507
1508 /* Return information about charsets in the text at PTR of NBYTES
1509 bytes, which are NCHARS characters. The value is:
1510
1511 0: Each character is represented by one byte. This is always
1512 true for a unibyte string. For a multibyte string, true if
1513 it contains only ASCII characters.
1514
1515 1: No charsets other than ascii, control-1, and latin-1 are
1516 found.
1517
1518 2: Otherwise.
1519 */
1520
1521 int
1522 string_xstring_p (string)
1523 Lisp_Object string;
1524 {
1525 const unsigned char *p = SDATA (string);
1526 const unsigned char *endp = p + SBYTES (string);
1527
1528 if (SCHARS (string) == SBYTES (string))
1529 return 0;
1530
1531 while (p < endp)
1532 {
1533 int c = STRING_CHAR_ADVANCE (p);
1534
1535 if (c >= 0x100)
1536 return 2;
1537 }
1538 return 1;
1539 }
1540
1541
1542 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1543
1544 CHARSETS is a vector. If Nth element is non-nil, it means the
1545 charset whose id is N is already found.
1546
1547 It may lookup a translation table TABLE if supplied. */
1548
1549 static void
1550 find_charsets_in_text (ptr, nchars, nbytes, charsets, table, multibyte)
1551 const unsigned char *ptr;
1552 EMACS_INT nchars, nbytes;
1553 Lisp_Object charsets, table;
1554 int multibyte;
1555 {
1556 const unsigned char *pend = ptr + nbytes;
1557
1558 if (nchars == nbytes)
1559 {
1560 if (multibyte)
1561 ASET (charsets, charset_ascii, Qt);
1562 else
1563 while (ptr < pend)
1564 {
1565 int c = *ptr++;
1566
1567 if (!NILP (table))
1568 c = translate_char (table, c);
1569 if (ASCII_BYTE_P (c))
1570 ASET (charsets, charset_ascii, Qt);
1571 else
1572 ASET (charsets, charset_eight_bit, Qt);
1573 }
1574 }
1575 else
1576 {
1577 while (ptr < pend)
1578 {
1579 int c = STRING_CHAR_ADVANCE (ptr);
1580 struct charset *charset;
1581
1582 if (!NILP (table))
1583 c = translate_char (table, c);
1584 charset = CHAR_CHARSET (c);
1585 ASET (charsets, CHARSET_ID (charset), Qt);
1586 }
1587 }
1588 }
1589
1590 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1591 2, 3, 0,
1592 doc: /* Return a list of charsets in the region between BEG and END.
1593 BEG and END are buffer positions.
1594 Optional arg TABLE if non-nil is a translation table to look up.
1595
1596 If the current buffer is unibyte, the returned list may contain
1597 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1598 (beg, end, table)
1599 Lisp_Object beg, end, table;
1600 {
1601 Lisp_Object charsets;
1602 EMACS_INT from, from_byte, to, stop, stop_byte;
1603 int i;
1604 Lisp_Object val;
1605 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1606
1607 validate_region (&beg, &end);
1608 from = XFASTINT (beg);
1609 stop = to = XFASTINT (end);
1610
1611 if (from < GPT && GPT < to)
1612 {
1613 stop = GPT;
1614 stop_byte = GPT_BYTE;
1615 }
1616 else
1617 stop_byte = CHAR_TO_BYTE (stop);
1618
1619 from_byte = CHAR_TO_BYTE (from);
1620
1621 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1622 while (1)
1623 {
1624 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1625 stop_byte - from_byte, charsets, table,
1626 multibyte);
1627 if (stop < to)
1628 {
1629 from = stop, from_byte = stop_byte;
1630 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1631 }
1632 else
1633 break;
1634 }
1635
1636 val = Qnil;
1637 for (i = charset_table_used - 1; i >= 0; i--)
1638 if (!NILP (AREF (charsets, i)))
1639 val = Fcons (CHARSET_NAME (charset_table + i), val);
1640 return val;
1641 }
1642
1643 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1644 1, 2, 0,
1645 doc: /* Return a list of charsets in STR.
1646 Optional arg TABLE if non-nil is a translation table to look up.
1647
1648 If STR is unibyte, the returned list may contain
1649 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1650 (str, table)
1651 Lisp_Object str, table;
1652 {
1653 Lisp_Object charsets;
1654 int i;
1655 Lisp_Object val;
1656
1657 CHECK_STRING (str);
1658
1659 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1660 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1661 charsets, table,
1662 STRING_MULTIBYTE (str));
1663 val = Qnil;
1664 for (i = charset_table_used - 1; i >= 0; i--)
1665 if (!NILP (AREF (charsets, i)))
1666 val = Fcons (CHARSET_NAME (charset_table + i), val);
1667 return val;
1668 }
1669
1670 \f
1671
1672 /* Return a unified character code for C (>= 0x110000). VAL is a
1673 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1674 charset symbol. */
1675 int
1676 maybe_unify_char (c, val)
1677 int c;
1678 Lisp_Object val;
1679 {
1680 struct charset *charset;
1681
1682 if (INTEGERP (val))
1683 return XINT (val);
1684 if (NILP (val))
1685 return c;
1686
1687 CHECK_CHARSET_GET_CHARSET (val, charset);
1688 load_charset (charset, 1);
1689 if (! inhibit_load_charset_map)
1690 {
1691 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1692 if (! NILP (val))
1693 c = XINT (val);
1694 }
1695 else
1696 {
1697 int code_index = c - CHARSET_CODE_OFFSET (charset);
1698 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1699
1700 if (unified > 0)
1701 c = unified;
1702 }
1703 return c;
1704 }
1705
1706
1707 /* Return a character correponding to the code-point CODE of
1708 CHARSET. */
1709
1710 int
1711 decode_char (charset, code)
1712 struct charset *charset;
1713 unsigned code;
1714 {
1715 int c, char_index;
1716 enum charset_method method = CHARSET_METHOD (charset);
1717
1718 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1719 return -1;
1720
1721 if (method == CHARSET_METHOD_SUBSET)
1722 {
1723 Lisp_Object subset_info;
1724
1725 subset_info = CHARSET_SUBSET (charset);
1726 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1727 code -= XINT (AREF (subset_info, 3));
1728 if (code < XFASTINT (AREF (subset_info, 1))
1729 || code > XFASTINT (AREF (subset_info, 2)))
1730 c = -1;
1731 else
1732 c = DECODE_CHAR (charset, code);
1733 }
1734 else if (method == CHARSET_METHOD_SUPERSET)
1735 {
1736 Lisp_Object parents;
1737
1738 parents = CHARSET_SUPERSET (charset);
1739 c = -1;
1740 for (; CONSP (parents); parents = XCDR (parents))
1741 {
1742 int id = XINT (XCAR (XCAR (parents)));
1743 int code_offset = XINT (XCDR (XCAR (parents)));
1744 unsigned this_code = code - code_offset;
1745
1746 charset = CHARSET_FROM_ID (id);
1747 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1748 break;
1749 }
1750 }
1751 else
1752 {
1753 char_index = CODE_POINT_TO_INDEX (charset, code);
1754 if (char_index < 0)
1755 return -1;
1756
1757 if (method == CHARSET_METHOD_MAP)
1758 {
1759 Lisp_Object decoder;
1760
1761 decoder = CHARSET_DECODER (charset);
1762 if (! VECTORP (decoder))
1763 {
1764 load_charset (charset, 1);
1765 decoder = CHARSET_DECODER (charset);
1766 }
1767 if (VECTORP (decoder))
1768 c = XINT (AREF (decoder, char_index));
1769 else
1770 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1771 }
1772 else /* method == CHARSET_METHOD_OFFSET */
1773 {
1774 c = char_index + CHARSET_CODE_OFFSET (charset);
1775 if (CHARSET_UNIFIED_P (charset)
1776 && c > MAX_UNICODE_CHAR)
1777 MAYBE_UNIFY_CHAR (c);
1778 }
1779 }
1780
1781 return c;
1782 }
1783
1784 /* Variable used temporarily by the macro ENCODE_CHAR. */
1785 Lisp_Object charset_work;
1786
1787 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1788 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1789 use CHARSET's strict_max_char instead of max_char. */
1790
1791 unsigned
1792 encode_char (charset, c)
1793 struct charset *charset;
1794 int c;
1795 {
1796 unsigned code;
1797 enum charset_method method = CHARSET_METHOD (charset);
1798
1799 if (CHARSET_UNIFIED_P (charset))
1800 {
1801 Lisp_Object deunifier;
1802 int code_index = -1;
1803
1804 deunifier = CHARSET_DEUNIFIER (charset);
1805 if (! CHAR_TABLE_P (deunifier))
1806 {
1807 load_charset (charset, 2);
1808 deunifier = CHARSET_DEUNIFIER (charset);
1809 }
1810 if (CHAR_TABLE_P (deunifier))
1811 {
1812 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1813
1814 if (INTEGERP (deunified))
1815 code_index = XINT (deunified);
1816 }
1817 else
1818 {
1819 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1820 }
1821 if (code_index >= 0)
1822 c = CHARSET_CODE_OFFSET (charset) + code_index;
1823 }
1824
1825 if (method == CHARSET_METHOD_SUBSET)
1826 {
1827 Lisp_Object subset_info;
1828 struct charset *this_charset;
1829
1830 subset_info = CHARSET_SUBSET (charset);
1831 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1832 code = ENCODE_CHAR (this_charset, c);
1833 if (code == CHARSET_INVALID_CODE (this_charset)
1834 || code < XFASTINT (AREF (subset_info, 1))
1835 || code > XFASTINT (AREF (subset_info, 2)))
1836 return CHARSET_INVALID_CODE (charset);
1837 code += XINT (AREF (subset_info, 3));
1838 return code;
1839 }
1840
1841 if (method == CHARSET_METHOD_SUPERSET)
1842 {
1843 Lisp_Object parents;
1844
1845 parents = CHARSET_SUPERSET (charset);
1846 for (; CONSP (parents); parents = XCDR (parents))
1847 {
1848 int id = XINT (XCAR (XCAR (parents)));
1849 int code_offset = XINT (XCDR (XCAR (parents)));
1850 struct charset *this_charset = CHARSET_FROM_ID (id);
1851
1852 code = ENCODE_CHAR (this_charset, c);
1853 if (code != CHARSET_INVALID_CODE (this_charset))
1854 return code + code_offset;
1855 }
1856 return CHARSET_INVALID_CODE (charset);
1857 }
1858
1859 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1860 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1861 return CHARSET_INVALID_CODE (charset);
1862
1863 if (method == CHARSET_METHOD_MAP)
1864 {
1865 Lisp_Object encoder;
1866 Lisp_Object val;
1867
1868 encoder = CHARSET_ENCODER (charset);
1869 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1870 {
1871 load_charset (charset, 2);
1872 encoder = CHARSET_ENCODER (charset);
1873 }
1874 if (CHAR_TABLE_P (encoder))
1875 {
1876 val = CHAR_TABLE_REF (encoder, c);
1877 if (NILP (val))
1878 return CHARSET_INVALID_CODE (charset);
1879 code = XINT (val);
1880 if (! CHARSET_COMPACT_CODES_P (charset))
1881 code = INDEX_TO_CODE_POINT (charset, code);
1882 }
1883 else
1884 {
1885 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1886 code = INDEX_TO_CODE_POINT (charset, code);
1887 }
1888 }
1889 else /* method == CHARSET_METHOD_OFFSET */
1890 {
1891 int code_index = c - CHARSET_CODE_OFFSET (charset);
1892
1893 code = INDEX_TO_CODE_POINT (charset, code_index);
1894 }
1895
1896 return code;
1897 }
1898
1899
1900 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1901 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1902 Return nil if CODE-POINT is not valid in CHARSET.
1903
1904 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1905
1906 Optional argument RESTRICTION specifies a way to map the pair of CCS
1907 and CODE-POINT to a character. Currently not supported and just ignored. */)
1908 (charset, code_point, restriction)
1909 Lisp_Object charset, code_point, restriction;
1910 {
1911 int c, id;
1912 unsigned code;
1913 struct charset *charsetp;
1914
1915 CHECK_CHARSET_GET_ID (charset, id);
1916 if (CONSP (code_point))
1917 {
1918 CHECK_NATNUM_CAR (code_point);
1919 CHECK_NATNUM_CDR (code_point);
1920 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1921 }
1922 else
1923 {
1924 CHECK_NATNUM (code_point);
1925 code = XINT (code_point);
1926 }
1927 charsetp = CHARSET_FROM_ID (id);
1928 c = DECODE_CHAR (charsetp, code);
1929 return (c >= 0 ? make_number (c) : Qnil);
1930 }
1931
1932
1933 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1934 doc: /* Encode the character CH into a code-point of CHARSET.
1935 Return nil if CHARSET doesn't include CH.
1936
1937 Optional argument RESTRICTION specifies a way to map CH to a
1938 code-point in CCS. Currently not supported and just ignored. */)
1939 (ch, charset, restriction)
1940 Lisp_Object ch, charset, restriction;
1941 {
1942 int id;
1943 unsigned code;
1944 struct charset *charsetp;
1945
1946 CHECK_CHARSET_GET_ID (charset, id);
1947 CHECK_NATNUM (ch);
1948 charsetp = CHARSET_FROM_ID (id);
1949 code = ENCODE_CHAR (charsetp, XINT (ch));
1950 if (code == CHARSET_INVALID_CODE (charsetp))
1951 return Qnil;
1952 if (code > 0x7FFFFFF)
1953 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1954 return make_number (code);
1955 }
1956
1957
1958 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1959 doc:
1960 /* Return a character of CHARSET whose position codes are CODEn.
1961
1962 CODE1 through CODE4 are optional, but if you don't supply sufficient
1963 position codes, it is assumed that the minimum code in each dimension
1964 is specified. */)
1965 (charset, code1, code2, code3, code4)
1966 Lisp_Object charset, code1, code2, code3, code4;
1967 {
1968 int id, dimension;
1969 struct charset *charsetp;
1970 unsigned code;
1971 int c;
1972
1973 CHECK_CHARSET_GET_ID (charset, id);
1974 charsetp = CHARSET_FROM_ID (id);
1975
1976 dimension = CHARSET_DIMENSION (charsetp);
1977 if (NILP (code1))
1978 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1979 ? 0 : CHARSET_MIN_CODE (charsetp));
1980 else
1981 {
1982 CHECK_NATNUM (code1);
1983 if (XFASTINT (code1) >= 0x100)
1984 args_out_of_range (make_number (0xFF), code1);
1985 code = XFASTINT (code1);
1986
1987 if (dimension > 1)
1988 {
1989 code <<= 8;
1990 if (NILP (code2))
1991 code |= charsetp->code_space[(dimension - 2) * 4];
1992 else
1993 {
1994 CHECK_NATNUM (code2);
1995 if (XFASTINT (code2) >= 0x100)
1996 args_out_of_range (make_number (0xFF), code2);
1997 code |= XFASTINT (code2);
1998 }
1999
2000 if (dimension > 2)
2001 {
2002 code <<= 8;
2003 if (NILP (code3))
2004 code |= charsetp->code_space[(dimension - 3) * 4];
2005 else
2006 {
2007 CHECK_NATNUM (code3);
2008 if (XFASTINT (code3) >= 0x100)
2009 args_out_of_range (make_number (0xFF), code3);
2010 code |= XFASTINT (code3);
2011 }
2012
2013 if (dimension > 3)
2014 {
2015 code <<= 8;
2016 if (NILP (code4))
2017 code |= charsetp->code_space[0];
2018 else
2019 {
2020 CHECK_NATNUM (code4);
2021 if (XFASTINT (code4) >= 0x100)
2022 args_out_of_range (make_number (0xFF), code4);
2023 code |= XFASTINT (code4);
2024 }
2025 }
2026 }
2027 }
2028 }
2029
2030 if (CHARSET_ISO_FINAL (charsetp) >= 0)
2031 code &= 0x7F7F7F7F;
2032 c = DECODE_CHAR (charsetp, code);
2033 if (c < 0)
2034 error ("Invalid code(s)");
2035 return make_number (c);
2036 }
2037
2038
2039 /* Return the first charset in CHARSET_LIST that contains C.
2040 CHARSET_LIST is a list of charset IDs. If it is nil, use
2041 Vcharset_ordered_list. */
2042
2043 struct charset *
2044 char_charset (c, charset_list, code_return)
2045 int c;
2046 Lisp_Object charset_list;
2047 unsigned *code_return;
2048 {
2049 int maybe_null = 0;
2050
2051 if (NILP (charset_list))
2052 charset_list = Vcharset_ordered_list;
2053 else
2054 maybe_null = 1;
2055
2056 while (CONSP (charset_list))
2057 {
2058 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2059 unsigned code = ENCODE_CHAR (charset, c);
2060
2061 if (code != CHARSET_INVALID_CODE (charset))
2062 {
2063 if (code_return)
2064 *code_return = code;
2065 return charset;
2066 }
2067 charset_list = XCDR (charset_list);
2068 if (c <= MAX_UNICODE_CHAR
2069 && EQ (charset_list, Vcharset_non_preferred_head))
2070 return CHARSET_FROM_ID (charset_unicode);
2071 }
2072 return (maybe_null ? NULL
2073 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2074 : CHARSET_FROM_ID (charset_eight_bit));
2075 }
2076
2077
2078 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2079 doc:
2080 /*Return list of charset and one to four position-codes of CH.
2081 The charset is decided by the current priority order of charsets.
2082 A position-code is a byte value of each dimension of the code-point of
2083 CH in the charset. */)
2084 (ch)
2085 Lisp_Object ch;
2086 {
2087 struct charset *charset;
2088 int c, dimension;
2089 unsigned code;
2090 Lisp_Object val;
2091
2092 CHECK_CHARACTER (ch);
2093 c = XFASTINT (ch);
2094 charset = CHAR_CHARSET (c);
2095 if (! charset)
2096 abort ();
2097 code = ENCODE_CHAR (charset, c);
2098 if (code == CHARSET_INVALID_CODE (charset))
2099 abort ();
2100 dimension = CHARSET_DIMENSION (charset);
2101 for (val = Qnil; dimension > 0; dimension--)
2102 {
2103 val = Fcons (make_number (code & 0xFF), val);
2104 code >>= 8;
2105 }
2106 return Fcons (CHARSET_NAME (charset), val);
2107 }
2108
2109
2110 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2111 doc: /* Return the charset of highest priority that contains CH.
2112 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2113 from which to find the charset. It may also be a coding system. In
2114 that case, find the charset from what supported by that coding system. */)
2115 (ch, restriction)
2116 Lisp_Object ch, restriction;
2117 {
2118 struct charset *charset;
2119
2120 CHECK_CHARACTER (ch);
2121 if (NILP (restriction))
2122 charset = CHAR_CHARSET (XINT (ch));
2123 else
2124 {
2125 Lisp_Object charset_list;
2126
2127 if (CONSP (restriction))
2128 {
2129 for (charset_list = Qnil; CONSP (restriction);
2130 restriction = XCDR (restriction))
2131 {
2132 int id;
2133
2134 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2135 charset_list = Fcons (make_number (id), charset_list);
2136 }
2137 charset_list = Fnreverse (charset_list);
2138 }
2139 else
2140 charset_list = coding_system_charset_list (restriction);
2141 charset = char_charset (XINT (ch), charset_list, NULL);
2142 if (! charset)
2143 return Qnil;
2144 }
2145 return (CHARSET_NAME (charset));
2146 }
2147
2148
2149 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2150 doc: /*
2151 Return charset of a character in the current buffer at position POS.
2152 If POS is nil, it defauls to the current point.
2153 If POS is out of range, the value is nil. */)
2154 (pos)
2155 Lisp_Object pos;
2156 {
2157 Lisp_Object ch;
2158 struct charset *charset;
2159
2160 ch = Fchar_after (pos);
2161 if (! INTEGERP (ch))
2162 return ch;
2163 charset = CHAR_CHARSET (XINT (ch));
2164 return (CHARSET_NAME (charset));
2165 }
2166
2167
2168 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2169 doc: /*
2170 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2171
2172 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2173 by their DIMENSION, CHARS, and FINAL-CHAR,
2174 whereas Emacs distinguishes them by charset symbol.
2175 See the documentation of the function `charset-info' for the meanings of
2176 DIMENSION, CHARS, and FINAL-CHAR. */)
2177 (dimension, chars, final_char)
2178 Lisp_Object dimension, chars, final_char;
2179 {
2180 int id;
2181 int chars_flag;
2182
2183 check_iso_charset_parameter (dimension, chars, final_char);
2184 chars_flag = XFASTINT (chars) == 96;
2185 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2186 XFASTINT (final_char));
2187 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2188 }
2189
2190
2191 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2192 0, 0, 0,
2193 doc: /*
2194 Internal use only.
2195 Clear temporary charset mapping tables.
2196 It should be called only from temacs invoked for dumping. */)
2197 ()
2198 {
2199 if (temp_charset_work)
2200 {
2201 free (temp_charset_work);
2202 temp_charset_work = NULL;
2203 }
2204
2205 if (CHAR_TABLE_P (Vchar_unify_table))
2206 Foptimize_char_table (Vchar_unify_table, Qnil);
2207
2208 return Qnil;
2209 }
2210
2211 DEFUN ("charset-priority-list", Fcharset_priority_list,
2212 Scharset_priority_list, 0, 1, 0,
2213 doc: /* Return the list of charsets ordered by priority.
2214 HIGHESTP non-nil means just return the highest priority one. */)
2215 (highestp)
2216 Lisp_Object highestp;
2217 {
2218 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2219
2220 if (!NILP (highestp))
2221 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2222
2223 while (!NILP (list))
2224 {
2225 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2226 list = XCDR (list);
2227 }
2228 return Fnreverse (val);
2229 }
2230
2231 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2232 1, MANY, 0,
2233 doc: /* Assign higher priority to the charsets given as arguments.
2234 usage: (set-charset-priority &rest charsets) */)
2235 (nargs, args)
2236 int nargs;
2237 Lisp_Object *args;
2238 {
2239 Lisp_Object new_head, old_list, arglist[2];
2240 Lisp_Object list_2022, list_emacs_mule;
2241 int i, id;
2242
2243 old_list = Fcopy_sequence (Vcharset_ordered_list);
2244 new_head = Qnil;
2245 for (i = 0; i < nargs; i++)
2246 {
2247 CHECK_CHARSET_GET_ID (args[i], id);
2248 if (! NILP (Fmemq (make_number (id), old_list)))
2249 {
2250 old_list = Fdelq (make_number (id), old_list);
2251 new_head = Fcons (make_number (id), new_head);
2252 }
2253 }
2254 arglist[0] = Fnreverse (new_head);
2255 arglist[1] = Vcharset_non_preferred_head = old_list;
2256 Vcharset_ordered_list = Fnconc (2, arglist);
2257 charset_ordered_list_tick++;
2258
2259 charset_unibyte = -1;
2260 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2261 CONSP (old_list); old_list = XCDR (old_list))
2262 {
2263 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2264 list_2022 = Fcons (XCAR (old_list), list_2022);
2265 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2266 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2267 if (charset_unibyte < 0)
2268 {
2269 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2270
2271 if (CHARSET_DIMENSION (charset) == 1
2272 && CHARSET_ASCII_COMPATIBLE_P (charset)
2273 && CHARSET_MAX_CHAR (charset) >= 0x80)
2274 charset_unibyte = CHARSET_ID (charset);
2275 }
2276 }
2277 Viso_2022_charset_list = Fnreverse (list_2022);
2278 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2279 if (charset_unibyte < 0)
2280 charset_unibyte = charset_iso_8859_1;
2281
2282 return Qnil;
2283 }
2284
2285 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2286 0, 1, 0,
2287 doc: /* Internal use only.
2288 Return charset identification number of CHARSET. */)
2289 (charset)
2290 Lisp_Object charset;
2291 {
2292 int id;
2293
2294 CHECK_CHARSET_GET_ID (charset, id);
2295 return make_number (id);
2296 }
2297
2298 \f
2299 void
2300 init_charset ()
2301 {
2302 Lisp_Object tempdir;
2303 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2304 if (access ((char *) SDATA (tempdir), 0) < 0)
2305 {
2306 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2307 Emacs will not function correctly without the character map files.\n\
2308 Please check your installation!\n",
2309 tempdir);
2310 /* TODO should this be a fatal error? (Bug#909) */
2311 }
2312
2313 Vcharset_map_path = Fcons (tempdir, Qnil);
2314 }
2315
2316
2317 void
2318 init_charset_once ()
2319 {
2320 int i, j, k;
2321
2322 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2323 for (j = 0; j < ISO_MAX_CHARS; j++)
2324 for (k = 0; k < ISO_MAX_FINAL; k++)
2325 iso_charset_table[i][j][k] = -1;
2326
2327 for (i = 0; i < 256; i++)
2328 emacs_mule_charset[i] = NULL;
2329
2330 charset_jisx0201_roman = -1;
2331 charset_jisx0208_1978 = -1;
2332 charset_jisx0208 = -1;
2333 charset_ksc5601 = -1;
2334
2335 for (i = 0; i < 128; i++)
2336 unibyte_to_multibyte_table[i] = i;
2337 for (; i < 256; i++)
2338 unibyte_to_multibyte_table[i] = BYTE8_TO_CHAR (i);
2339 }
2340
2341 #ifdef emacs
2342
2343 void
2344 syms_of_charset ()
2345 {
2346 DEFSYM (Qcharsetp, "charsetp");
2347
2348 DEFSYM (Qascii, "ascii");
2349 DEFSYM (Qunicode, "unicode");
2350 DEFSYM (Qemacs, "emacs");
2351 DEFSYM (Qeight_bit, "eight-bit");
2352 DEFSYM (Qiso_8859_1, "iso-8859-1");
2353
2354 DEFSYM (Qgl, "gl");
2355 DEFSYM (Qgr, "gr");
2356
2357 staticpro (&Vcharset_ordered_list);
2358 Vcharset_ordered_list = Qnil;
2359
2360 staticpro (&Viso_2022_charset_list);
2361 Viso_2022_charset_list = Qnil;
2362
2363 staticpro (&Vemacs_mule_charset_list);
2364 Vemacs_mule_charset_list = Qnil;
2365
2366 /* Don't staticpro them here. It's done in syms_of_fns. */
2367 QCtest = intern (":test");
2368 Qeq = intern ("eq");
2369
2370 staticpro (&Vcharset_hash_table);
2371 {
2372 Lisp_Object args[2];
2373 args[0] = QCtest;
2374 args[1] = Qeq;
2375 Vcharset_hash_table = Fmake_hash_table (2, args);
2376 }
2377
2378 charset_table_size = 128;
2379 charset_table = ((struct charset *)
2380 xmalloc (sizeof (struct charset) * charset_table_size));
2381 charset_table_used = 0;
2382
2383 defsubr (&Scharsetp);
2384 defsubr (&Smap_charset_chars);
2385 defsubr (&Sdefine_charset_internal);
2386 defsubr (&Sdefine_charset_alias);
2387 defsubr (&Scharset_plist);
2388 defsubr (&Sset_charset_plist);
2389 defsubr (&Sunify_charset);
2390 defsubr (&Sget_unused_iso_final_char);
2391 defsubr (&Sdeclare_equiv_charset);
2392 defsubr (&Sfind_charset_region);
2393 defsubr (&Sfind_charset_string);
2394 defsubr (&Sdecode_char);
2395 defsubr (&Sencode_char);
2396 defsubr (&Ssplit_char);
2397 defsubr (&Smake_char);
2398 defsubr (&Schar_charset);
2399 defsubr (&Scharset_after);
2400 defsubr (&Siso_charset);
2401 defsubr (&Sclear_charset_maps);
2402 defsubr (&Scharset_priority_list);
2403 defsubr (&Sset_charset_priority);
2404 defsubr (&Scharset_id_internal);
2405
2406 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2407 doc: /* *List of directories to search for charset map files. */);
2408 Vcharset_map_path = Qnil;
2409
2410 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2411 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2412 inhibit_load_charset_map = 0;
2413
2414 DEFVAR_LISP ("charset-list", &Vcharset_list,
2415 doc: /* List of all charsets ever defined. */);
2416 Vcharset_list = Qnil;
2417
2418 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2419 doc: /* ISO639 language mnemonic symbol for the current language environment.
2420 If the current language environment is for multiple languages (e.g. "Latin-1"),
2421 the value may be a list of mnemonics. */);
2422 Vcurrent_iso639_language = Qnil;
2423
2424 charset_ascii
2425 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2426 0, 127, 'B', -1, 0, 1, 0, 0);
2427 charset_iso_8859_1
2428 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2429 0, 255, -1, -1, -1, 1, 0, 0);
2430 charset_unicode
2431 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2432 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2433 charset_emacs
2434 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2435 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2436 charset_eight_bit
2437 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2438 128, 255, -1, 0, -1, 0, 1,
2439 MAX_5_BYTE_CHAR + 1);
2440 charset_unibyte = charset_iso_8859_1;
2441 }
2442
2443 #endif /* emacs */
2444
2445 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2446 (do not change this comment) */