]> code.delx.au - gnu-emacs/blob - src/charset.c
Use offsetof instead of own definition
[gnu-emacs] / src / charset.c
1 /* Basic character set support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007,
3 2008, 2009, 2010 Free Software Foundation, Inc.
4 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
5 2005, 2006, 2007, 2008, 2009, 2010
6 National Institute of Advanced Industrial Science and Technology (AIST)
7 Registration Number H14PRO021
8
9 Copyright (C) 2003, 2004
10 National Institute of Advanced Industrial Science and Technology (AIST)
11 Registration Number H13PRO009
12
13 This file is part of GNU Emacs.
14
15 GNU Emacs is free software: you can redistribute it and/or modify
16 it under the terms of the GNU General Public License as published by
17 the Free Software Foundation, either version 3 of the License, or
18 (at your option) any later version.
19
20 GNU Emacs is distributed in the hope that it will be useful,
21 but WITHOUT ANY WARRANTY; without even the implied warranty of
22 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
23 GNU General Public License for more details.
24
25 You should have received a copy of the GNU General Public License
26 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
27
28 #include <config.h>
29
30 #include <stdio.h>
31 #include <unistd.h>
32 #include <ctype.h>
33 #include <sys/types.h>
34 #include <setjmp.h>
35 #include "lisp.h"
36 #include "character.h"
37 #include "charset.h"
38 #include "coding.h"
39 #include "disptab.h"
40 #include "buffer.h"
41
42 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
43
44 A coded character set ("charset" hereafter) is a meaningful
45 collection (i.e. language, culture, functionality, etc.) of
46 characters. Emacs handles multiple charsets at once. In Emacs Lisp
47 code, a charset is represented by a symbol. In C code, a charset is
48 represented by its ID number or by a pointer to a struct charset.
49
50 The actual information about each charset is stored in two places.
51 Lispy information is stored in the hash table Vcharset_hash_table as
52 a vector (charset attributes). The other information is stored in
53 charset_table as a struct charset.
54
55 */
56
57 /* List of all charsets. This variable is used only from Emacs
58 Lisp. */
59 Lisp_Object Vcharset_list;
60
61 /* Hash table that contains attributes of each charset. Keys are
62 charset symbols, and values are vectors of charset attributes. */
63 Lisp_Object Vcharset_hash_table;
64
65 /* Table of struct charset. */
66 struct charset *charset_table;
67
68 static int charset_table_size;
69 static int charset_table_used;
70
71 Lisp_Object Qcharsetp;
72
73 /* Special charset symbols. */
74 Lisp_Object Qascii;
75 Lisp_Object Qeight_bit;
76 Lisp_Object Qiso_8859_1;
77 Lisp_Object Qunicode;
78 Lisp_Object Qemacs;
79
80 /* The corresponding charsets. */
81 int charset_ascii;
82 int charset_eight_bit;
83 int charset_iso_8859_1;
84 int charset_unicode;
85 int charset_emacs;
86
87 /* The other special charsets. */
88 int charset_jisx0201_roman;
89 int charset_jisx0208_1978;
90 int charset_jisx0208;
91 int charset_ksc5601;
92
93 /* Value of charset attribute `charset-iso-plane'. */
94 Lisp_Object Qgl, Qgr;
95
96 /* Charset of unibyte characters. */
97 int charset_unibyte;
98
99 /* List of charsets ordered by the priority. */
100 Lisp_Object Vcharset_ordered_list;
101
102 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
103 charsets. */
104 Lisp_Object Vcharset_non_preferred_head;
105
106 /* Incremented everytime we change Vcharset_ordered_list. This is
107 unsigned short so that it fits in Lisp_Int and never matches
108 -1. */
109 unsigned short charset_ordered_list_tick;
110
111 /* List of iso-2022 charsets. */
112 Lisp_Object Viso_2022_charset_list;
113
114 /* List of emacs-mule charsets. */
115 Lisp_Object Vemacs_mule_charset_list;
116
117 struct charset *emacs_mule_charset[256];
118
119 /* Mapping table from ISO2022's charset (specified by DIMENSION,
120 CHARS, and FINAL-CHAR) to Emacs' charset. */
121 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
122
123 Lisp_Object Vcharset_map_path;
124
125 /* If nonzero, don't load charset maps. */
126 int inhibit_load_charset_map;
127
128 Lisp_Object Vcurrent_iso639_language;
129
130 /* Defined in chartab.c */
131 extern void
132 map_char_table_for_charset (void (*c_function) (Lisp_Object, Lisp_Object),
133 Lisp_Object function, Lisp_Object table,
134 Lisp_Object arg, struct charset *charset,
135 unsigned from, unsigned to);
136
137 #define CODE_POINT_TO_INDEX(charset, code) \
138 ((charset)->code_linear_p \
139 ? (code) - (charset)->min_code \
140 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
141 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
142 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
143 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
144 ? (((((code) >> 24) - (charset)->code_space[12]) \
145 * (charset)->code_space[11]) \
146 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
147 * (charset)->code_space[7]) \
148 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
149 * (charset)->code_space[3]) \
150 + (((code) & 0xFF) - (charset)->code_space[0]) \
151 - ((charset)->char_index_offset)) \
152 : -1)
153
154
155 /* Convert the character index IDX to code-point CODE for CHARSET.
156 It is assumed that IDX is in a valid range. */
157
158 #define INDEX_TO_CODE_POINT(charset, idx) \
159 ((charset)->code_linear_p \
160 ? (idx) + (charset)->min_code \
161 : (idx += (charset)->char_index_offset, \
162 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
163 | (((charset)->code_space[4] \
164 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
165 << 8) \
166 | (((charset)->code_space[8] \
167 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
168 << 16) \
169 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
170 << 24))))
171
172 /* Structure to hold mapping tables for a charset. Used by temacs
173 invoked for dumping. */
174
175 static struct
176 {
177 /* The current charset for which the following tables are setup. */
178 struct charset *current;
179
180 /* 1 iff the following table is used for encoder. */
181 short for_encoder;
182
183 /* When the following table is used for encoding, mininum and
184 maxinum character of the current charset. */
185 int min_char, max_char;
186
187 /* A Unicode character correspoinding to the code indice 0 (i.e. the
188 minimum code-point) of the current charset, or -1 if the code
189 indice 0 is not a Unicode character. This is checked when
190 table.encoder[CHAR] is zero. */
191 int zero_index_char;
192
193 union {
194 /* Table mapping code-indices (not code-points) of the current
195 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
196 doesn't belong to the current charset. */
197 int decoder[0x10000];
198 /* Table mapping Unicode characters to code-indices of the current
199 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
200 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
201 (0x20000..0x2FFFF). Note that there is no charset map that
202 uses both SMP and SIP. */
203 unsigned short encoder[0x20000];
204 } table;
205 } *temp_charset_work;
206
207 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
208 do { \
209 if ((CODE) == 0) \
210 temp_charset_work->zero_index_char = (C); \
211 else if ((C) < 0x20000) \
212 temp_charset_work->table.encoder[(C)] = (CODE); \
213 else \
214 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
215 } while (0)
216
217 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
218 ((C) == temp_charset_work->zero_index_char ? 0 \
219 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
220 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
221 : temp_charset_work->table.encoder[(C) - 0x10000] \
222 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
223
224 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
225 (temp_charset_work->table.decoder[(CODE)] = (C))
226
227 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
228 (temp_charset_work->table.decoder[(CODE)])
229 \f
230
231 /* Set to 1 to warn that a charset map is loaded and thus a buffer
232 text and a string data may be relocated. */
233 int charset_map_loaded;
234
235 struct charset_map_entries
236 {
237 struct {
238 unsigned from, to;
239 int c;
240 } entry[0x10000];
241 struct charset_map_entries *next;
242 };
243
244 /* Load the mapping information of CHARSET from ENTRIES for
245 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
246 encoding (CONTROL_FLAG == 2).
247
248 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
249 and CHARSET->fast_map.
250
251 If CONTROL_FLAG is 1, setup the following tables according to
252 CHARSET->method and inhibit_load_charset_map.
253
254 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
255 ----------------------+--------------------+---------------------------
256 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
257 ----------------------+--------------------+---------------------------
258 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
259
260 If CONTROL_FLAG is 2, setup the following tables.
261
262 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
263 ----------------------+--------------------+---------------------------
264 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
265 ----------------------+--------------------+--------------------------
266 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
267 */
268
269 static void
270 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
271 {
272 Lisp_Object vec, table;
273 unsigned max_code = CHARSET_MAX_CODE (charset);
274 int ascii_compatible_p = charset->ascii_compatible_p;
275 int min_char, max_char, nonascii_min_char;
276 int i;
277 unsigned char *fast_map = charset->fast_map;
278
279 if (n_entries <= 0)
280 return;
281
282 if (control_flag)
283 {
284 if (! inhibit_load_charset_map)
285 {
286 if (control_flag == 1)
287 {
288 if (charset->method == CHARSET_METHOD_MAP)
289 {
290 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
291
292 vec = CHARSET_DECODER (charset)
293 = Fmake_vector (make_number (n), make_number (-1));
294 }
295 else
296 {
297 char_table_set_range (Vchar_unify_table,
298 charset->min_char, charset->max_char,
299 Qnil);
300 }
301 }
302 else
303 {
304 table = Fmake_char_table (Qnil, Qnil);
305 if (charset->method == CHARSET_METHOD_MAP)
306 CHARSET_ENCODER (charset) = table;
307 else
308 CHARSET_DEUNIFIER (charset) = table;
309 }
310 }
311 else
312 {
313 if (! temp_charset_work)
314 temp_charset_work = malloc (sizeof (*temp_charset_work));
315 if (control_flag == 1)
316 {
317 memset (temp_charset_work->table.decoder, -1,
318 sizeof (int) * 0x10000);
319 }
320 else
321 {
322 memset (temp_charset_work->table.encoder, 0,
323 sizeof (unsigned short) * 0x20000);
324 temp_charset_work->zero_index_char = -1;
325 }
326 temp_charset_work->current = charset;
327 temp_charset_work->for_encoder = (control_flag == 2);
328 control_flag += 2;
329 }
330 charset_map_loaded = 1;
331 }
332
333 min_char = max_char = entries->entry[0].c;
334 nonascii_min_char = MAX_CHAR;
335 for (i = 0; i < n_entries; i++)
336 {
337 unsigned from, to;
338 int from_index, to_index;
339 int from_c, to_c;
340 int idx = i % 0x10000;
341
342 if (i > 0 && idx == 0)
343 entries = entries->next;
344 from = entries->entry[idx].from;
345 to = entries->entry[idx].to;
346 from_c = entries->entry[idx].c;
347 from_index = CODE_POINT_TO_INDEX (charset, from);
348 if (from == to)
349 {
350 to_index = from_index;
351 to_c = from_c;
352 }
353 else
354 {
355 to_index = CODE_POINT_TO_INDEX (charset, to);
356 to_c = from_c + (to_index - from_index);
357 }
358 if (from_index < 0 || to_index < 0)
359 continue;
360
361 if (to_c > max_char)
362 max_char = to_c;
363 else if (from_c < min_char)
364 min_char = from_c;
365
366 if (control_flag == 1)
367 {
368 if (charset->method == CHARSET_METHOD_MAP)
369 for (; from_index <= to_index; from_index++, from_c++)
370 ASET (vec, from_index, make_number (from_c));
371 else
372 for (; from_index <= to_index; from_index++, from_c++)
373 CHAR_TABLE_SET (Vchar_unify_table,
374 CHARSET_CODE_OFFSET (charset) + from_index,
375 make_number (from_c));
376 }
377 else if (control_flag == 2)
378 {
379 if (charset->method == CHARSET_METHOD_MAP
380 && CHARSET_COMPACT_CODES_P (charset))
381 for (; from_index <= to_index; from_index++, from_c++)
382 {
383 unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
384
385 if (NILP (CHAR_TABLE_REF (table, from_c)))
386 CHAR_TABLE_SET (table, from_c, make_number (code));
387 }
388 else
389 for (; from_index <= to_index; from_index++, from_c++)
390 {
391 if (NILP (CHAR_TABLE_REF (table, from_c)))
392 CHAR_TABLE_SET (table, from_c, make_number (from_index));
393 }
394 }
395 else if (control_flag == 3)
396 for (; from_index <= to_index; from_index++, from_c++)
397 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
398 else if (control_flag == 4)
399 for (; from_index <= to_index; from_index++, from_c++)
400 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
401 else /* control_flag == 0 */
402 {
403 if (ascii_compatible_p)
404 {
405 if (! ASCII_BYTE_P (from_c))
406 {
407 if (from_c < nonascii_min_char)
408 nonascii_min_char = from_c;
409 }
410 else if (! ASCII_BYTE_P (to_c))
411 {
412 nonascii_min_char = 0x80;
413 }
414 }
415
416 for (; from_c <= to_c; from_c++)
417 CHARSET_FAST_MAP_SET (from_c, fast_map);
418 }
419 }
420
421 if (control_flag == 0)
422 {
423 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
424 ? nonascii_min_char : min_char);
425 CHARSET_MAX_CHAR (charset) = max_char;
426 }
427 else if (control_flag == 4)
428 {
429 temp_charset_work->min_char = min_char;
430 temp_charset_work->max_char = max_char;
431 }
432 }
433
434
435 /* Read a hexadecimal number (preceded by "0x") from the file FP while
436 paying attention to comment charcter '#'. */
437
438 static INLINE unsigned
439 read_hex (FILE *fp, int *eof)
440 {
441 int c;
442 unsigned n;
443
444 while ((c = getc (fp)) != EOF)
445 {
446 if (c == '#')
447 {
448 while ((c = getc (fp)) != EOF && c != '\n');
449 }
450 else if (c == '0')
451 {
452 if ((c = getc (fp)) == EOF || c == 'x')
453 break;
454 }
455 }
456 if (c == EOF)
457 {
458 *eof = 1;
459 return 0;
460 }
461 *eof = 0;
462 n = 0;
463 if (c == 'x')
464 while ((c = getc (fp)) != EOF && isxdigit (c))
465 n = ((n << 4)
466 | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
467 else
468 while ((c = getc (fp)) != EOF && isdigit (c))
469 n = (n * 10) + c - '0';
470 if (c != EOF)
471 ungetc (c, fp);
472 return n;
473 }
474
475 extern Lisp_Object Qfile_name_handler_alist;
476
477 /* Return a mapping vector for CHARSET loaded from MAPFILE.
478 Each line of MAPFILE has this form
479 0xAAAA 0xCCCC
480 where 0xAAAA is a code-point and 0xCCCC is the corresponding
481 character code, or this form
482 0xAAAA-0xBBBB 0xCCCC
483 where 0xAAAA and 0xBBBB are code-points specifying a range, and
484 0xCCCC is the first character code of the range.
485
486 The returned vector has this form:
487 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
488 where CODE1 is a code-point or a cons of code-points specifying a
489 range.
490
491 Note that this function uses `openp' to open MAPFILE but ignores
492 `file-name-handler-alist' to avoid running any Lisp code. */
493
494 extern void add_to_log (char *, Lisp_Object, Lisp_Object);
495
496 static void
497 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int control_flag)
498 {
499 unsigned min_code = CHARSET_MIN_CODE (charset);
500 unsigned max_code = CHARSET_MAX_CODE (charset);
501 int fd;
502 FILE *fp;
503 int eof;
504 Lisp_Object suffixes;
505 struct charset_map_entries *head, *entries;
506 int n_entries, count;
507 USE_SAFE_ALLOCA;
508
509 suffixes = Fcons (build_string (".map"),
510 Fcons (build_string (".TXT"), Qnil));
511
512 count = SPECPDL_INDEX ();
513 specbind (Qfile_name_handler_alist, Qnil);
514 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil);
515 unbind_to (count, Qnil);
516 if (fd < 0
517 || ! (fp = fdopen (fd, "r")))
518 error ("Failure in loading charset map: %S", SDATA (mapfile));
519
520 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
521 large (larger than MAX_ALLOCA). */
522 SAFE_ALLOCA (head, struct charset_map_entries *,
523 sizeof (struct charset_map_entries));
524 entries = head;
525 memset (entries, 0, sizeof (struct charset_map_entries));
526
527 n_entries = 0;
528 eof = 0;
529 while (1)
530 {
531 unsigned from, to;
532 int c;
533 int idx;
534
535 from = read_hex (fp, &eof);
536 if (eof)
537 break;
538 if (getc (fp) == '-')
539 to = read_hex (fp, &eof);
540 else
541 to = from;
542 c = (int) read_hex (fp, &eof);
543
544 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
545 continue;
546
547 if (n_entries > 0 && (n_entries % 0x10000) == 0)
548 {
549 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
550 sizeof (struct charset_map_entries));
551 entries = entries->next;
552 memset (entries, 0, sizeof (struct charset_map_entries));
553 }
554 idx = n_entries % 0x10000;
555 entries->entry[idx].from = from;
556 entries->entry[idx].to = to;
557 entries->entry[idx].c = c;
558 n_entries++;
559 }
560 fclose (fp);
561
562 load_charset_map (charset, head, n_entries, control_flag);
563 SAFE_FREE ();
564 }
565
566 static void
567 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
568 {
569 unsigned min_code = CHARSET_MIN_CODE (charset);
570 unsigned max_code = CHARSET_MAX_CODE (charset);
571 struct charset_map_entries *head, *entries;
572 int n_entries;
573 int len = ASIZE (vec);
574 int i;
575 USE_SAFE_ALLOCA;
576
577 if (len % 2 == 1)
578 {
579 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
580 return;
581 }
582
583 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
584 large (larger than MAX_ALLOCA). */
585 SAFE_ALLOCA (head, struct charset_map_entries *,
586 sizeof (struct charset_map_entries));
587 entries = head;
588 memset (entries, 0, sizeof (struct charset_map_entries));
589
590 n_entries = 0;
591 for (i = 0; i < len; i += 2)
592 {
593 Lisp_Object val, val2;
594 unsigned from, to;
595 int c;
596 int idx;
597
598 val = AREF (vec, i);
599 if (CONSP (val))
600 {
601 val2 = XCDR (val);
602 val = XCAR (val);
603 CHECK_NATNUM (val);
604 CHECK_NATNUM (val2);
605 from = XFASTINT (val);
606 to = XFASTINT (val2);
607 }
608 else
609 {
610 CHECK_NATNUM (val);
611 from = to = XFASTINT (val);
612 }
613 val = AREF (vec, i + 1);
614 CHECK_NATNUM (val);
615 c = XFASTINT (val);
616
617 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
618 continue;
619
620 if (n_entries > 0 && (n_entries % 0x10000) == 0)
621 {
622 SAFE_ALLOCA (entries->next, struct charset_map_entries *,
623 sizeof (struct charset_map_entries));
624 entries = entries->next;
625 memset (entries, 0, sizeof (struct charset_map_entries));
626 }
627 idx = n_entries % 0x10000;
628 entries->entry[idx].from = from;
629 entries->entry[idx].to = to;
630 entries->entry[idx].c = c;
631 n_entries++;
632 }
633
634 load_charset_map (charset, head, n_entries, control_flag);
635 SAFE_FREE ();
636 }
637
638
639 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
640 map it is (see the comment of load_charset_map for the detail). */
641
642 static void
643 load_charset (struct charset *charset, int control_flag)
644 {
645 Lisp_Object map;
646
647 if (inhibit_load_charset_map
648 && temp_charset_work
649 && charset == temp_charset_work->current
650 && ((control_flag == 2) == temp_charset_work->for_encoder))
651 return;
652
653 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
654 map = CHARSET_MAP (charset);
655 else if (CHARSET_UNIFIED_P (charset))
656 map = CHARSET_UNIFY_MAP (charset);
657 if (STRINGP (map))
658 load_charset_map_from_file (charset, map, control_flag);
659 else
660 load_charset_map_from_vector (charset, map, control_flag);
661 }
662
663
664 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
665 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
666 (Lisp_Object object)
667 {
668 return (CHARSETP (object) ? Qt : Qnil);
669 }
670
671
672 void map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
673 Lisp_Object function, Lisp_Object arg,
674 unsigned from, unsigned to);
675
676 void
677 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object), Lisp_Object function, Lisp_Object arg, unsigned int from, unsigned int to)
678 {
679 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
680 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
681 Lisp_Object range;
682 int c, stop;
683 struct gcpro gcpro1;
684
685 range = Fcons (Qnil, Qnil);
686 GCPRO1 (range);
687
688 c = temp_charset_work->min_char;
689 stop = (temp_charset_work->max_char < 0x20000
690 ? temp_charset_work->max_char : 0xFFFF);
691
692 while (1)
693 {
694 int index = GET_TEMP_CHARSET_WORK_ENCODER (c);
695
696 if (index >= from_idx && index <= to_idx)
697 {
698 if (NILP (XCAR (range)))
699 XSETCAR (range, make_number (c));
700 }
701 else if (! NILP (XCAR (range)))
702 {
703 XSETCDR (range, make_number (c - 1));
704 if (c_function)
705 (*c_function) (arg, range);
706 else
707 call2 (function, range, arg);
708 XSETCAR (range, Qnil);
709 }
710 if (c == stop)
711 {
712 if (c == temp_charset_work->max_char)
713 {
714 if (! NILP (XCAR (range)))
715 {
716 XSETCDR (range, make_number (c));
717 if (c_function)
718 (*c_function) (arg, range);
719 else
720 call2 (function, range, arg);
721 }
722 break;
723 }
724 c = 0x1FFFF;
725 stop = temp_charset_work->max_char;
726 }
727 c++;
728 }
729 UNGCPRO;
730 }
731
732 void
733 map_charset_chars (c_function, function, arg,
734 charset, from, to)
735 void (*c_function) (Lisp_Object, Lisp_Object);
736 Lisp_Object function, arg;
737 struct charset *charset;
738 unsigned from, to;
739 {
740 Lisp_Object range;
741 int partial;
742
743 partial = (from > CHARSET_MIN_CODE (charset)
744 || to < CHARSET_MAX_CODE (charset));
745
746 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
747 {
748 int from_idx = CODE_POINT_TO_INDEX (charset, from);
749 int to_idx = CODE_POINT_TO_INDEX (charset, to);
750 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
751 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
752
753 if (CHARSET_UNIFIED_P (charset))
754 {
755 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
756 load_charset (charset, 2);
757 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
758 map_char_table_for_charset (c_function, function,
759 CHARSET_DEUNIFIER (charset), arg,
760 partial ? charset : NULL, from, to);
761 else
762 map_charset_for_dump (c_function, function, arg, from, to);
763 }
764
765 range = Fcons (make_number (from_c), make_number (to_c));
766 if (NILP (function))
767 (*c_function) (arg, range);
768 else
769 call2 (function, range, arg);
770 }
771 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
772 {
773 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
774 load_charset (charset, 2);
775 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
776 map_char_table_for_charset (c_function, function,
777 CHARSET_ENCODER (charset), arg,
778 partial ? charset : NULL, from, to);
779 else
780 map_charset_for_dump (c_function, function, arg, from, to);
781 }
782 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
783 {
784 Lisp_Object subset_info;
785 int offset;
786
787 subset_info = CHARSET_SUBSET (charset);
788 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
789 offset = XINT (AREF (subset_info, 3));
790 from -= offset;
791 if (from < XFASTINT (AREF (subset_info, 1)))
792 from = XFASTINT (AREF (subset_info, 1));
793 to -= offset;
794 if (to > XFASTINT (AREF (subset_info, 2)))
795 to = XFASTINT (AREF (subset_info, 2));
796 map_charset_chars (c_function, function, arg, charset, from, to);
797 }
798 else /* i.e. CHARSET_METHOD_SUPERSET */
799 {
800 Lisp_Object parents;
801
802 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
803 parents = XCDR (parents))
804 {
805 int offset;
806 unsigned this_from, this_to;
807
808 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
809 offset = XINT (XCDR (XCAR (parents)));
810 this_from = from > offset ? from - offset : 0;
811 this_to = to > offset ? to - offset : 0;
812 if (this_from < CHARSET_MIN_CODE (charset))
813 this_from = CHARSET_MIN_CODE (charset);
814 if (this_to > CHARSET_MAX_CODE (charset))
815 this_to = CHARSET_MAX_CODE (charset);
816 map_charset_chars (c_function, function, arg, charset,
817 this_from, this_to);
818 }
819 }
820 }
821
822 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
823 doc: /* Call FUNCTION for all characters in CHARSET.
824 FUNCTION is called with an argument RANGE and the optional 3rd
825 argument ARG.
826
827 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
828 characters contained in CHARSET.
829
830 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
831 range of code points (in CHARSET) of target characters. */)
832 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
833 {
834 struct charset *cs;
835 unsigned from, to;
836
837 CHECK_CHARSET_GET_CHARSET (charset, cs);
838 if (NILP (from_code))
839 from = CHARSET_MIN_CODE (cs);
840 else
841 {
842 CHECK_NATNUM (from_code);
843 from = XINT (from_code);
844 if (from < CHARSET_MIN_CODE (cs))
845 from = CHARSET_MIN_CODE (cs);
846 }
847 if (NILP (to_code))
848 to = CHARSET_MAX_CODE (cs);
849 else
850 {
851 CHECK_NATNUM (to_code);
852 to = XINT (to_code);
853 if (to > CHARSET_MAX_CODE (cs))
854 to = CHARSET_MAX_CODE (cs);
855 }
856 map_charset_chars (NULL, function, arg, cs, from, to);
857 return Qnil;
858 }
859
860
861 /* Define a charset according to the arguments. The Nth argument is
862 the Nth attribute of the charset (the last attribute `charset-id'
863 is not included). See the docstring of `define-charset' for the
864 detail. */
865
866 DEFUN ("define-charset-internal", Fdefine_charset_internal,
867 Sdefine_charset_internal, charset_arg_max, MANY, 0,
868 doc: /* For internal use only.
869 usage: (define-charset-internal ...) */)
870 (int nargs, Lisp_Object *args)
871 {
872 /* Charset attr vector. */
873 Lisp_Object attrs;
874 Lisp_Object val;
875 unsigned hash_code;
876 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
877 int i, j;
878 struct charset charset;
879 int id;
880 int dimension;
881 int new_definition_p;
882 int nchars;
883
884 if (nargs != charset_arg_max)
885 return Fsignal (Qwrong_number_of_arguments,
886 Fcons (intern ("define-charset-internal"),
887 make_number (nargs)));
888
889 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
890
891 CHECK_SYMBOL (args[charset_arg_name]);
892 ASET (attrs, charset_name, args[charset_arg_name]);
893
894 val = args[charset_arg_code_space];
895 for (i = 0, dimension = 0, nchars = 1; i < 4; i++)
896 {
897 int min_byte, max_byte;
898
899 min_byte = XINT (Faref (val, make_number (i * 2)));
900 max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
901 if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
902 error ("Invalid :code-space value");
903 charset.code_space[i * 4] = min_byte;
904 charset.code_space[i * 4 + 1] = max_byte;
905 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
906 nchars *= charset.code_space[i * 4 + 2];
907 charset.code_space[i * 4 + 3] = nchars;
908 if (max_byte > 0)
909 dimension = i + 1;
910 }
911
912 val = args[charset_arg_dimension];
913 if (NILP (val))
914 charset.dimension = dimension;
915 else
916 {
917 CHECK_NATNUM (val);
918 charset.dimension = XINT (val);
919 if (charset.dimension < 1 || charset.dimension > 4)
920 args_out_of_range_3 (val, make_number (1), make_number (4));
921 }
922
923 charset.code_linear_p
924 = (charset.dimension == 1
925 || (charset.code_space[2] == 256
926 && (charset.dimension == 2
927 || (charset.code_space[6] == 256
928 && (charset.dimension == 3
929 || charset.code_space[10] == 256)))));
930
931 if (! charset.code_linear_p)
932 {
933 charset.code_space_mask = (unsigned char *) xmalloc (256);
934 memset (charset.code_space_mask, 0, 256);
935 for (i = 0; i < 4; i++)
936 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
937 j++)
938 charset.code_space_mask[j] |= (1 << i);
939 }
940
941 charset.iso_chars_96 = charset.code_space[2] == 96;
942
943 charset.min_code = (charset.code_space[0]
944 | (charset.code_space[4] << 8)
945 | (charset.code_space[8] << 16)
946 | (charset.code_space[12] << 24));
947 charset.max_code = (charset.code_space[1]
948 | (charset.code_space[5] << 8)
949 | (charset.code_space[9] << 16)
950 | (charset.code_space[13] << 24));
951 charset.char_index_offset = 0;
952
953 val = args[charset_arg_min_code];
954 if (! NILP (val))
955 {
956 unsigned code;
957
958 if (INTEGERP (val))
959 code = XINT (val);
960 else
961 {
962 CHECK_CONS (val);
963 CHECK_NUMBER_CAR (val);
964 CHECK_NUMBER_CDR (val);
965 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
966 }
967 if (code < charset.min_code
968 || code > charset.max_code)
969 args_out_of_range_3 (make_number (charset.min_code),
970 make_number (charset.max_code), val);
971 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
972 charset.min_code = code;
973 }
974
975 val = args[charset_arg_max_code];
976 if (! NILP (val))
977 {
978 unsigned code;
979
980 if (INTEGERP (val))
981 code = XINT (val);
982 else
983 {
984 CHECK_CONS (val);
985 CHECK_NUMBER_CAR (val);
986 CHECK_NUMBER_CDR (val);
987 code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
988 }
989 if (code < charset.min_code
990 || code > charset.max_code)
991 args_out_of_range_3 (make_number (charset.min_code),
992 make_number (charset.max_code), val);
993 charset.max_code = code;
994 }
995
996 charset.compact_codes_p = charset.max_code < 0x10000;
997
998 val = args[charset_arg_invalid_code];
999 if (NILP (val))
1000 {
1001 if (charset.min_code > 0)
1002 charset.invalid_code = 0;
1003 else
1004 {
1005 XSETINT (val, charset.max_code + 1);
1006 if (XINT (val) == charset.max_code + 1)
1007 charset.invalid_code = charset.max_code + 1;
1008 else
1009 error ("Attribute :invalid-code must be specified");
1010 }
1011 }
1012 else
1013 {
1014 CHECK_NATNUM (val);
1015 charset.invalid_code = XFASTINT (val);
1016 }
1017
1018 val = args[charset_arg_iso_final];
1019 if (NILP (val))
1020 charset.iso_final = -1;
1021 else
1022 {
1023 CHECK_NUMBER (val);
1024 if (XINT (val) < '0' || XINT (val) > 127)
1025 error ("Invalid iso-final-char: %d", XINT (val));
1026 charset.iso_final = XINT (val);
1027 }
1028
1029 val = args[charset_arg_iso_revision];
1030 if (NILP (val))
1031 charset.iso_revision = -1;
1032 else
1033 {
1034 CHECK_NUMBER (val);
1035 if (XINT (val) > 63)
1036 args_out_of_range (make_number (63), val);
1037 charset.iso_revision = XINT (val);
1038 }
1039
1040 val = args[charset_arg_emacs_mule_id];
1041 if (NILP (val))
1042 charset.emacs_mule_id = -1;
1043 else
1044 {
1045 CHECK_NATNUM (val);
1046 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1047 error ("Invalid emacs-mule-id: %d", XINT (val));
1048 charset.emacs_mule_id = XINT (val);
1049 }
1050
1051 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1052
1053 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1054
1055 charset.unified_p = 0;
1056
1057 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1058
1059 if (! NILP (args[charset_arg_code_offset]))
1060 {
1061 val = args[charset_arg_code_offset];
1062 CHECK_NUMBER (val);
1063
1064 charset.method = CHARSET_METHOD_OFFSET;
1065 charset.code_offset = XINT (val);
1066
1067 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1068 charset.min_char = i + charset.code_offset;
1069 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1070 charset.max_char = i + charset.code_offset;
1071 if (charset.max_char > MAX_CHAR)
1072 error ("Unsupported max char: %d", charset.max_char);
1073
1074 i = (charset.min_char >> 7) << 7;
1075 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1076 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1077 i = (i >> 12) << 12;
1078 for (; i <= charset.max_char; i += 0x1000)
1079 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1080 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1081 charset.ascii_compatible_p = 1;
1082 }
1083 else if (! NILP (args[charset_arg_map]))
1084 {
1085 val = args[charset_arg_map];
1086 ASET (attrs, charset_map, val);
1087 charset.method = CHARSET_METHOD_MAP;
1088 }
1089 else if (! NILP (args[charset_arg_subset]))
1090 {
1091 Lisp_Object parent;
1092 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1093 struct charset *parent_charset;
1094
1095 val = args[charset_arg_subset];
1096 parent = Fcar (val);
1097 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1098 parent_min_code = Fnth (make_number (1), val);
1099 CHECK_NATNUM (parent_min_code);
1100 parent_max_code = Fnth (make_number (2), val);
1101 CHECK_NATNUM (parent_max_code);
1102 parent_code_offset = Fnth (make_number (3), val);
1103 CHECK_NUMBER (parent_code_offset);
1104 val = Fmake_vector (make_number (4), Qnil);
1105 ASET (val, 0, make_number (parent_charset->id));
1106 ASET (val, 1, parent_min_code);
1107 ASET (val, 2, parent_max_code);
1108 ASET (val, 3, parent_code_offset);
1109 ASET (attrs, charset_subset, val);
1110
1111 charset.method = CHARSET_METHOD_SUBSET;
1112 /* Here, we just copy the parent's fast_map. It's not accurate,
1113 but at least it works for quickly detecting which character
1114 DOESN'T belong to this charset. */
1115 for (i = 0; i < 190; i++)
1116 charset.fast_map[i] = parent_charset->fast_map[i];
1117
1118 /* We also copy these for parents. */
1119 charset.min_char = parent_charset->min_char;
1120 charset.max_char = parent_charset->max_char;
1121 }
1122 else if (! NILP (args[charset_arg_superset]))
1123 {
1124 val = args[charset_arg_superset];
1125 charset.method = CHARSET_METHOD_SUPERSET;
1126 val = Fcopy_sequence (val);
1127 ASET (attrs, charset_superset, val);
1128
1129 charset.min_char = MAX_CHAR;
1130 charset.max_char = 0;
1131 for (; ! NILP (val); val = Fcdr (val))
1132 {
1133 Lisp_Object elt, car_part, cdr_part;
1134 int this_id, offset;
1135 struct charset *this_charset;
1136
1137 elt = Fcar (val);
1138 if (CONSP (elt))
1139 {
1140 car_part = XCAR (elt);
1141 cdr_part = XCDR (elt);
1142 CHECK_CHARSET_GET_ID (car_part, this_id);
1143 CHECK_NUMBER (cdr_part);
1144 offset = XINT (cdr_part);
1145 }
1146 else
1147 {
1148 CHECK_CHARSET_GET_ID (elt, this_id);
1149 offset = 0;
1150 }
1151 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1152
1153 this_charset = CHARSET_FROM_ID (this_id);
1154 if (charset.min_char > this_charset->min_char)
1155 charset.min_char = this_charset->min_char;
1156 if (charset.max_char < this_charset->max_char)
1157 charset.max_char = this_charset->max_char;
1158 for (i = 0; i < 190; i++)
1159 charset.fast_map[i] |= this_charset->fast_map[i];
1160 }
1161 }
1162 else
1163 error ("None of :code-offset, :map, :parents are specified");
1164
1165 val = args[charset_arg_unify_map];
1166 if (! NILP (val) && !STRINGP (val))
1167 CHECK_VECTOR (val);
1168 ASET (attrs, charset_unify_map, val);
1169
1170 CHECK_LIST (args[charset_arg_plist]);
1171 ASET (attrs, charset_plist, args[charset_arg_plist]);
1172
1173 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1174 &hash_code);
1175 if (charset.hash_index >= 0)
1176 {
1177 new_definition_p = 0;
1178 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1179 HASH_VALUE (hash_table, charset.hash_index) = attrs;
1180 }
1181 else
1182 {
1183 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1184 hash_code);
1185 if (charset_table_used == charset_table_size)
1186 {
1187 struct charset *new_table
1188 = (struct charset *) xmalloc (sizeof (struct charset)
1189 * (charset_table_size + 16));
1190 memcpy (new_table, charset_table,
1191 sizeof (struct charset) * charset_table_size);
1192 charset_table_size += 16;
1193 charset_table = new_table;
1194 }
1195 id = charset_table_used++;
1196 new_definition_p = 1;
1197 }
1198
1199 ASET (attrs, charset_id, make_number (id));
1200 charset.id = id;
1201 charset_table[id] = charset;
1202
1203 if (charset.method == CHARSET_METHOD_MAP)
1204 {
1205 load_charset (&charset, 0);
1206 charset_table[id] = charset;
1207 }
1208
1209 if (charset.iso_final >= 0)
1210 {
1211 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1212 charset.iso_final) = id;
1213 if (new_definition_p)
1214 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1215 Fcons (make_number (id), Qnil));
1216 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1217 charset_jisx0201_roman = id;
1218 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1219 charset_jisx0208_1978 = id;
1220 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1221 charset_jisx0208 = id;
1222 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1223 charset_ksc5601 = id;
1224 }
1225
1226 if (charset.emacs_mule_id >= 0)
1227 {
1228 emacs_mule_charset[charset.emacs_mule_id] = CHARSET_FROM_ID (id);
1229 if (charset.emacs_mule_id < 0xA0)
1230 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1231 else
1232 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1233 if (new_definition_p)
1234 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1235 Fcons (make_number (id), Qnil));
1236 }
1237
1238 if (new_definition_p)
1239 {
1240 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1241 if (charset.supplementary_p)
1242 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1243 Fcons (make_number (id), Qnil));
1244 else
1245 {
1246 Lisp_Object tail;
1247
1248 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1249 {
1250 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1251
1252 if (cs->supplementary_p)
1253 break;
1254 }
1255 if (EQ (tail, Vcharset_ordered_list))
1256 Vcharset_ordered_list = Fcons (make_number (id),
1257 Vcharset_ordered_list);
1258 else if (NILP (tail))
1259 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1260 Fcons (make_number (id), Qnil));
1261 else
1262 {
1263 val = Fcons (XCAR (tail), XCDR (tail));
1264 XSETCDR (tail, val);
1265 XSETCAR (tail, make_number (id));
1266 }
1267 }
1268 charset_ordered_list_tick++;
1269 }
1270
1271 return Qnil;
1272 }
1273
1274
1275 /* Same as Fdefine_charset_internal but arguments are more convenient
1276 to call from C (typically in syms_of_charset). This can define a
1277 charset of `offset' method only. Return the ID of the new
1278 charset. */
1279
1280 static int
1281 define_charset_internal (Lisp_Object name,
1282 int dimension,
1283 unsigned char *code_space,
1284 unsigned min_code, unsigned max_code,
1285 int iso_final, int iso_revision, int emacs_mule_id,
1286 int ascii_compatible, int supplementary,
1287 int code_offset)
1288 {
1289 Lisp_Object args[charset_arg_max];
1290 Lisp_Object plist[14];
1291 Lisp_Object val;
1292 int i;
1293
1294 args[charset_arg_name] = name;
1295 args[charset_arg_dimension] = make_number (dimension);
1296 val = Fmake_vector (make_number (8), make_number (0));
1297 for (i = 0; i < 8; i++)
1298 ASET (val, i, make_number (code_space[i]));
1299 args[charset_arg_code_space] = val;
1300 args[charset_arg_min_code] = make_number (min_code);
1301 args[charset_arg_max_code] = make_number (max_code);
1302 args[charset_arg_iso_final]
1303 = (iso_final < 0 ? Qnil : make_number (iso_final));
1304 args[charset_arg_iso_revision] = make_number (iso_revision);
1305 args[charset_arg_emacs_mule_id]
1306 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1307 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1308 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1309 args[charset_arg_invalid_code] = Qnil;
1310 args[charset_arg_code_offset] = make_number (code_offset);
1311 args[charset_arg_map] = Qnil;
1312 args[charset_arg_subset] = Qnil;
1313 args[charset_arg_superset] = Qnil;
1314 args[charset_arg_unify_map] = Qnil;
1315
1316 plist[0] = intern_c_string (":name");
1317 plist[1] = args[charset_arg_name];
1318 plist[2] = intern_c_string (":dimension");
1319 plist[3] = args[charset_arg_dimension];
1320 plist[4] = intern_c_string (":code-space");
1321 plist[5] = args[charset_arg_code_space];
1322 plist[6] = intern_c_string (":iso-final-char");
1323 plist[7] = args[charset_arg_iso_final];
1324 plist[8] = intern_c_string (":emacs-mule-id");
1325 plist[9] = args[charset_arg_emacs_mule_id];
1326 plist[10] = intern_c_string (":ascii-compatible-p");
1327 plist[11] = args[charset_arg_ascii_compatible_p];
1328 plist[12] = intern_c_string (":code-offset");
1329 plist[13] = args[charset_arg_code_offset];
1330
1331 args[charset_arg_plist] = Flist (14, plist);
1332 Fdefine_charset_internal (charset_arg_max, args);
1333
1334 return XINT (CHARSET_SYMBOL_ID (name));
1335 }
1336
1337
1338 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1339 Sdefine_charset_alias, 2, 2, 0,
1340 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1341 (Lisp_Object alias, Lisp_Object charset)
1342 {
1343 Lisp_Object attr;
1344
1345 CHECK_CHARSET_GET_ATTR (charset, attr);
1346 Fputhash (alias, attr, Vcharset_hash_table);
1347 Vcharset_list = Fcons (alias, Vcharset_list);
1348 return Qnil;
1349 }
1350
1351
1352 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1353 doc: /* Return the property list of CHARSET. */)
1354 (Lisp_Object charset)
1355 {
1356 Lisp_Object attrs;
1357
1358 CHECK_CHARSET_GET_ATTR (charset, attrs);
1359 return CHARSET_ATTR_PLIST (attrs);
1360 }
1361
1362
1363 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1364 doc: /* Set CHARSET's property list to PLIST. */)
1365 (Lisp_Object charset, Lisp_Object plist)
1366 {
1367 Lisp_Object attrs;
1368
1369 CHECK_CHARSET_GET_ATTR (charset, attrs);
1370 CHARSET_ATTR_PLIST (attrs) = plist;
1371 return plist;
1372 }
1373
1374
1375 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1376 doc: /* Unify characters of CHARSET with Unicode.
1377 This means reading the relevant file and installing the table defined
1378 by CHARSET's `:unify-map' property.
1379
1380 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1381 the same meaning as the `:unify-map' attribute in the function
1382 `define-charset' (which see).
1383
1384 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1385 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1386 {
1387 int id;
1388 struct charset *cs;
1389
1390 CHECK_CHARSET_GET_ID (charset, id);
1391 cs = CHARSET_FROM_ID (id);
1392 if (NILP (deunify)
1393 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1394 : ! CHARSET_UNIFIED_P (cs))
1395 return Qnil;
1396
1397 CHARSET_UNIFIED_P (cs) = 0;
1398 if (NILP (deunify))
1399 {
1400 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1401 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1402 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1403 if (NILP (unify_map))
1404 unify_map = CHARSET_UNIFY_MAP (cs);
1405 else
1406 {
1407 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1408 signal_error ("Bad unify-map", unify_map);
1409 CHARSET_UNIFY_MAP (cs) = unify_map;
1410 }
1411 if (NILP (Vchar_unify_table))
1412 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1413 char_table_set_range (Vchar_unify_table,
1414 cs->min_char, cs->max_char, charset);
1415 CHARSET_UNIFIED_P (cs) = 1;
1416 }
1417 else if (CHAR_TABLE_P (Vchar_unify_table))
1418 {
1419 int min_code = CHARSET_MIN_CODE (cs);
1420 int max_code = CHARSET_MAX_CODE (cs);
1421 int min_char = DECODE_CHAR (cs, min_code);
1422 int max_char = DECODE_CHAR (cs, max_code);
1423
1424 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1425 }
1426
1427 return Qnil;
1428 }
1429
1430 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1431 Sget_unused_iso_final_char, 2, 2, 0,
1432 doc: /*
1433 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1434 DIMENSION is the number of bytes to represent a character: 1 or 2.
1435 CHARS is the number of characters in a dimension: 94 or 96.
1436
1437 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1438 If there's no unused final char for the specified kind of charset,
1439 return nil. */)
1440 (Lisp_Object dimension, Lisp_Object chars)
1441 {
1442 int final_char;
1443
1444 CHECK_NUMBER (dimension);
1445 CHECK_NUMBER (chars);
1446 if (XINT (dimension) != 1 && XINT (dimension) != 2 && XINT (dimension) != 3)
1447 args_out_of_range_3 (dimension, make_number (1), make_number (3));
1448 if (XINT (chars) != 94 && XINT (chars) != 96)
1449 args_out_of_range_3 (chars, make_number (94), make_number (96));
1450 for (final_char = '0'; final_char <= '?'; final_char++)
1451 if (ISO_CHARSET_TABLE (XINT (dimension), XINT (chars), final_char) < 0)
1452 break;
1453 return (final_char <= '?' ? make_number (final_char) : Qnil);
1454 }
1455
1456 static void
1457 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
1458 {
1459 CHECK_NATNUM (dimension);
1460 CHECK_NATNUM (chars);
1461 CHECK_NATNUM (final_char);
1462
1463 if (XINT (dimension) > 3)
1464 error ("Invalid DIMENSION %d, it should be 1, 2, or 3", XINT (dimension));
1465 if (XINT (chars) != 94 && XINT (chars) != 96)
1466 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
1467 if (XINT (final_char) < '0' || XINT (final_char) > '~')
1468 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
1469 }
1470
1471
1472 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1473 4, 4, 0,
1474 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1475
1476 On decoding by an ISO-2022 base coding system, when a charset
1477 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1478 if CHARSET is designated instead. */)
1479 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1480 {
1481 int id;
1482 int chars_flag;
1483
1484 CHECK_CHARSET_GET_ID (charset, id);
1485 check_iso_charset_parameter (dimension, chars, final_char);
1486 chars_flag = XINT (chars) == 96;
1487 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XINT (final_char)) = id;
1488 return Qnil;
1489 }
1490
1491
1492 /* Return information about charsets in the text at PTR of NBYTES
1493 bytes, which are NCHARS characters. The value is:
1494
1495 0: Each character is represented by one byte. This is always
1496 true for a unibyte string. For a multibyte string, true if
1497 it contains only ASCII characters.
1498
1499 1: No charsets other than ascii, control-1, and latin-1 are
1500 found.
1501
1502 2: Otherwise.
1503 */
1504
1505 int
1506 string_xstring_p (Lisp_Object string)
1507 {
1508 const unsigned char *p = SDATA (string);
1509 const unsigned char *endp = p + SBYTES (string);
1510
1511 if (SCHARS (string) == SBYTES (string))
1512 return 0;
1513
1514 while (p < endp)
1515 {
1516 int c = STRING_CHAR_ADVANCE (p);
1517
1518 if (c >= 0x100)
1519 return 2;
1520 }
1521 return 1;
1522 }
1523
1524
1525 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1526
1527 CHARSETS is a vector. If Nth element is non-nil, it means the
1528 charset whose id is N is already found.
1529
1530 It may lookup a translation table TABLE if supplied. */
1531
1532 static void
1533 find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
1534 {
1535 const unsigned char *pend = ptr + nbytes;
1536
1537 if (nchars == nbytes)
1538 {
1539 if (multibyte)
1540 ASET (charsets, charset_ascii, Qt);
1541 else
1542 while (ptr < pend)
1543 {
1544 int c = *ptr++;
1545
1546 if (!NILP (table))
1547 c = translate_char (table, c);
1548 if (ASCII_BYTE_P (c))
1549 ASET (charsets, charset_ascii, Qt);
1550 else
1551 ASET (charsets, charset_eight_bit, Qt);
1552 }
1553 }
1554 else
1555 {
1556 while (ptr < pend)
1557 {
1558 int c = STRING_CHAR_ADVANCE (ptr);
1559 struct charset *charset;
1560
1561 if (!NILP (table))
1562 c = translate_char (table, c);
1563 charset = CHAR_CHARSET (c);
1564 ASET (charsets, CHARSET_ID (charset), Qt);
1565 }
1566 }
1567 }
1568
1569 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1570 2, 3, 0,
1571 doc: /* Return a list of charsets in the region between BEG and END.
1572 BEG and END are buffer positions.
1573 Optional arg TABLE if non-nil is a translation table to look up.
1574
1575 If the current buffer is unibyte, the returned list may contain
1576 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1577 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1578 {
1579 Lisp_Object charsets;
1580 EMACS_INT from, from_byte, to, stop, stop_byte;
1581 int i;
1582 Lisp_Object val;
1583 int multibyte = ! NILP (current_buffer->enable_multibyte_characters);
1584
1585 validate_region (&beg, &end);
1586 from = XFASTINT (beg);
1587 stop = to = XFASTINT (end);
1588
1589 if (from < GPT && GPT < to)
1590 {
1591 stop = GPT;
1592 stop_byte = GPT_BYTE;
1593 }
1594 else
1595 stop_byte = CHAR_TO_BYTE (stop);
1596
1597 from_byte = CHAR_TO_BYTE (from);
1598
1599 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1600 while (1)
1601 {
1602 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1603 stop_byte - from_byte, charsets, table,
1604 multibyte);
1605 if (stop < to)
1606 {
1607 from = stop, from_byte = stop_byte;
1608 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1609 }
1610 else
1611 break;
1612 }
1613
1614 val = Qnil;
1615 for (i = charset_table_used - 1; i >= 0; i--)
1616 if (!NILP (AREF (charsets, i)))
1617 val = Fcons (CHARSET_NAME (charset_table + i), val);
1618 return val;
1619 }
1620
1621 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1622 1, 2, 0,
1623 doc: /* Return a list of charsets in STR.
1624 Optional arg TABLE if non-nil is a translation table to look up.
1625
1626 If STR is unibyte, the returned list may contain
1627 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1628 (Lisp_Object str, Lisp_Object table)
1629 {
1630 Lisp_Object charsets;
1631 int i;
1632 Lisp_Object val;
1633
1634 CHECK_STRING (str);
1635
1636 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1637 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1638 charsets, table,
1639 STRING_MULTIBYTE (str));
1640 val = Qnil;
1641 for (i = charset_table_used - 1; i >= 0; i--)
1642 if (!NILP (AREF (charsets, i)))
1643 val = Fcons (CHARSET_NAME (charset_table + i), val);
1644 return val;
1645 }
1646
1647 \f
1648
1649 /* Return a unified character code for C (>= 0x110000). VAL is a
1650 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1651 charset symbol. */
1652 int
1653 maybe_unify_char (int c, Lisp_Object val)
1654 {
1655 struct charset *charset;
1656
1657 if (INTEGERP (val))
1658 return XINT (val);
1659 if (NILP (val))
1660 return c;
1661
1662 CHECK_CHARSET_GET_CHARSET (val, charset);
1663 load_charset (charset, 1);
1664 if (! inhibit_load_charset_map)
1665 {
1666 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1667 if (! NILP (val))
1668 c = XINT (val);
1669 }
1670 else
1671 {
1672 int code_index = c - CHARSET_CODE_OFFSET (charset);
1673 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1674
1675 if (unified > 0)
1676 c = unified;
1677 }
1678 return c;
1679 }
1680
1681
1682 /* Return a character correponding to the code-point CODE of
1683 CHARSET. */
1684
1685 int
1686 decode_char (struct charset *charset, unsigned int code)
1687 {
1688 int c, char_index;
1689 enum charset_method method = CHARSET_METHOD (charset);
1690
1691 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1692 return -1;
1693
1694 if (method == CHARSET_METHOD_SUBSET)
1695 {
1696 Lisp_Object subset_info;
1697
1698 subset_info = CHARSET_SUBSET (charset);
1699 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1700 code -= XINT (AREF (subset_info, 3));
1701 if (code < XFASTINT (AREF (subset_info, 1))
1702 || code > XFASTINT (AREF (subset_info, 2)))
1703 c = -1;
1704 else
1705 c = DECODE_CHAR (charset, code);
1706 }
1707 else if (method == CHARSET_METHOD_SUPERSET)
1708 {
1709 Lisp_Object parents;
1710
1711 parents = CHARSET_SUPERSET (charset);
1712 c = -1;
1713 for (; CONSP (parents); parents = XCDR (parents))
1714 {
1715 int id = XINT (XCAR (XCAR (parents)));
1716 int code_offset = XINT (XCDR (XCAR (parents)));
1717 unsigned this_code = code - code_offset;
1718
1719 charset = CHARSET_FROM_ID (id);
1720 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1721 break;
1722 }
1723 }
1724 else
1725 {
1726 char_index = CODE_POINT_TO_INDEX (charset, code);
1727 if (char_index < 0)
1728 return -1;
1729
1730 if (method == CHARSET_METHOD_MAP)
1731 {
1732 Lisp_Object decoder;
1733
1734 decoder = CHARSET_DECODER (charset);
1735 if (! VECTORP (decoder))
1736 {
1737 load_charset (charset, 1);
1738 decoder = CHARSET_DECODER (charset);
1739 }
1740 if (VECTORP (decoder))
1741 c = XINT (AREF (decoder, char_index));
1742 else
1743 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1744 }
1745 else /* method == CHARSET_METHOD_OFFSET */
1746 {
1747 c = char_index + CHARSET_CODE_OFFSET (charset);
1748 if (CHARSET_UNIFIED_P (charset)
1749 && c > MAX_UNICODE_CHAR)
1750 MAYBE_UNIFY_CHAR (c);
1751 }
1752 }
1753
1754 return c;
1755 }
1756
1757 /* Variable used temporarily by the macro ENCODE_CHAR. */
1758 Lisp_Object charset_work;
1759
1760 /* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
1761 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1762 use CHARSET's strict_max_char instead of max_char. */
1763
1764 unsigned
1765 encode_char (struct charset *charset, int c)
1766 {
1767 unsigned code;
1768 enum charset_method method = CHARSET_METHOD (charset);
1769
1770 if (CHARSET_UNIFIED_P (charset))
1771 {
1772 Lisp_Object deunifier;
1773 int code_index = -1;
1774
1775 deunifier = CHARSET_DEUNIFIER (charset);
1776 if (! CHAR_TABLE_P (deunifier))
1777 {
1778 load_charset (charset, 2);
1779 deunifier = CHARSET_DEUNIFIER (charset);
1780 }
1781 if (CHAR_TABLE_P (deunifier))
1782 {
1783 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1784
1785 if (INTEGERP (deunified))
1786 code_index = XINT (deunified);
1787 }
1788 else
1789 {
1790 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1791 }
1792 if (code_index >= 0)
1793 c = CHARSET_CODE_OFFSET (charset) + code_index;
1794 }
1795
1796 if (method == CHARSET_METHOD_SUBSET)
1797 {
1798 Lisp_Object subset_info;
1799 struct charset *this_charset;
1800
1801 subset_info = CHARSET_SUBSET (charset);
1802 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1803 code = ENCODE_CHAR (this_charset, c);
1804 if (code == CHARSET_INVALID_CODE (this_charset)
1805 || code < XFASTINT (AREF (subset_info, 1))
1806 || code > XFASTINT (AREF (subset_info, 2)))
1807 return CHARSET_INVALID_CODE (charset);
1808 code += XINT (AREF (subset_info, 3));
1809 return code;
1810 }
1811
1812 if (method == CHARSET_METHOD_SUPERSET)
1813 {
1814 Lisp_Object parents;
1815
1816 parents = CHARSET_SUPERSET (charset);
1817 for (; CONSP (parents); parents = XCDR (parents))
1818 {
1819 int id = XINT (XCAR (XCAR (parents)));
1820 int code_offset = XINT (XCDR (XCAR (parents)));
1821 struct charset *this_charset = CHARSET_FROM_ID (id);
1822
1823 code = ENCODE_CHAR (this_charset, c);
1824 if (code != CHARSET_INVALID_CODE (this_charset))
1825 return code + code_offset;
1826 }
1827 return CHARSET_INVALID_CODE (charset);
1828 }
1829
1830 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1831 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1832 return CHARSET_INVALID_CODE (charset);
1833
1834 if (method == CHARSET_METHOD_MAP)
1835 {
1836 Lisp_Object encoder;
1837 Lisp_Object val;
1838
1839 encoder = CHARSET_ENCODER (charset);
1840 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1841 {
1842 load_charset (charset, 2);
1843 encoder = CHARSET_ENCODER (charset);
1844 }
1845 if (CHAR_TABLE_P (encoder))
1846 {
1847 val = CHAR_TABLE_REF (encoder, c);
1848 if (NILP (val))
1849 return CHARSET_INVALID_CODE (charset);
1850 code = XINT (val);
1851 if (! CHARSET_COMPACT_CODES_P (charset))
1852 code = INDEX_TO_CODE_POINT (charset, code);
1853 }
1854 else
1855 {
1856 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1857 code = INDEX_TO_CODE_POINT (charset, code);
1858 }
1859 }
1860 else /* method == CHARSET_METHOD_OFFSET */
1861 {
1862 int code_index = c - CHARSET_CODE_OFFSET (charset);
1863
1864 code = INDEX_TO_CODE_POINT (charset, code_index);
1865 }
1866
1867 return code;
1868 }
1869
1870
1871 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1872 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1873 Return nil if CODE-POINT is not valid in CHARSET.
1874
1875 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE).
1876
1877 Optional argument RESTRICTION specifies a way to map the pair of CCS
1878 and CODE-POINT to a character. Currently not supported and just ignored. */)
1879 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1880 {
1881 int c, id;
1882 unsigned code;
1883 struct charset *charsetp;
1884
1885 CHECK_CHARSET_GET_ID (charset, id);
1886 if (CONSP (code_point))
1887 {
1888 CHECK_NATNUM_CAR (code_point);
1889 CHECK_NATNUM_CDR (code_point);
1890 code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
1891 }
1892 else
1893 {
1894 CHECK_NATNUM (code_point);
1895 code = XINT (code_point);
1896 }
1897 charsetp = CHARSET_FROM_ID (id);
1898 c = DECODE_CHAR (charsetp, code);
1899 return (c >= 0 ? make_number (c) : Qnil);
1900 }
1901
1902
1903 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1904 doc: /* Encode the character CH into a code-point of CHARSET.
1905 Return nil if CHARSET doesn't include CH.
1906
1907 Optional argument RESTRICTION specifies a way to map CH to a
1908 code-point in CCS. Currently not supported and just ignored. */)
1909 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1910 {
1911 int id;
1912 unsigned code;
1913 struct charset *charsetp;
1914
1915 CHECK_CHARSET_GET_ID (charset, id);
1916 CHECK_NATNUM (ch);
1917 charsetp = CHARSET_FROM_ID (id);
1918 code = ENCODE_CHAR (charsetp, XINT (ch));
1919 if (code == CHARSET_INVALID_CODE (charsetp))
1920 return Qnil;
1921 if (code > 0x7FFFFFF)
1922 return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
1923 return make_number (code);
1924 }
1925
1926
1927 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1928 doc:
1929 /* Return a character of CHARSET whose position codes are CODEn.
1930
1931 CODE1 through CODE4 are optional, but if you don't supply sufficient
1932 position codes, it is assumed that the minimum code in each dimension
1933 is specified. */)
1934 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1935 {
1936 int id, dimension;
1937 struct charset *charsetp;
1938 unsigned code;
1939 int c;
1940
1941 CHECK_CHARSET_GET_ID (charset, id);
1942 charsetp = CHARSET_FROM_ID (id);
1943
1944 dimension = CHARSET_DIMENSION (charsetp);
1945 if (NILP (code1))
1946 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1947 ? 0 : CHARSET_MIN_CODE (charsetp));
1948 else
1949 {
1950 CHECK_NATNUM (code1);
1951 if (XFASTINT (code1) >= 0x100)
1952 args_out_of_range (make_number (0xFF), code1);
1953 code = XFASTINT (code1);
1954
1955 if (dimension > 1)
1956 {
1957 code <<= 8;
1958 if (NILP (code2))
1959 code |= charsetp->code_space[(dimension - 2) * 4];
1960 else
1961 {
1962 CHECK_NATNUM (code2);
1963 if (XFASTINT (code2) >= 0x100)
1964 args_out_of_range (make_number (0xFF), code2);
1965 code |= XFASTINT (code2);
1966 }
1967
1968 if (dimension > 2)
1969 {
1970 code <<= 8;
1971 if (NILP (code3))
1972 code |= charsetp->code_space[(dimension - 3) * 4];
1973 else
1974 {
1975 CHECK_NATNUM (code3);
1976 if (XFASTINT (code3) >= 0x100)
1977 args_out_of_range (make_number (0xFF), code3);
1978 code |= XFASTINT (code3);
1979 }
1980
1981 if (dimension > 3)
1982 {
1983 code <<= 8;
1984 if (NILP (code4))
1985 code |= charsetp->code_space[0];
1986 else
1987 {
1988 CHECK_NATNUM (code4);
1989 if (XFASTINT (code4) >= 0x100)
1990 args_out_of_range (make_number (0xFF), code4);
1991 code |= XFASTINT (code4);
1992 }
1993 }
1994 }
1995 }
1996 }
1997
1998 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1999 code &= 0x7F7F7F7F;
2000 c = DECODE_CHAR (charsetp, code);
2001 if (c < 0)
2002 error ("Invalid code(s)");
2003 return make_number (c);
2004 }
2005
2006
2007 /* Return the first charset in CHARSET_LIST that contains C.
2008 CHARSET_LIST is a list of charset IDs. If it is nil, use
2009 Vcharset_ordered_list. */
2010
2011 struct charset *
2012 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
2013 {
2014 int maybe_null = 0;
2015
2016 if (NILP (charset_list))
2017 charset_list = Vcharset_ordered_list;
2018 else
2019 maybe_null = 1;
2020
2021 while (CONSP (charset_list))
2022 {
2023 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
2024 unsigned code = ENCODE_CHAR (charset, c);
2025
2026 if (code != CHARSET_INVALID_CODE (charset))
2027 {
2028 if (code_return)
2029 *code_return = code;
2030 return charset;
2031 }
2032 charset_list = XCDR (charset_list);
2033 if (! maybe_null
2034 && c <= MAX_UNICODE_CHAR
2035 && EQ (charset_list, Vcharset_non_preferred_head))
2036 return CHARSET_FROM_ID (charset_unicode);
2037 }
2038 return (maybe_null ? NULL
2039 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2040 : CHARSET_FROM_ID (charset_eight_bit));
2041 }
2042
2043
2044 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2045 doc:
2046 /*Return list of charset and one to four position-codes of CH.
2047 The charset is decided by the current priority order of charsets.
2048 A position-code is a byte value of each dimension of the code-point of
2049 CH in the charset. */)
2050 (Lisp_Object ch)
2051 {
2052 struct charset *charset;
2053 int c, dimension;
2054 unsigned code;
2055 Lisp_Object val;
2056
2057 CHECK_CHARACTER (ch);
2058 c = XFASTINT (ch);
2059 charset = CHAR_CHARSET (c);
2060 if (! charset)
2061 abort ();
2062 code = ENCODE_CHAR (charset, c);
2063 if (code == CHARSET_INVALID_CODE (charset))
2064 abort ();
2065 dimension = CHARSET_DIMENSION (charset);
2066 for (val = Qnil; dimension > 0; dimension--)
2067 {
2068 val = Fcons (make_number (code & 0xFF), val);
2069 code >>= 8;
2070 }
2071 return Fcons (CHARSET_NAME (charset), val);
2072 }
2073
2074
2075 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2076 doc: /* Return the charset of highest priority that contains CH.
2077 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2078 from which to find the charset. It may also be a coding system. In
2079 that case, find the charset from what supported by that coding system. */)
2080 (Lisp_Object ch, Lisp_Object restriction)
2081 {
2082 struct charset *charset;
2083
2084 CHECK_CHARACTER (ch);
2085 if (NILP (restriction))
2086 charset = CHAR_CHARSET (XINT (ch));
2087 else
2088 {
2089 Lisp_Object charset_list;
2090
2091 if (CONSP (restriction))
2092 {
2093 for (charset_list = Qnil; CONSP (restriction);
2094 restriction = XCDR (restriction))
2095 {
2096 int id;
2097
2098 CHECK_CHARSET_GET_ID (XCAR (restriction), id);
2099 charset_list = Fcons (make_number (id), charset_list);
2100 }
2101 charset_list = Fnreverse (charset_list);
2102 }
2103 else
2104 charset_list = coding_system_charset_list (restriction);
2105 charset = char_charset (XINT (ch), charset_list, NULL);
2106 if (! charset)
2107 return Qnil;
2108 }
2109 return (CHARSET_NAME (charset));
2110 }
2111
2112
2113 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2114 doc: /*
2115 Return charset of a character in the current buffer at position POS.
2116 If POS is nil, it defauls to the current point.
2117 If POS is out of range, the value is nil. */)
2118 (Lisp_Object pos)
2119 {
2120 Lisp_Object ch;
2121 struct charset *charset;
2122
2123 ch = Fchar_after (pos);
2124 if (! INTEGERP (ch))
2125 return ch;
2126 charset = CHAR_CHARSET (XINT (ch));
2127 return (CHARSET_NAME (charset));
2128 }
2129
2130
2131 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2132 doc: /*
2133 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2134
2135 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2136 by their DIMENSION, CHARS, and FINAL-CHAR,
2137 whereas Emacs distinguishes them by charset symbol.
2138 See the documentation of the function `charset-info' for the meanings of
2139 DIMENSION, CHARS, and FINAL-CHAR. */)
2140 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2141 {
2142 int id;
2143 int chars_flag;
2144
2145 check_iso_charset_parameter (dimension, chars, final_char);
2146 chars_flag = XFASTINT (chars) == 96;
2147 id = ISO_CHARSET_TABLE (XFASTINT (dimension), chars_flag,
2148 XFASTINT (final_char));
2149 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2150 }
2151
2152
2153 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2154 0, 0, 0,
2155 doc: /*
2156 Internal use only.
2157 Clear temporary charset mapping tables.
2158 It should be called only from temacs invoked for dumping. */)
2159 (void)
2160 {
2161 if (temp_charset_work)
2162 {
2163 free (temp_charset_work);
2164 temp_charset_work = NULL;
2165 }
2166
2167 if (CHAR_TABLE_P (Vchar_unify_table))
2168 Foptimize_char_table (Vchar_unify_table, Qnil);
2169
2170 return Qnil;
2171 }
2172
2173 DEFUN ("charset-priority-list", Fcharset_priority_list,
2174 Scharset_priority_list, 0, 1, 0,
2175 doc: /* Return the list of charsets ordered by priority.
2176 HIGHESTP non-nil means just return the highest priority one. */)
2177 (Lisp_Object highestp)
2178 {
2179 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2180
2181 if (!NILP (highestp))
2182 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2183
2184 while (!NILP (list))
2185 {
2186 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2187 list = XCDR (list);
2188 }
2189 return Fnreverse (val);
2190 }
2191
2192 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2193 1, MANY, 0,
2194 doc: /* Assign higher priority to the charsets given as arguments.
2195 usage: (set-charset-priority &rest charsets) */)
2196 (int nargs, Lisp_Object *args)
2197 {
2198 Lisp_Object new_head, old_list, arglist[2];
2199 Lisp_Object list_2022, list_emacs_mule;
2200 int i, id;
2201
2202 old_list = Fcopy_sequence (Vcharset_ordered_list);
2203 new_head = Qnil;
2204 for (i = 0; i < nargs; i++)
2205 {
2206 CHECK_CHARSET_GET_ID (args[i], id);
2207 if (! NILP (Fmemq (make_number (id), old_list)))
2208 {
2209 old_list = Fdelq (make_number (id), old_list);
2210 new_head = Fcons (make_number (id), new_head);
2211 }
2212 }
2213 arglist[0] = Fnreverse (new_head);
2214 arglist[1] = Vcharset_non_preferred_head = old_list;
2215 Vcharset_ordered_list = Fnconc (2, arglist);
2216 charset_ordered_list_tick++;
2217
2218 charset_unibyte = -1;
2219 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2220 CONSP (old_list); old_list = XCDR (old_list))
2221 {
2222 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2223 list_2022 = Fcons (XCAR (old_list), list_2022);
2224 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2225 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2226 if (charset_unibyte < 0)
2227 {
2228 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2229
2230 if (CHARSET_DIMENSION (charset) == 1
2231 && CHARSET_ASCII_COMPATIBLE_P (charset)
2232 && CHARSET_MAX_CHAR (charset) >= 0x80)
2233 charset_unibyte = CHARSET_ID (charset);
2234 }
2235 }
2236 Viso_2022_charset_list = Fnreverse (list_2022);
2237 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2238 if (charset_unibyte < 0)
2239 charset_unibyte = charset_iso_8859_1;
2240
2241 return Qnil;
2242 }
2243
2244 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2245 0, 1, 0,
2246 doc: /* Internal use only.
2247 Return charset identification number of CHARSET. */)
2248 (Lisp_Object charset)
2249 {
2250 int id;
2251
2252 CHECK_CHARSET_GET_ID (charset, id);
2253 return make_number (id);
2254 }
2255
2256 \f
2257 void
2258 init_charset (void)
2259 {
2260 Lisp_Object tempdir;
2261 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2262 if (access ((char *) SDATA (tempdir), 0) < 0)
2263 {
2264 dir_warning ("Error: charsets directory (%s) does not exist.\n\
2265 Emacs will not function correctly without the character map files.\n\
2266 Please check your installation!\n",
2267 tempdir);
2268 /* TODO should this be a fatal error? (Bug#909) */
2269 }
2270
2271 Vcharset_map_path = Fcons (tempdir, Qnil);
2272 }
2273
2274
2275 void
2276 init_charset_once (void)
2277 {
2278 int i, j, k;
2279
2280 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2281 for (j = 0; j < ISO_MAX_CHARS; j++)
2282 for (k = 0; k < ISO_MAX_FINAL; k++)
2283 iso_charset_table[i][j][k] = -1;
2284
2285 for (i = 0; i < 256; i++)
2286 emacs_mule_charset[i] = NULL;
2287
2288 charset_jisx0201_roman = -1;
2289 charset_jisx0208_1978 = -1;
2290 charset_jisx0208 = -1;
2291 charset_ksc5601 = -1;
2292 }
2293
2294 #ifdef emacs
2295
2296 void
2297 syms_of_charset (void)
2298 {
2299 DEFSYM (Qcharsetp, "charsetp");
2300
2301 DEFSYM (Qascii, "ascii");
2302 DEFSYM (Qunicode, "unicode");
2303 DEFSYM (Qemacs, "emacs");
2304 DEFSYM (Qeight_bit, "eight-bit");
2305 DEFSYM (Qiso_8859_1, "iso-8859-1");
2306
2307 DEFSYM (Qgl, "gl");
2308 DEFSYM (Qgr, "gr");
2309
2310 staticpro (&Vcharset_ordered_list);
2311 Vcharset_ordered_list = Qnil;
2312
2313 staticpro (&Viso_2022_charset_list);
2314 Viso_2022_charset_list = Qnil;
2315
2316 staticpro (&Vemacs_mule_charset_list);
2317 Vemacs_mule_charset_list = Qnil;
2318
2319 /* Don't staticpro them here. It's done in syms_of_fns. */
2320 QCtest = intern (":test");
2321 Qeq = intern ("eq");
2322
2323 staticpro (&Vcharset_hash_table);
2324 {
2325 Lisp_Object args[2];
2326 args[0] = QCtest;
2327 args[1] = Qeq;
2328 Vcharset_hash_table = Fmake_hash_table (2, args);
2329 }
2330
2331 charset_table_size = 128;
2332 charset_table = ((struct charset *)
2333 xmalloc (sizeof (struct charset) * charset_table_size));
2334 charset_table_used = 0;
2335
2336 defsubr (&Scharsetp);
2337 defsubr (&Smap_charset_chars);
2338 defsubr (&Sdefine_charset_internal);
2339 defsubr (&Sdefine_charset_alias);
2340 defsubr (&Scharset_plist);
2341 defsubr (&Sset_charset_plist);
2342 defsubr (&Sunify_charset);
2343 defsubr (&Sget_unused_iso_final_char);
2344 defsubr (&Sdeclare_equiv_charset);
2345 defsubr (&Sfind_charset_region);
2346 defsubr (&Sfind_charset_string);
2347 defsubr (&Sdecode_char);
2348 defsubr (&Sencode_char);
2349 defsubr (&Ssplit_char);
2350 defsubr (&Smake_char);
2351 defsubr (&Schar_charset);
2352 defsubr (&Scharset_after);
2353 defsubr (&Siso_charset);
2354 defsubr (&Sclear_charset_maps);
2355 defsubr (&Scharset_priority_list);
2356 defsubr (&Sset_charset_priority);
2357 defsubr (&Scharset_id_internal);
2358
2359 DEFVAR_LISP ("charset-map-path", &Vcharset_map_path,
2360 doc: /* *List of directories to search for charset map files. */);
2361 Vcharset_map_path = Qnil;
2362
2363 DEFVAR_BOOL ("inhibit-load-charset-map", &inhibit_load_charset_map,
2364 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2365 inhibit_load_charset_map = 0;
2366
2367 DEFVAR_LISP ("charset-list", &Vcharset_list,
2368 doc: /* List of all charsets ever defined. */);
2369 Vcharset_list = Qnil;
2370
2371 DEFVAR_LISP ("current-iso639-language", &Vcurrent_iso639_language,
2372 doc: /* ISO639 language mnemonic symbol for the current language environment.
2373 If the current language environment is for multiple languages (e.g. "Latin-1"),
2374 the value may be a list of mnemonics. */);
2375 Vcurrent_iso639_language = Qnil;
2376
2377 charset_ascii
2378 = define_charset_internal (Qascii, 1, "\x00\x7F\x00\x00\x00\x00",
2379 0, 127, 'B', -1, 0, 1, 0, 0);
2380 charset_iso_8859_1
2381 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\x00\x00\x00\x00",
2382 0, 255, -1, -1, -1, 1, 0, 0);
2383 charset_unicode
2384 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10",
2385 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2386 charset_emacs
2387 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F",
2388 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2389 charset_eight_bit
2390 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\x00\x00\x00\x00",
2391 128, 255, -1, 0, -1, 0, 1,
2392 MAX_5_BYTE_CHAR + 1);
2393 charset_unibyte = charset_iso_8859_1;
2394 }
2395
2396 #endif /* emacs */
2397
2398 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
2399 (do not change this comment) */