]> code.delx.au - gnu-emacs/blob - src/charset.c
Remove redundant GCPROs around Ffuncall and Fapply calls. This
[gnu-emacs] / src / charset.c
1 /* Basic character set support.
2
3 Copyright (C) 2001-2014 Free Software Foundation, Inc.
4
5 Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
6 2005, 2006, 2007, 2008, 2009, 2010, 2011
7 National Institute of Advanced Industrial Science and Technology (AIST)
8 Registration Number H14PRO021
9
10 Copyright (C) 2003, 2004
11 National Institute of Advanced Industrial Science and Technology (AIST)
12 Registration Number H13PRO009
13
14 This file is part of GNU Emacs.
15
16 GNU Emacs is free software: you can redistribute it and/or modify
17 it under the terms of the GNU General Public License as published by
18 the Free Software Foundation, either version 3 of the License, or
19 (at your option) any later version.
20
21 GNU Emacs is distributed in the hope that it will be useful,
22 but WITHOUT ANY WARRANTY; without even the implied warranty of
23 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
24 GNU General Public License for more details.
25
26 You should have received a copy of the GNU General Public License
27 along with GNU Emacs. If not, see <http://www.gnu.org/licenses/>. */
28
29 #include <config.h>
30
31 #include <errno.h>
32 #include <stdio.h>
33 #include <unistd.h>
34 #include <limits.h>
35 #include <sys/types.h>
36 #include <c-ctype.h>
37 #include "lisp.h"
38 #include "character.h"
39 #include "charset.h"
40 #include "coding.h"
41 #include "disptab.h"
42 #include "buffer.h"
43
44 /*** GENERAL NOTES on CODED CHARACTER SETS (CHARSETS) ***
45
46 A coded character set ("charset" hereafter) is a meaningful
47 collection (i.e. language, culture, functionality, etc.) of
48 characters. Emacs handles multiple charsets at once. In Emacs Lisp
49 code, a charset is represented by a symbol. In C code, a charset is
50 represented by its ID number or by a pointer to a struct charset.
51
52 The actual information about each charset is stored in two places.
53 Lispy information is stored in the hash table Vcharset_hash_table as
54 a vector (charset attributes). The other information is stored in
55 charset_table as a struct charset.
56
57 */
58
59 /* Hash table that contains attributes of each charset. Keys are
60 charset symbols, and values are vectors of charset attributes. */
61 Lisp_Object Vcharset_hash_table;
62
63 /* Table of struct charset. */
64 struct charset *charset_table;
65
66 static ptrdiff_t charset_table_size;
67 static int charset_table_used;
68
69 Lisp_Object Qcharsetp;
70
71 /* Special charset symbols. */
72 Lisp_Object Qascii;
73 static Lisp_Object Qeight_bit;
74 static Lisp_Object Qiso_8859_1;
75 static Lisp_Object Qunicode;
76 static Lisp_Object Qemacs;
77
78 /* The corresponding charsets. */
79 int charset_ascii;
80 int charset_eight_bit;
81 static int charset_iso_8859_1;
82 int charset_unicode;
83 static int charset_emacs;
84
85 /* The other special charsets. */
86 int charset_jisx0201_roman;
87 int charset_jisx0208_1978;
88 int charset_jisx0208;
89 int charset_ksc5601;
90
91 /* Value of charset attribute `charset-iso-plane'. */
92 static Lisp_Object Qgl, Qgr;
93
94 /* Charset of unibyte characters. */
95 int charset_unibyte;
96
97 /* List of charsets ordered by the priority. */
98 Lisp_Object Vcharset_ordered_list;
99
100 /* Sub-list of Vcharset_ordered_list that contains all non-preferred
101 charsets. */
102 Lisp_Object Vcharset_non_preferred_head;
103
104 /* Incremented everytime we change Vcharset_ordered_list. This is
105 unsigned short so that it fits in Lisp_Int and never matches
106 -1. */
107 unsigned short charset_ordered_list_tick;
108
109 /* List of iso-2022 charsets. */
110 Lisp_Object Viso_2022_charset_list;
111
112 /* List of emacs-mule charsets. */
113 Lisp_Object Vemacs_mule_charset_list;
114
115 int emacs_mule_charset[256];
116
117 /* Mapping table from ISO2022's charset (specified by DIMENSION,
118 CHARS, and FINAL-CHAR) to Emacs' charset. */
119 int iso_charset_table[ISO_MAX_DIMENSION][ISO_MAX_CHARS][ISO_MAX_FINAL];
120
121 #define CODE_POINT_TO_INDEX(charset, code) \
122 ((charset)->code_linear_p \
123 ? (int) ((code) - (charset)->min_code) \
124 : (((charset)->code_space_mask[(code) >> 24] & 0x8) \
125 && ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
126 && ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
127 && ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
128 ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
129 * (charset)->code_space[11]) \
130 + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
131 * (charset)->code_space[7]) \
132 + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
133 * (charset)->code_space[3]) \
134 + (((code) & 0xFF) - (charset)->code_space[0]) \
135 - ((charset)->char_index_offset)) \
136 : -1)
137
138
139 /* Return the code-point for the character index IDX in CHARSET.
140 IDX should be an unsigned int variable in a valid range (which is
141 always in nonnegative int range too). IDX contains garbage afterwards. */
142
143 #define INDEX_TO_CODE_POINT(charset, idx) \
144 ((charset)->code_linear_p \
145 ? (idx) + (charset)->min_code \
146 : (idx += (charset)->char_index_offset, \
147 (((charset)->code_space[0] + (idx) % (charset)->code_space[2]) \
148 | (((charset)->code_space[4] \
149 + ((idx) / (charset)->code_space[3] % (charset)->code_space[6])) \
150 << 8) \
151 | (((charset)->code_space[8] \
152 + ((idx) / (charset)->code_space[7] % (charset)->code_space[10])) \
153 << 16) \
154 | (((charset)->code_space[12] + ((idx) / (charset)->code_space[11])) \
155 << 24))))
156
157 /* Structure to hold mapping tables for a charset. Used by temacs
158 invoked for dumping. */
159
160 static struct
161 {
162 /* The current charset for which the following tables are setup. */
163 struct charset *current;
164
165 /* 1 iff the following table is used for encoder. */
166 short for_encoder;
167
168 /* When the following table is used for encoding, minimum and
169 maximum character of the current charset. */
170 int min_char, max_char;
171
172 /* A Unicode character corresponding to the code index 0 (i.e. the
173 minimum code-point) of the current charset, or -1 if the code
174 index 0 is not a Unicode character. This is checked when
175 table.encoder[CHAR] is zero. */
176 int zero_index_char;
177
178 union {
179 /* Table mapping code-indices (not code-points) of the current
180 charset to Unicode characters. If decoder[CHAR] is -1, CHAR
181 doesn't belong to the current charset. */
182 int decoder[0x10000];
183 /* Table mapping Unicode characters to code-indices of the current
184 charset. The first 0x10000 elements are for BMP (0..0xFFFF),
185 and the last 0x10000 are for SMP (0x10000..0x1FFFF) or SIP
186 (0x20000..0x2FFFF). Note that there is no charset map that
187 uses both SMP and SIP. */
188 unsigned short encoder[0x20000];
189 } table;
190 } *temp_charset_work;
191
192 #define SET_TEMP_CHARSET_WORK_ENCODER(C, CODE) \
193 do { \
194 if ((CODE) == 0) \
195 temp_charset_work->zero_index_char = (C); \
196 else if ((C) < 0x20000) \
197 temp_charset_work->table.encoder[(C)] = (CODE); \
198 else \
199 temp_charset_work->table.encoder[(C) - 0x10000] = (CODE); \
200 } while (0)
201
202 #define GET_TEMP_CHARSET_WORK_ENCODER(C) \
203 ((C) == temp_charset_work->zero_index_char ? 0 \
204 : (C) < 0x20000 ? (temp_charset_work->table.encoder[(C)] \
205 ? (int) temp_charset_work->table.encoder[(C)] : -1) \
206 : temp_charset_work->table.encoder[(C) - 0x10000] \
207 ? temp_charset_work->table.encoder[(C) - 0x10000] : -1)
208
209 #define SET_TEMP_CHARSET_WORK_DECODER(C, CODE) \
210 (temp_charset_work->table.decoder[(CODE)] = (C))
211
212 #define GET_TEMP_CHARSET_WORK_DECODER(CODE) \
213 (temp_charset_work->table.decoder[(CODE)])
214 \f
215
216 /* Set to 1 to warn that a charset map is loaded and thus a buffer
217 text and a string data may be relocated. */
218 bool charset_map_loaded;
219
220 struct charset_map_entries
221 {
222 struct {
223 unsigned from, to;
224 int c;
225 } entry[0x10000];
226 struct charset_map_entries *next;
227 };
228
229 /* Load the mapping information of CHARSET from ENTRIES for
230 initializing (CONTROL_FLAG == 0), decoding (CONTROL_FLAG == 1), and
231 encoding (CONTROL_FLAG == 2).
232
233 If CONTROL_FLAG is 0, setup CHARSET->min_char, CHARSET->max_char,
234 and CHARSET->fast_map.
235
236 If CONTROL_FLAG is 1, setup the following tables according to
237 CHARSET->method and inhibit_load_charset_map.
238
239 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
240 ----------------------+--------------------+---------------------------
241 CHARSET_METHOD_MAP | CHARSET->decoder | temp_charset_work->decoder
242 ----------------------+--------------------+---------------------------
243 CHARSET_METHOD_OFFSET | Vchar_unify_table | temp_charset_work->decoder
244
245 If CONTROL_FLAG is 2, setup the following tables.
246
247 CHARSET->method | inhibit_lcm == 0 | inhibit_lcm == 1
248 ----------------------+--------------------+---------------------------
249 CHARSET_METHOD_MAP | CHARSET->encoder | temp_charset_work->encoder
250 ----------------------+--------------------+--------------------------
251 CHARSET_METHOD_OFFSET | CHARSET->deunifier | temp_charset_work->encoder
252 */
253
254 static void
255 load_charset_map (struct charset *charset, struct charset_map_entries *entries, int n_entries, int control_flag)
256 {
257 Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
258 unsigned max_code = CHARSET_MAX_CODE (charset);
259 bool ascii_compatible_p = charset->ascii_compatible_p;
260 int min_char, max_char, nonascii_min_char;
261 int i;
262 unsigned char *fast_map = charset->fast_map;
263
264 if (n_entries <= 0)
265 return;
266
267 if (control_flag)
268 {
269 if (! inhibit_load_charset_map)
270 {
271 if (control_flag == 1)
272 {
273 if (charset->method == CHARSET_METHOD_MAP)
274 {
275 int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
276
277 vec = Fmake_vector (make_number (n), make_number (-1));
278 set_charset_attr (charset, charset_decoder, vec);
279 }
280 else
281 {
282 char_table_set_range (Vchar_unify_table,
283 charset->min_char, charset->max_char,
284 Qnil);
285 }
286 }
287 else
288 {
289 table = Fmake_char_table (Qnil, Qnil);
290 set_charset_attr (charset,
291 (charset->method == CHARSET_METHOD_MAP
292 ? charset_encoder : charset_deunifier),
293 table);
294 }
295 }
296 else
297 {
298 if (! temp_charset_work)
299 temp_charset_work = xmalloc (sizeof *temp_charset_work);
300 if (control_flag == 1)
301 {
302 memset (temp_charset_work->table.decoder, -1,
303 sizeof (int) * 0x10000);
304 }
305 else
306 {
307 memset (temp_charset_work->table.encoder, 0,
308 sizeof (unsigned short) * 0x20000);
309 temp_charset_work->zero_index_char = -1;
310 }
311 temp_charset_work->current = charset;
312 temp_charset_work->for_encoder = (control_flag == 2);
313 control_flag += 2;
314 }
315 charset_map_loaded = 1;
316 }
317
318 min_char = max_char = entries->entry[0].c;
319 nonascii_min_char = MAX_CHAR;
320 for (i = 0; i < n_entries; i++)
321 {
322 unsigned from, to;
323 int from_index, to_index, lim_index;
324 int from_c, to_c;
325 int idx = i % 0x10000;
326
327 if (i > 0 && idx == 0)
328 entries = entries->next;
329 from = entries->entry[idx].from;
330 to = entries->entry[idx].to;
331 from_c = entries->entry[idx].c;
332 from_index = CODE_POINT_TO_INDEX (charset, from);
333 if (from == to)
334 {
335 to_index = from_index;
336 to_c = from_c;
337 }
338 else
339 {
340 to_index = CODE_POINT_TO_INDEX (charset, to);
341 to_c = from_c + (to_index - from_index);
342 }
343 if (from_index < 0 || to_index < 0)
344 continue;
345 lim_index = to_index + 1;
346
347 if (to_c > max_char)
348 max_char = to_c;
349 else if (from_c < min_char)
350 min_char = from_c;
351
352 if (control_flag == 1)
353 {
354 if (charset->method == CHARSET_METHOD_MAP)
355 for (; from_index < lim_index; from_index++, from_c++)
356 ASET (vec, from_index, make_number (from_c));
357 else
358 for (; from_index < lim_index; from_index++, from_c++)
359 CHAR_TABLE_SET (Vchar_unify_table,
360 CHARSET_CODE_OFFSET (charset) + from_index,
361 make_number (from_c));
362 }
363 else if (control_flag == 2)
364 {
365 if (charset->method == CHARSET_METHOD_MAP
366 && CHARSET_COMPACT_CODES_P (charset))
367 for (; from_index < lim_index; from_index++, from_c++)
368 {
369 unsigned code = from_index;
370 code = INDEX_TO_CODE_POINT (charset, code);
371
372 if (NILP (CHAR_TABLE_REF (table, from_c)))
373 CHAR_TABLE_SET (table, from_c, make_number (code));
374 }
375 else
376 for (; from_index < lim_index; from_index++, from_c++)
377 {
378 if (NILP (CHAR_TABLE_REF (table, from_c)))
379 CHAR_TABLE_SET (table, from_c, make_number (from_index));
380 }
381 }
382 else if (control_flag == 3)
383 for (; from_index < lim_index; from_index++, from_c++)
384 SET_TEMP_CHARSET_WORK_DECODER (from_c, from_index);
385 else if (control_flag == 4)
386 for (; from_index < lim_index; from_index++, from_c++)
387 SET_TEMP_CHARSET_WORK_ENCODER (from_c, from_index);
388 else /* control_flag == 0 */
389 {
390 if (ascii_compatible_p)
391 {
392 if (! ASCII_CHAR_P (from_c))
393 {
394 if (from_c < nonascii_min_char)
395 nonascii_min_char = from_c;
396 }
397 else if (! ASCII_CHAR_P (to_c))
398 {
399 nonascii_min_char = 0x80;
400 }
401 }
402
403 for (; from_c <= to_c; from_c++)
404 CHARSET_FAST_MAP_SET (from_c, fast_map);
405 }
406 }
407
408 if (control_flag == 0)
409 {
410 CHARSET_MIN_CHAR (charset) = (ascii_compatible_p
411 ? nonascii_min_char : min_char);
412 CHARSET_MAX_CHAR (charset) = max_char;
413 }
414 else if (control_flag == 4)
415 {
416 temp_charset_work->min_char = min_char;
417 temp_charset_work->max_char = max_char;
418 }
419 }
420
421
422 /* Read a hexadecimal number (preceded by "0x") from the file FP while
423 paying attention to comment character '#'. */
424
425 static unsigned
426 read_hex (FILE *fp, bool *eof, bool *overflow)
427 {
428 int c;
429 unsigned n;
430
431 while ((c = getc (fp)) != EOF)
432 {
433 if (c == '#')
434 {
435 while ((c = getc (fp)) != EOF && c != '\n');
436 }
437 else if (c == '0')
438 {
439 if ((c = getc (fp)) == EOF || c == 'x')
440 break;
441 }
442 }
443 if (c == EOF)
444 {
445 *eof = 1;
446 return 0;
447 }
448 n = 0;
449 while (c_isxdigit (c = getc (fp)))
450 {
451 if (UINT_MAX >> 4 < n)
452 *overflow = 1;
453 n = ((n << 4)
454 | (c - ('0' <= c && c <= '9' ? '0'
455 : 'A' <= c && c <= 'F' ? 'A' - 10
456 : 'a' - 10)));
457 }
458 if (c != EOF)
459 ungetc (c, fp);
460 return n;
461 }
462
463 /* Return a mapping vector for CHARSET loaded from MAPFILE.
464 Each line of MAPFILE has this form
465 0xAAAA 0xCCCC
466 where 0xAAAA is a code-point and 0xCCCC is the corresponding
467 character code, or this form
468 0xAAAA-0xBBBB 0xCCCC
469 where 0xAAAA and 0xBBBB are code-points specifying a range, and
470 0xCCCC is the first character code of the range.
471
472 The returned vector has this form:
473 [ CODE1 CHAR1 CODE2 CHAR2 .... ]
474 where CODE1 is a code-point or a cons of code-points specifying a
475 range.
476
477 Note that this function uses `openp' to open MAPFILE but ignores
478 `file-name-handler-alist' to avoid running any Lisp code. */
479
480 static void
481 load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile,
482 int control_flag)
483 {
484 unsigned min_code = CHARSET_MIN_CODE (charset);
485 unsigned max_code = CHARSET_MAX_CODE (charset);
486 int fd;
487 FILE *fp;
488 Lisp_Object suffixes;
489 struct charset_map_entries *head, *entries;
490 int n_entries;
491 ptrdiff_t count;
492
493 suffixes = list2 (build_string (".map"), build_string (".TXT"));
494
495 count = SPECPDL_INDEX ();
496 record_unwind_protect_nothing ();
497 specbind (Qfile_name_handler_alist, Qnil);
498 fd = openp (Vcharset_map_path, mapfile, suffixes, NULL, Qnil, false);
499 fp = fd < 0 ? 0 : fdopen (fd, "r");
500 if (!fp)
501 {
502 int open_errno = errno;
503 emacs_close (fd);
504 report_file_errno ("Loading charset map", mapfile, open_errno);
505 }
506 set_unwind_protect_ptr (count, fclose_unwind, fp);
507 unbind_to (count + 1, Qnil);
508
509 /* Use record_xmalloc, as `charset_map_entries' is
510 large (larger than MAX_ALLOCA). */
511 head = record_xmalloc (sizeof *head);
512 entries = head;
513 memset (entries, 0, sizeof (struct charset_map_entries));
514
515 n_entries = 0;
516 while (1)
517 {
518 unsigned from, to, c;
519 int idx;
520 bool eof = 0, overflow = 0;
521
522 from = read_hex (fp, &eof, &overflow);
523 if (eof)
524 break;
525 if (getc (fp) == '-')
526 to = read_hex (fp, &eof, &overflow);
527 else
528 to = from;
529 if (eof)
530 break;
531 c = read_hex (fp, &eof, &overflow);
532 if (eof)
533 break;
534
535 if (overflow)
536 continue;
537 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
538 continue;
539
540 if (n_entries == 0x10000)
541 {
542 entries->next = record_xmalloc (sizeof *entries->next);
543 entries = entries->next;
544 memset (entries, 0, sizeof (struct charset_map_entries));
545 n_entries = 0;
546 }
547 idx = n_entries;
548 entries->entry[idx].from = from;
549 entries->entry[idx].to = to;
550 entries->entry[idx].c = c;
551 n_entries++;
552 }
553 fclose (fp);
554 clear_unwind_protect (count);
555
556 load_charset_map (charset, head, n_entries, control_flag);
557 unbind_to (count, Qnil);
558 }
559
560 static void
561 load_charset_map_from_vector (struct charset *charset, Lisp_Object vec, int control_flag)
562 {
563 unsigned min_code = CHARSET_MIN_CODE (charset);
564 unsigned max_code = CHARSET_MAX_CODE (charset);
565 struct charset_map_entries *head, *entries;
566 int n_entries;
567 int len = ASIZE (vec);
568 int i;
569 USE_SAFE_ALLOCA;
570
571 if (len % 2 == 1)
572 {
573 add_to_log ("Failure in loading charset map: %V", vec, Qnil);
574 return;
575 }
576
577 /* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
578 large (larger than MAX_ALLOCA). */
579 head = SAFE_ALLOCA (sizeof *head);
580 entries = head;
581 memset (entries, 0, sizeof (struct charset_map_entries));
582
583 n_entries = 0;
584 for (i = 0; i < len; i += 2)
585 {
586 Lisp_Object val, val2;
587 unsigned from, to;
588 EMACS_INT c;
589 int idx;
590
591 val = AREF (vec, i);
592 if (CONSP (val))
593 {
594 val2 = XCDR (val);
595 val = XCAR (val);
596 from = XFASTINT (val);
597 to = XFASTINT (val2);
598 }
599 else
600 from = to = XFASTINT (val);
601 val = AREF (vec, i + 1);
602 CHECK_NATNUM (val);
603 c = XFASTINT (val);
604
605 if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
606 continue;
607
608 if (n_entries > 0 && (n_entries % 0x10000) == 0)
609 {
610 entries->next = SAFE_ALLOCA (sizeof *entries->next);
611 entries = entries->next;
612 memset (entries, 0, sizeof (struct charset_map_entries));
613 }
614 idx = n_entries % 0x10000;
615 entries->entry[idx].from = from;
616 entries->entry[idx].to = to;
617 entries->entry[idx].c = c;
618 n_entries++;
619 }
620
621 load_charset_map (charset, head, n_entries, control_flag);
622 SAFE_FREE ();
623 }
624
625
626 /* Load a mapping table for CHARSET. CONTROL-FLAG tells what kind of
627 map it is (see the comment of load_charset_map for the detail). */
628
629 static void
630 load_charset (struct charset *charset, int control_flag)
631 {
632 Lisp_Object map;
633
634 if (inhibit_load_charset_map
635 && temp_charset_work
636 && charset == temp_charset_work->current
637 && ((control_flag == 2) == temp_charset_work->for_encoder))
638 return;
639
640 if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
641 map = CHARSET_MAP (charset);
642 else
643 {
644 if (! CHARSET_UNIFIED_P (charset))
645 emacs_abort ();
646 map = CHARSET_UNIFY_MAP (charset);
647 }
648 if (STRINGP (map))
649 load_charset_map_from_file (charset, map, control_flag);
650 else
651 load_charset_map_from_vector (charset, map, control_flag);
652 }
653
654
655 DEFUN ("charsetp", Fcharsetp, Scharsetp, 1, 1, 0,
656 doc: /* Return non-nil if and only if OBJECT is a charset.*/)
657 (Lisp_Object object)
658 {
659 return (CHARSETP (object) ? Qt : Qnil);
660 }
661
662
663 static void
664 map_charset_for_dump (void (*c_function) (Lisp_Object, Lisp_Object),
665 Lisp_Object function, Lisp_Object arg,
666 unsigned int from, unsigned int to)
667 {
668 int from_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, from);
669 int to_idx = CODE_POINT_TO_INDEX (temp_charset_work->current, to);
670 Lisp_Object range = Fcons (Qnil, Qnil);
671 int c, stop;
672
673 c = temp_charset_work->min_char;
674 stop = (temp_charset_work->max_char < 0x20000
675 ? temp_charset_work->max_char : 0xFFFF);
676
677 while (1)
678 {
679 int idx = GET_TEMP_CHARSET_WORK_ENCODER (c);
680
681 if (idx >= from_idx && idx <= to_idx)
682 {
683 if (NILP (XCAR (range)))
684 XSETCAR (range, make_number (c));
685 }
686 else if (! NILP (XCAR (range)))
687 {
688 XSETCDR (range, make_number (c - 1));
689 if (c_function)
690 (*c_function) (arg, range);
691 else
692 call2 (function, range, arg);
693 XSETCAR (range, Qnil);
694 }
695 if (c == stop)
696 {
697 if (c == temp_charset_work->max_char)
698 {
699 if (! NILP (XCAR (range)))
700 {
701 XSETCDR (range, make_number (c));
702 if (c_function)
703 (*c_function) (arg, range);
704 else
705 call2 (function, range, arg);
706 }
707 break;
708 }
709 c = 0x1FFFF;
710 stop = temp_charset_work->max_char;
711 }
712 c++;
713 }
714 }
715
716 void
717 map_charset_chars (void (*c_function)(Lisp_Object, Lisp_Object), Lisp_Object function,
718 Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
719 {
720 Lisp_Object range;
721 bool partial = (from > CHARSET_MIN_CODE (charset)
722 || to < CHARSET_MAX_CODE (charset));
723
724 if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
725 {
726 int from_idx = CODE_POINT_TO_INDEX (charset, from);
727 int to_idx = CODE_POINT_TO_INDEX (charset, to);
728 int from_c = from_idx + CHARSET_CODE_OFFSET (charset);
729 int to_c = to_idx + CHARSET_CODE_OFFSET (charset);
730
731 if (CHARSET_UNIFIED_P (charset))
732 {
733 if (! CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
734 load_charset (charset, 2);
735 if (CHAR_TABLE_P (CHARSET_DEUNIFIER (charset)))
736 map_char_table_for_charset (c_function, function,
737 CHARSET_DEUNIFIER (charset), arg,
738 partial ? charset : NULL, from, to);
739 else
740 map_charset_for_dump (c_function, function, arg, from, to);
741 }
742
743 range = Fcons (make_number (from_c), make_number (to_c));
744 if (NILP (function))
745 (*c_function) (arg, range);
746 else
747 call2 (function, range, arg);
748 }
749 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_MAP)
750 {
751 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
752 load_charset (charset, 2);
753 if (CHAR_TABLE_P (CHARSET_ENCODER (charset)))
754 map_char_table_for_charset (c_function, function,
755 CHARSET_ENCODER (charset), arg,
756 partial ? charset : NULL, from, to);
757 else
758 map_charset_for_dump (c_function, function, arg, from, to);
759 }
760 else if (CHARSET_METHOD (charset) == CHARSET_METHOD_SUBSET)
761 {
762 Lisp_Object subset_info;
763 int offset;
764
765 subset_info = CHARSET_SUBSET (charset);
766 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
767 offset = XINT (AREF (subset_info, 3));
768 from -= offset;
769 if (from < XFASTINT (AREF (subset_info, 1)))
770 from = XFASTINT (AREF (subset_info, 1));
771 to -= offset;
772 if (to > XFASTINT (AREF (subset_info, 2)))
773 to = XFASTINT (AREF (subset_info, 2));
774 map_charset_chars (c_function, function, arg, charset, from, to);
775 }
776 else /* i.e. CHARSET_METHOD_SUPERSET */
777 {
778 Lisp_Object parents;
779
780 for (parents = CHARSET_SUPERSET (charset); CONSP (parents);
781 parents = XCDR (parents))
782 {
783 int offset;
784 unsigned this_from, this_to;
785
786 charset = CHARSET_FROM_ID (XFASTINT (XCAR (XCAR (parents))));
787 offset = XINT (XCDR (XCAR (parents)));
788 this_from = from > offset ? from - offset : 0;
789 this_to = to > offset ? to - offset : 0;
790 if (this_from < CHARSET_MIN_CODE (charset))
791 this_from = CHARSET_MIN_CODE (charset);
792 if (this_to > CHARSET_MAX_CODE (charset))
793 this_to = CHARSET_MAX_CODE (charset);
794 map_charset_chars (c_function, function, arg, charset,
795 this_from, this_to);
796 }
797 }
798 }
799
800 DEFUN ("map-charset-chars", Fmap_charset_chars, Smap_charset_chars, 2, 5, 0,
801 doc: /* Call FUNCTION for all characters in CHARSET.
802 FUNCTION is called with an argument RANGE and the optional 3rd
803 argument ARG.
804
805 RANGE is a cons (FROM . TO), where FROM and TO indicate a range of
806 characters contained in CHARSET.
807
808 The optional 4th and 5th arguments FROM-CODE and TO-CODE specify the
809 range of code points (in CHARSET) of target characters. */)
810 (Lisp_Object function, Lisp_Object charset, Lisp_Object arg, Lisp_Object from_code, Lisp_Object to_code)
811 {
812 struct charset *cs;
813 unsigned from, to;
814
815 CHECK_CHARSET_GET_CHARSET (charset, cs);
816 if (NILP (from_code))
817 from = CHARSET_MIN_CODE (cs);
818 else
819 {
820 from = XINT (from_code);
821 if (from < CHARSET_MIN_CODE (cs))
822 from = CHARSET_MIN_CODE (cs);
823 }
824 if (NILP (to_code))
825 to = CHARSET_MAX_CODE (cs);
826 else
827 {
828 to = XINT (to_code);
829 if (to > CHARSET_MAX_CODE (cs))
830 to = CHARSET_MAX_CODE (cs);
831 }
832 map_charset_chars (NULL, function, arg, cs, from, to);
833 return Qnil;
834 }
835
836
837 /* Define a charset according to the arguments. The Nth argument is
838 the Nth attribute of the charset (the last attribute `charset-id'
839 is not included). See the docstring of `define-charset' for the
840 detail. */
841
842 DEFUN ("define-charset-internal", Fdefine_charset_internal,
843 Sdefine_charset_internal, charset_arg_max, MANY, 0,
844 doc: /* For internal use only.
845 usage: (define-charset-internal ...) */)
846 (ptrdiff_t nargs, Lisp_Object *args)
847 {
848 /* Charset attr vector. */
849 Lisp_Object attrs;
850 Lisp_Object val;
851 EMACS_UINT hash_code;
852 struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
853 int i, j;
854 struct charset charset;
855 int id;
856 int dimension;
857 bool new_definition_p;
858 int nchars;
859
860 if (nargs != charset_arg_max)
861 return Fsignal (Qwrong_number_of_arguments,
862 Fcons (intern ("define-charset-internal"),
863 make_number (nargs)));
864
865 attrs = Fmake_vector (make_number (charset_attr_max), Qnil);
866
867 CHECK_SYMBOL (args[charset_arg_name]);
868 ASET (attrs, charset_name, args[charset_arg_name]);
869
870 val = args[charset_arg_code_space];
871 for (i = 0, dimension = 0, nchars = 1; ; i++)
872 {
873 Lisp_Object min_byte_obj, max_byte_obj;
874 int min_byte, max_byte;
875
876 min_byte_obj = Faref (val, make_number (i * 2));
877 max_byte_obj = Faref (val, make_number (i * 2 + 1));
878 CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
879 min_byte = XINT (min_byte_obj);
880 CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
881 max_byte = XINT (max_byte_obj);
882 charset.code_space[i * 4] = min_byte;
883 charset.code_space[i * 4 + 1] = max_byte;
884 charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
885 if (max_byte > 0)
886 dimension = i + 1;
887 if (i == 3)
888 break;
889 nchars *= charset.code_space[i * 4 + 2];
890 charset.code_space[i * 4 + 3] = nchars;
891 }
892
893 val = args[charset_arg_dimension];
894 if (NILP (val))
895 charset.dimension = dimension;
896 else
897 {
898 CHECK_RANGED_INTEGER (val, 1, 4);
899 charset.dimension = XINT (val);
900 }
901
902 charset.code_linear_p
903 = (charset.dimension == 1
904 || (charset.code_space[2] == 256
905 && (charset.dimension == 2
906 || (charset.code_space[6] == 256
907 && (charset.dimension == 3
908 || charset.code_space[10] == 256)))));
909
910 if (! charset.code_linear_p)
911 {
912 charset.code_space_mask = xzalloc (256);
913 for (i = 0; i < 4; i++)
914 for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
915 j++)
916 charset.code_space_mask[j] |= (1 << i);
917 }
918
919 charset.iso_chars_96 = charset.code_space[2] == 96;
920
921 charset.min_code = (charset.code_space[0]
922 | (charset.code_space[4] << 8)
923 | (charset.code_space[8] << 16)
924 | ((unsigned) charset.code_space[12] << 24));
925 charset.max_code = (charset.code_space[1]
926 | (charset.code_space[5] << 8)
927 | (charset.code_space[9] << 16)
928 | ((unsigned) charset.code_space[13] << 24));
929 charset.char_index_offset = 0;
930
931 val = args[charset_arg_min_code];
932 if (! NILP (val))
933 {
934 unsigned code = cons_to_unsigned (val, UINT_MAX);
935
936 if (code < charset.min_code
937 || code > charset.max_code)
938 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
939 make_fixnum_or_float (charset.max_code), val);
940 charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
941 charset.min_code = code;
942 }
943
944 val = args[charset_arg_max_code];
945 if (! NILP (val))
946 {
947 unsigned code = cons_to_unsigned (val, UINT_MAX);
948
949 if (code < charset.min_code
950 || code > charset.max_code)
951 args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
952 make_fixnum_or_float (charset.max_code), val);
953 charset.max_code = code;
954 }
955
956 charset.compact_codes_p = charset.max_code < 0x10000;
957
958 val = args[charset_arg_invalid_code];
959 if (NILP (val))
960 {
961 if (charset.min_code > 0)
962 charset.invalid_code = 0;
963 else
964 {
965 if (charset.max_code < UINT_MAX)
966 charset.invalid_code = charset.max_code + 1;
967 else
968 error ("Attribute :invalid-code must be specified");
969 }
970 }
971 else
972 charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
973
974 val = args[charset_arg_iso_final];
975 if (NILP (val))
976 charset.iso_final = -1;
977 else
978 {
979 CHECK_NUMBER (val);
980 if (XINT (val) < '0' || XINT (val) > 127)
981 error ("Invalid iso-final-char: %"pI"d", XINT (val));
982 charset.iso_final = XINT (val);
983 }
984
985 val = args[charset_arg_iso_revision];
986 if (NILP (val))
987 charset.iso_revision = -1;
988 else
989 {
990 CHECK_RANGED_INTEGER (val, -1, 63);
991 charset.iso_revision = XINT (val);
992 }
993
994 val = args[charset_arg_emacs_mule_id];
995 if (NILP (val))
996 charset.emacs_mule_id = -1;
997 else
998 {
999 CHECK_NATNUM (val);
1000 if ((XINT (val) > 0 && XINT (val) <= 128) || XINT (val) >= 256)
1001 error ("Invalid emacs-mule-id: %"pI"d", XINT (val));
1002 charset.emacs_mule_id = XINT (val);
1003 }
1004
1005 charset.ascii_compatible_p = ! NILP (args[charset_arg_ascii_compatible_p]);
1006
1007 charset.supplementary_p = ! NILP (args[charset_arg_supplementary_p]);
1008
1009 charset.unified_p = 0;
1010
1011 memset (charset.fast_map, 0, sizeof (charset.fast_map));
1012
1013 if (! NILP (args[charset_arg_code_offset]))
1014 {
1015 val = args[charset_arg_code_offset];
1016 CHECK_CHARACTER (val);
1017
1018 charset.method = CHARSET_METHOD_OFFSET;
1019 charset.code_offset = XINT (val);
1020
1021 i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
1022 if (MAX_CHAR - charset.code_offset < i)
1023 error ("Unsupported max char: %d", charset.max_char);
1024 charset.max_char = i + charset.code_offset;
1025 i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
1026 charset.min_char = i + charset.code_offset;
1027
1028 i = (charset.min_char >> 7) << 7;
1029 for (; i < 0x10000 && i <= charset.max_char; i += 128)
1030 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1031 i = (i >> 12) << 12;
1032 for (; i <= charset.max_char; i += 0x1000)
1033 CHARSET_FAST_MAP_SET (i, charset.fast_map);
1034 if (charset.code_offset == 0 && charset.max_char >= 0x80)
1035 charset.ascii_compatible_p = 1;
1036 }
1037 else if (! NILP (args[charset_arg_map]))
1038 {
1039 val = args[charset_arg_map];
1040 ASET (attrs, charset_map, val);
1041 charset.method = CHARSET_METHOD_MAP;
1042 }
1043 else if (! NILP (args[charset_arg_subset]))
1044 {
1045 Lisp_Object parent;
1046 Lisp_Object parent_min_code, parent_max_code, parent_code_offset;
1047 struct charset *parent_charset;
1048
1049 val = args[charset_arg_subset];
1050 parent = Fcar (val);
1051 CHECK_CHARSET_GET_CHARSET (parent, parent_charset);
1052 parent_min_code = Fnth (make_number (1), val);
1053 CHECK_NATNUM (parent_min_code);
1054 parent_max_code = Fnth (make_number (2), val);
1055 CHECK_NATNUM (parent_max_code);
1056 parent_code_offset = Fnth (make_number (3), val);
1057 CHECK_NUMBER (parent_code_offset);
1058 val = make_uninit_vector (4);
1059 ASET (val, 0, make_number (parent_charset->id));
1060 ASET (val, 1, parent_min_code);
1061 ASET (val, 2, parent_max_code);
1062 ASET (val, 3, parent_code_offset);
1063 ASET (attrs, charset_subset, val);
1064
1065 charset.method = CHARSET_METHOD_SUBSET;
1066 /* Here, we just copy the parent's fast_map. It's not accurate,
1067 but at least it works for quickly detecting which character
1068 DOESN'T belong to this charset. */
1069 for (i = 0; i < 190; i++)
1070 charset.fast_map[i] = parent_charset->fast_map[i];
1071
1072 /* We also copy these for parents. */
1073 charset.min_char = parent_charset->min_char;
1074 charset.max_char = parent_charset->max_char;
1075 }
1076 else if (! NILP (args[charset_arg_superset]))
1077 {
1078 val = args[charset_arg_superset];
1079 charset.method = CHARSET_METHOD_SUPERSET;
1080 val = Fcopy_sequence (val);
1081 ASET (attrs, charset_superset, val);
1082
1083 charset.min_char = MAX_CHAR;
1084 charset.max_char = 0;
1085 for (; ! NILP (val); val = Fcdr (val))
1086 {
1087 Lisp_Object elt, car_part, cdr_part;
1088 int this_id, offset;
1089 struct charset *this_charset;
1090
1091 elt = Fcar (val);
1092 if (CONSP (elt))
1093 {
1094 car_part = XCAR (elt);
1095 cdr_part = XCDR (elt);
1096 CHECK_CHARSET_GET_ID (car_part, this_id);
1097 CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
1098 offset = XINT (cdr_part);
1099 }
1100 else
1101 {
1102 CHECK_CHARSET_GET_ID (elt, this_id);
1103 offset = 0;
1104 }
1105 XSETCAR (val, Fcons (make_number (this_id), make_number (offset)));
1106
1107 this_charset = CHARSET_FROM_ID (this_id);
1108 if (charset.min_char > this_charset->min_char)
1109 charset.min_char = this_charset->min_char;
1110 if (charset.max_char < this_charset->max_char)
1111 charset.max_char = this_charset->max_char;
1112 for (i = 0; i < 190; i++)
1113 charset.fast_map[i] |= this_charset->fast_map[i];
1114 }
1115 }
1116 else
1117 error ("None of :code-offset, :map, :parents are specified");
1118
1119 val = args[charset_arg_unify_map];
1120 if (! NILP (val) && !STRINGP (val))
1121 CHECK_VECTOR (val);
1122 ASET (attrs, charset_unify_map, val);
1123
1124 CHECK_LIST (args[charset_arg_plist]);
1125 ASET (attrs, charset_plist, args[charset_arg_plist]);
1126
1127 charset.hash_index = hash_lookup (hash_table, args[charset_arg_name],
1128 &hash_code);
1129 if (charset.hash_index >= 0)
1130 {
1131 new_definition_p = 0;
1132 id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
1133 set_hash_value_slot (hash_table, charset.hash_index, attrs);
1134 }
1135 else
1136 {
1137 charset.hash_index = hash_put (hash_table, args[charset_arg_name], attrs,
1138 hash_code);
1139 if (charset_table_used == charset_table_size)
1140 {
1141 /* Ensure that charset IDs fit into 'int' as well as into the
1142 restriction imposed by fixnums. Although the 'int' restriction
1143 could be removed, too much other code would need altering; for
1144 example, the IDs are stuffed into struct
1145 coding_system.charbuf[i] entries, which are 'int'. */
1146 int old_size = charset_table_size;
1147 ptrdiff_t new_size = old_size;
1148 struct charset *new_table =
1149 xpalloc (0, &new_size, 1,
1150 min (INT_MAX, MOST_POSITIVE_FIXNUM),
1151 sizeof *charset_table);
1152 memcpy (new_table, charset_table, old_size * sizeof *new_table);
1153 charset_table = new_table;
1154 charset_table_size = new_size;
1155 /* FIXME: This leaks memory, as the old charset_table becomes
1156 unreachable. If the old charset table is charset_table_init
1157 then this leak is intentional; otherwise, it's unclear.
1158 If the latter memory leak is intentional, a
1159 comment should be added to explain this. If not, the old
1160 charset_table should be freed, by passing it as the 1st argument
1161 to xpalloc and removing the memcpy. */
1162 }
1163 id = charset_table_used++;
1164 new_definition_p = 1;
1165 }
1166
1167 ASET (attrs, charset_id, make_number (id));
1168 charset.id = id;
1169 charset_table[id] = charset;
1170
1171 if (charset.method == CHARSET_METHOD_MAP)
1172 {
1173 load_charset (&charset, 0);
1174 charset_table[id] = charset;
1175 }
1176
1177 if (charset.iso_final >= 0)
1178 {
1179 ISO_CHARSET_TABLE (charset.dimension, charset.iso_chars_96,
1180 charset.iso_final) = id;
1181 if (new_definition_p)
1182 Viso_2022_charset_list = nconc2 (Viso_2022_charset_list,
1183 list1 (make_number (id)));
1184 if (ISO_CHARSET_TABLE (1, 0, 'J') == id)
1185 charset_jisx0201_roman = id;
1186 else if (ISO_CHARSET_TABLE (2, 0, '@') == id)
1187 charset_jisx0208_1978 = id;
1188 else if (ISO_CHARSET_TABLE (2, 0, 'B') == id)
1189 charset_jisx0208 = id;
1190 else if (ISO_CHARSET_TABLE (2, 0, 'C') == id)
1191 charset_ksc5601 = id;
1192 }
1193
1194 if (charset.emacs_mule_id >= 0)
1195 {
1196 emacs_mule_charset[charset.emacs_mule_id] = id;
1197 if (charset.emacs_mule_id < 0xA0)
1198 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 1;
1199 else
1200 emacs_mule_bytes[charset.emacs_mule_id] = charset.dimension + 2;
1201 if (new_definition_p)
1202 Vemacs_mule_charset_list = nconc2 (Vemacs_mule_charset_list,
1203 list1 (make_number (id)));
1204 }
1205
1206 if (new_definition_p)
1207 {
1208 Vcharset_list = Fcons (args[charset_arg_name], Vcharset_list);
1209 if (charset.supplementary_p)
1210 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1211 list1 (make_number (id)));
1212 else
1213 {
1214 Lisp_Object tail;
1215
1216 for (tail = Vcharset_ordered_list; CONSP (tail); tail = XCDR (tail))
1217 {
1218 struct charset *cs = CHARSET_FROM_ID (XINT (XCAR (tail)));
1219
1220 if (cs->supplementary_p)
1221 break;
1222 }
1223 if (EQ (tail, Vcharset_ordered_list))
1224 Vcharset_ordered_list = Fcons (make_number (id),
1225 Vcharset_ordered_list);
1226 else if (NILP (tail))
1227 Vcharset_ordered_list = nconc2 (Vcharset_ordered_list,
1228 list1 (make_number (id)));
1229 else
1230 {
1231 val = Fcons (XCAR (tail), XCDR (tail));
1232 XSETCDR (tail, val);
1233 XSETCAR (tail, make_number (id));
1234 }
1235 }
1236 charset_ordered_list_tick++;
1237 }
1238
1239 return Qnil;
1240 }
1241
1242
1243 /* Same as Fdefine_charset_internal but arguments are more convenient
1244 to call from C (typically in syms_of_charset). This can define a
1245 charset of `offset' method only. Return the ID of the new
1246 charset. */
1247
1248 static int
1249 define_charset_internal (Lisp_Object name,
1250 int dimension,
1251 const char *code_space_chars,
1252 unsigned min_code, unsigned max_code,
1253 int iso_final, int iso_revision, int emacs_mule_id,
1254 bool ascii_compatible, bool supplementary,
1255 int code_offset)
1256 {
1257 const unsigned char *code_space = (const unsigned char *) code_space_chars;
1258 Lisp_Object args[charset_arg_max];
1259 Lisp_Object val;
1260 int i;
1261
1262 args[charset_arg_name] = name;
1263 args[charset_arg_dimension] = make_number (dimension);
1264 val = make_uninit_vector (8);
1265 for (i = 0; i < 8; i++)
1266 ASET (val, i, make_number (code_space[i]));
1267 args[charset_arg_code_space] = val;
1268 args[charset_arg_min_code] = make_number (min_code);
1269 args[charset_arg_max_code] = make_number (max_code);
1270 args[charset_arg_iso_final]
1271 = (iso_final < 0 ? Qnil : make_number (iso_final));
1272 args[charset_arg_iso_revision] = make_number (iso_revision);
1273 args[charset_arg_emacs_mule_id]
1274 = (emacs_mule_id < 0 ? Qnil : make_number (emacs_mule_id));
1275 args[charset_arg_ascii_compatible_p] = ascii_compatible ? Qt : Qnil;
1276 args[charset_arg_supplementary_p] = supplementary ? Qt : Qnil;
1277 args[charset_arg_invalid_code] = Qnil;
1278 args[charset_arg_code_offset] = make_number (code_offset);
1279 args[charset_arg_map] = Qnil;
1280 args[charset_arg_subset] = Qnil;
1281 args[charset_arg_superset] = Qnil;
1282 args[charset_arg_unify_map] = Qnil;
1283
1284 args[charset_arg_plist] =
1285 listn (CONSTYPE_HEAP, 14,
1286 intern_c_string (":name"),
1287 args[charset_arg_name],
1288 intern_c_string (":dimension"),
1289 args[charset_arg_dimension],
1290 intern_c_string (":code-space"),
1291 args[charset_arg_code_space],
1292 intern_c_string (":iso-final-char"),
1293 args[charset_arg_iso_final],
1294 intern_c_string (":emacs-mule-id"),
1295 args[charset_arg_emacs_mule_id],
1296 intern_c_string (":ascii-compatible-p"),
1297 args[charset_arg_ascii_compatible_p],
1298 intern_c_string (":code-offset"),
1299 args[charset_arg_code_offset]);
1300 Fdefine_charset_internal (charset_arg_max, args);
1301
1302 return XINT (CHARSET_SYMBOL_ID (name));
1303 }
1304
1305
1306 DEFUN ("define-charset-alias", Fdefine_charset_alias,
1307 Sdefine_charset_alias, 2, 2, 0,
1308 doc: /* Define ALIAS as an alias for charset CHARSET. */)
1309 (Lisp_Object alias, Lisp_Object charset)
1310 {
1311 Lisp_Object attr;
1312
1313 CHECK_CHARSET_GET_ATTR (charset, attr);
1314 Fputhash (alias, attr, Vcharset_hash_table);
1315 Vcharset_list = Fcons (alias, Vcharset_list);
1316 return Qnil;
1317 }
1318
1319
1320 DEFUN ("charset-plist", Fcharset_plist, Scharset_plist, 1, 1, 0,
1321 doc: /* Return the property list of CHARSET. */)
1322 (Lisp_Object charset)
1323 {
1324 Lisp_Object attrs;
1325
1326 CHECK_CHARSET_GET_ATTR (charset, attrs);
1327 return CHARSET_ATTR_PLIST (attrs);
1328 }
1329
1330
1331 DEFUN ("set-charset-plist", Fset_charset_plist, Sset_charset_plist, 2, 2, 0,
1332 doc: /* Set CHARSET's property list to PLIST. */)
1333 (Lisp_Object charset, Lisp_Object plist)
1334 {
1335 Lisp_Object attrs;
1336
1337 CHECK_CHARSET_GET_ATTR (charset, attrs);
1338 ASET (attrs, charset_plist, plist);
1339 return plist;
1340 }
1341
1342
1343 DEFUN ("unify-charset", Funify_charset, Sunify_charset, 1, 3, 0,
1344 doc: /* Unify characters of CHARSET with Unicode.
1345 This means reading the relevant file and installing the table defined
1346 by CHARSET's `:unify-map' property.
1347
1348 Optional second arg UNIFY-MAP is a file name string or a vector. It has
1349 the same meaning as the `:unify-map' attribute in the function
1350 `define-charset' (which see).
1351
1352 Optional third argument DEUNIFY, if non-nil, means to de-unify CHARSET. */)
1353 (Lisp_Object charset, Lisp_Object unify_map, Lisp_Object deunify)
1354 {
1355 int id;
1356 struct charset *cs;
1357
1358 CHECK_CHARSET_GET_ID (charset, id);
1359 cs = CHARSET_FROM_ID (id);
1360 if (NILP (deunify)
1361 ? CHARSET_UNIFIED_P (cs) && ! NILP (CHARSET_DEUNIFIER (cs))
1362 : ! CHARSET_UNIFIED_P (cs))
1363 return Qnil;
1364
1365 CHARSET_UNIFIED_P (cs) = 0;
1366 if (NILP (deunify))
1367 {
1368 if (CHARSET_METHOD (cs) != CHARSET_METHOD_OFFSET
1369 || CHARSET_CODE_OFFSET (cs) < 0x110000)
1370 error ("Can't unify charset: %s", SDATA (SYMBOL_NAME (charset)));
1371 if (NILP (unify_map))
1372 unify_map = CHARSET_UNIFY_MAP (cs);
1373 else
1374 {
1375 if (! STRINGP (unify_map) && ! VECTORP (unify_map))
1376 signal_error ("Bad unify-map", unify_map);
1377 set_charset_attr (cs, charset_unify_map, unify_map);
1378 }
1379 if (NILP (Vchar_unify_table))
1380 Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
1381 char_table_set_range (Vchar_unify_table,
1382 cs->min_char, cs->max_char, charset);
1383 CHARSET_UNIFIED_P (cs) = 1;
1384 }
1385 else if (CHAR_TABLE_P (Vchar_unify_table))
1386 {
1387 unsigned min_code = CHARSET_MIN_CODE (cs);
1388 unsigned max_code = CHARSET_MAX_CODE (cs);
1389 int min_char = DECODE_CHAR (cs, min_code);
1390 int max_char = DECODE_CHAR (cs, max_code);
1391
1392 char_table_set_range (Vchar_unify_table, min_char, max_char, Qnil);
1393 }
1394
1395 return Qnil;
1396 }
1397
1398 /* Check that DIMENSION, CHARS, and FINAL_CHAR specify a valid ISO charset.
1399 Return true if it's a 96-character set, false if 94. */
1400
1401 static bool
1402 check_iso_charset_parameter (Lisp_Object dimension, Lisp_Object chars,
1403 Lisp_Object final_char)
1404 {
1405 CHECK_NUMBER (dimension);
1406 CHECK_NUMBER (chars);
1407 CHECK_CHARACTER (final_char);
1408
1409 if (! (1 <= XINT (dimension) && XINT (dimension) <= 3))
1410 error ("Invalid DIMENSION %"pI"d, it should be 1, 2, or 3",
1411 XINT (dimension));
1412
1413 bool chars_flag = XINT (chars) == 96;
1414 if (! (chars_flag || XINT (chars) == 94))
1415 error ("Invalid CHARS %"pI"d, it should be 94 or 96", XINT (chars));
1416
1417 int final_ch = XFASTINT (final_char);
1418 if (! ('0' <= final_ch && final_ch <= '~'))
1419 error ("Invalid FINAL-CHAR '%c', it should be '0'..'~'", final_ch);
1420
1421 return chars_flag;
1422 }
1423
1424 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
1425 Sget_unused_iso_final_char, 2, 2, 0,
1426 doc: /*
1427 Return an unused ISO final char for a charset of DIMENSION and CHARS.
1428 DIMENSION is the number of bytes to represent a character: 1 or 2.
1429 CHARS is the number of characters in a dimension: 94 or 96.
1430
1431 This final char is for private use, thus the range is `0' (48) .. `?' (63).
1432 If there's no unused final char for the specified kind of charset,
1433 return nil. */)
1434 (Lisp_Object dimension, Lisp_Object chars)
1435 {
1436 bool chars_flag = check_iso_charset_parameter (dimension, chars,
1437 make_number ('0'));
1438 for (int final_char = '0'; final_char <= '?'; final_char++)
1439 if (ISO_CHARSET_TABLE (XINT (dimension), chars_flag, final_char) < 0)
1440 return make_number (final_char);
1441 return Qnil;
1442 }
1443
1444
1445 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
1446 4, 4, 0,
1447 doc: /* Declare an equivalent charset for ISO-2022 decoding.
1448
1449 On decoding by an ISO-2022 base coding system, when a charset
1450 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
1451 if CHARSET is designated instead. */)
1452 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
1453 {
1454 int id;
1455
1456 CHECK_CHARSET_GET_ID (charset, id);
1457 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
1458 ISO_CHARSET_TABLE (XINT (dimension), chars_flag, XFASTINT (final_char)) = id;
1459 return Qnil;
1460 }
1461
1462
1463 /* Return information about charsets in the text at PTR of NBYTES
1464 bytes, which are NCHARS characters. The value is:
1465
1466 0: Each character is represented by one byte. This is always
1467 true for a unibyte string. For a multibyte string, true if
1468 it contains only ASCII characters.
1469
1470 1: No charsets other than ascii, control-1, and latin-1 are
1471 found.
1472
1473 2: Otherwise.
1474 */
1475
1476 int
1477 string_xstring_p (Lisp_Object string)
1478 {
1479 const unsigned char *p = SDATA (string);
1480 const unsigned char *endp = p + SBYTES (string);
1481
1482 if (SCHARS (string) == SBYTES (string))
1483 return 0;
1484
1485 while (p < endp)
1486 {
1487 int c = STRING_CHAR_ADVANCE (p);
1488
1489 if (c >= 0x100)
1490 return 2;
1491 }
1492 return 1;
1493 }
1494
1495
1496 /* Find charsets in the string at PTR of NCHARS and NBYTES.
1497
1498 CHARSETS is a vector. If Nth element is non-nil, it means the
1499 charset whose id is N is already found.
1500
1501 It may lookup a translation table TABLE if supplied. */
1502
1503 static void
1504 find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
1505 ptrdiff_t nbytes, Lisp_Object charsets,
1506 Lisp_Object table, bool multibyte)
1507 {
1508 const unsigned char *pend = ptr + nbytes;
1509
1510 if (nchars == nbytes)
1511 {
1512 if (multibyte)
1513 ASET (charsets, charset_ascii, Qt);
1514 else
1515 while (ptr < pend)
1516 {
1517 int c = *ptr++;
1518
1519 if (!NILP (table))
1520 c = translate_char (table, c);
1521 if (ASCII_CHAR_P (c))
1522 ASET (charsets, charset_ascii, Qt);
1523 else
1524 ASET (charsets, charset_eight_bit, Qt);
1525 }
1526 }
1527 else
1528 {
1529 while (ptr < pend)
1530 {
1531 int c = STRING_CHAR_ADVANCE (ptr);
1532 struct charset *charset;
1533
1534 if (!NILP (table))
1535 c = translate_char (table, c);
1536 charset = CHAR_CHARSET (c);
1537 ASET (charsets, CHARSET_ID (charset), Qt);
1538 }
1539 }
1540 }
1541
1542 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
1543 2, 3, 0,
1544 doc: /* Return a list of charsets in the region between BEG and END.
1545 BEG and END are buffer positions.
1546 Optional arg TABLE if non-nil is a translation table to look up.
1547
1548 If the current buffer is unibyte, the returned list may contain
1549 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1550 (Lisp_Object beg, Lisp_Object end, Lisp_Object table)
1551 {
1552 Lisp_Object charsets;
1553 ptrdiff_t from, from_byte, to, stop, stop_byte;
1554 int i;
1555 Lisp_Object val;
1556 bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
1557
1558 validate_region (&beg, &end);
1559 from = XFASTINT (beg);
1560 stop = to = XFASTINT (end);
1561
1562 if (from < GPT && GPT < to)
1563 {
1564 stop = GPT;
1565 stop_byte = GPT_BYTE;
1566 }
1567 else
1568 stop_byte = CHAR_TO_BYTE (stop);
1569
1570 from_byte = CHAR_TO_BYTE (from);
1571
1572 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1573 while (1)
1574 {
1575 find_charsets_in_text (BYTE_POS_ADDR (from_byte), stop - from,
1576 stop_byte - from_byte, charsets, table,
1577 multibyte);
1578 if (stop < to)
1579 {
1580 from = stop, from_byte = stop_byte;
1581 stop = to, stop_byte = CHAR_TO_BYTE (stop);
1582 }
1583 else
1584 break;
1585 }
1586
1587 val = Qnil;
1588 for (i = charset_table_used - 1; i >= 0; i--)
1589 if (!NILP (AREF (charsets, i)))
1590 val = Fcons (CHARSET_NAME (charset_table + i), val);
1591 return val;
1592 }
1593
1594 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
1595 1, 2, 0,
1596 doc: /* Return a list of charsets in STR.
1597 Optional arg TABLE if non-nil is a translation table to look up.
1598
1599 If STR is unibyte, the returned list may contain
1600 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
1601 (Lisp_Object str, Lisp_Object table)
1602 {
1603 Lisp_Object charsets;
1604 int i;
1605 Lisp_Object val;
1606
1607 CHECK_STRING (str);
1608
1609 charsets = Fmake_vector (make_number (charset_table_used), Qnil);
1610 find_charsets_in_text (SDATA (str), SCHARS (str), SBYTES (str),
1611 charsets, table,
1612 STRING_MULTIBYTE (str));
1613 val = Qnil;
1614 for (i = charset_table_used - 1; i >= 0; i--)
1615 if (!NILP (AREF (charsets, i)))
1616 val = Fcons (CHARSET_NAME (charset_table + i), val);
1617 return val;
1618 }
1619
1620 \f
1621
1622 /* Return a unified character code for C (>= 0x110000). VAL is a
1623 value of Vchar_unify_table for C; i.e. it is nil, an integer, or a
1624 charset symbol. */
1625 static int
1626 maybe_unify_char (int c, Lisp_Object val)
1627 {
1628 struct charset *charset;
1629
1630 if (INTEGERP (val))
1631 return XFASTINT (val);
1632 if (NILP (val))
1633 return c;
1634
1635 CHECK_CHARSET_GET_CHARSET (val, charset);
1636 #ifdef REL_ALLOC
1637 /* The call to load_charset below can allocate memory, which screws
1638 callers of this function through STRING_CHAR_* macros that hold C
1639 pointers to buffer text, if REL_ALLOC is used. */
1640 r_alloc_inhibit_buffer_relocation (1);
1641 #endif
1642 load_charset (charset, 1);
1643 if (! inhibit_load_charset_map)
1644 {
1645 val = CHAR_TABLE_REF (Vchar_unify_table, c);
1646 if (! NILP (val))
1647 c = XFASTINT (val);
1648 }
1649 else
1650 {
1651 int code_index = c - CHARSET_CODE_OFFSET (charset);
1652 int unified = GET_TEMP_CHARSET_WORK_DECODER (code_index);
1653
1654 if (unified > 0)
1655 c = unified;
1656 }
1657 #ifdef REL_ALLOC
1658 r_alloc_inhibit_buffer_relocation (0);
1659 #endif
1660 return c;
1661 }
1662
1663
1664 /* Return a character corresponding to the code-point CODE of
1665 CHARSET. */
1666
1667 int
1668 decode_char (struct charset *charset, unsigned int code)
1669 {
1670 int c, char_index;
1671 enum charset_method method = CHARSET_METHOD (charset);
1672
1673 if (code < CHARSET_MIN_CODE (charset) || code > CHARSET_MAX_CODE (charset))
1674 return -1;
1675
1676 if (method == CHARSET_METHOD_SUBSET)
1677 {
1678 Lisp_Object subset_info;
1679
1680 subset_info = CHARSET_SUBSET (charset);
1681 charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1682 code -= XINT (AREF (subset_info, 3));
1683 if (code < XFASTINT (AREF (subset_info, 1))
1684 || code > XFASTINT (AREF (subset_info, 2)))
1685 c = -1;
1686 else
1687 c = DECODE_CHAR (charset, code);
1688 }
1689 else if (method == CHARSET_METHOD_SUPERSET)
1690 {
1691 Lisp_Object parents;
1692
1693 parents = CHARSET_SUPERSET (charset);
1694 c = -1;
1695 for (; CONSP (parents); parents = XCDR (parents))
1696 {
1697 int id = XINT (XCAR (XCAR (parents)));
1698 int code_offset = XINT (XCDR (XCAR (parents)));
1699 unsigned this_code = code - code_offset;
1700
1701 charset = CHARSET_FROM_ID (id);
1702 if ((c = DECODE_CHAR (charset, this_code)) >= 0)
1703 break;
1704 }
1705 }
1706 else
1707 {
1708 char_index = CODE_POINT_TO_INDEX (charset, code);
1709 if (char_index < 0)
1710 return -1;
1711
1712 if (method == CHARSET_METHOD_MAP)
1713 {
1714 Lisp_Object decoder;
1715
1716 decoder = CHARSET_DECODER (charset);
1717 if (! VECTORP (decoder))
1718 {
1719 load_charset (charset, 1);
1720 decoder = CHARSET_DECODER (charset);
1721 }
1722 if (VECTORP (decoder))
1723 c = XINT (AREF (decoder, char_index));
1724 else
1725 c = GET_TEMP_CHARSET_WORK_DECODER (char_index);
1726 }
1727 else /* method == CHARSET_METHOD_OFFSET */
1728 {
1729 c = char_index + CHARSET_CODE_OFFSET (charset);
1730 if (CHARSET_UNIFIED_P (charset)
1731 && MAX_UNICODE_CHAR < c && c <= MAX_5_BYTE_CHAR)
1732 {
1733 /* Unify C with a Unicode character if possible. */
1734 Lisp_Object val = CHAR_TABLE_REF (Vchar_unify_table, c);
1735 c = maybe_unify_char (c, val);
1736 }
1737 }
1738 }
1739
1740 return c;
1741 }
1742
1743 /* Variable used temporarily by the macro ENCODE_CHAR. */
1744 Lisp_Object charset_work;
1745
1746 /* Return a code-point of C in CHARSET. If C doesn't belong to
1747 CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
1748 use CHARSET's strict_max_char instead of max_char. */
1749
1750 unsigned
1751 encode_char (struct charset *charset, int c)
1752 {
1753 unsigned code;
1754 enum charset_method method = CHARSET_METHOD (charset);
1755
1756 if (CHARSET_UNIFIED_P (charset))
1757 {
1758 Lisp_Object deunifier;
1759 int code_index = -1;
1760
1761 deunifier = CHARSET_DEUNIFIER (charset);
1762 if (! CHAR_TABLE_P (deunifier))
1763 {
1764 load_charset (charset, 2);
1765 deunifier = CHARSET_DEUNIFIER (charset);
1766 }
1767 if (CHAR_TABLE_P (deunifier))
1768 {
1769 Lisp_Object deunified = CHAR_TABLE_REF (deunifier, c);
1770
1771 if (INTEGERP (deunified))
1772 code_index = XINT (deunified);
1773 }
1774 else
1775 {
1776 code_index = GET_TEMP_CHARSET_WORK_ENCODER (c);
1777 }
1778 if (code_index >= 0)
1779 c = CHARSET_CODE_OFFSET (charset) + code_index;
1780 }
1781
1782 if (method == CHARSET_METHOD_SUBSET)
1783 {
1784 Lisp_Object subset_info;
1785 struct charset *this_charset;
1786
1787 subset_info = CHARSET_SUBSET (charset);
1788 this_charset = CHARSET_FROM_ID (XFASTINT (AREF (subset_info, 0)));
1789 code = ENCODE_CHAR (this_charset, c);
1790 if (code == CHARSET_INVALID_CODE (this_charset)
1791 || code < XFASTINT (AREF (subset_info, 1))
1792 || code > XFASTINT (AREF (subset_info, 2)))
1793 return CHARSET_INVALID_CODE (charset);
1794 code += XINT (AREF (subset_info, 3));
1795 return code;
1796 }
1797
1798 if (method == CHARSET_METHOD_SUPERSET)
1799 {
1800 Lisp_Object parents;
1801
1802 parents = CHARSET_SUPERSET (charset);
1803 for (; CONSP (parents); parents = XCDR (parents))
1804 {
1805 int id = XINT (XCAR (XCAR (parents)));
1806 int code_offset = XINT (XCDR (XCAR (parents)));
1807 struct charset *this_charset = CHARSET_FROM_ID (id);
1808
1809 code = ENCODE_CHAR (this_charset, c);
1810 if (code != CHARSET_INVALID_CODE (this_charset))
1811 return code + code_offset;
1812 }
1813 return CHARSET_INVALID_CODE (charset);
1814 }
1815
1816 if (! CHARSET_FAST_MAP_REF ((c), charset->fast_map)
1817 || c < CHARSET_MIN_CHAR (charset) || c > CHARSET_MAX_CHAR (charset))
1818 return CHARSET_INVALID_CODE (charset);
1819
1820 if (method == CHARSET_METHOD_MAP)
1821 {
1822 Lisp_Object encoder;
1823 Lisp_Object val;
1824
1825 encoder = CHARSET_ENCODER (charset);
1826 if (! CHAR_TABLE_P (CHARSET_ENCODER (charset)))
1827 {
1828 load_charset (charset, 2);
1829 encoder = CHARSET_ENCODER (charset);
1830 }
1831 if (CHAR_TABLE_P (encoder))
1832 {
1833 val = CHAR_TABLE_REF (encoder, c);
1834 if (NILP (val))
1835 return CHARSET_INVALID_CODE (charset);
1836 code = XINT (val);
1837 if (! CHARSET_COMPACT_CODES_P (charset))
1838 code = INDEX_TO_CODE_POINT (charset, code);
1839 }
1840 else
1841 {
1842 code = GET_TEMP_CHARSET_WORK_ENCODER (c);
1843 code = INDEX_TO_CODE_POINT (charset, code);
1844 }
1845 }
1846 else /* method == CHARSET_METHOD_OFFSET */
1847 {
1848 unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
1849
1850 code = INDEX_TO_CODE_POINT (charset, code_index);
1851 }
1852
1853 return code;
1854 }
1855
1856
1857 DEFUN ("decode-char", Fdecode_char, Sdecode_char, 2, 3, 0,
1858 doc: /* Decode the pair of CHARSET and CODE-POINT into a character.
1859 Return nil if CODE-POINT is not valid in CHARSET.
1860
1861 CODE-POINT may be a cons (HIGHER-16-BIT-VALUE . LOWER-16-BIT-VALUE). */)
1862 (Lisp_Object charset, Lisp_Object code_point, Lisp_Object restriction)
1863 {
1864 int c, id;
1865 unsigned code;
1866 struct charset *charsetp;
1867
1868 CHECK_CHARSET_GET_ID (charset, id);
1869 code = cons_to_unsigned (code_point, UINT_MAX);
1870 charsetp = CHARSET_FROM_ID (id);
1871 c = DECODE_CHAR (charsetp, code);
1872 return (c >= 0 ? make_number (c) : Qnil);
1873 }
1874
1875
1876 DEFUN ("encode-char", Fencode_char, Sencode_char, 2, 3, 0,
1877 doc: /* Encode the character CH into a code-point of CHARSET.
1878 Return nil if CHARSET doesn't include CH. */)
1879 (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
1880 {
1881 int c, id;
1882 unsigned code;
1883 struct charset *charsetp;
1884
1885 CHECK_CHARSET_GET_ID (charset, id);
1886 CHECK_CHARACTER (ch);
1887 c = XFASTINT (ch);
1888 charsetp = CHARSET_FROM_ID (id);
1889 code = ENCODE_CHAR (charsetp, c);
1890 if (code == CHARSET_INVALID_CODE (charsetp))
1891 return Qnil;
1892 return INTEGER_TO_CONS (code);
1893 }
1894
1895
1896 DEFUN ("make-char", Fmake_char, Smake_char, 1, 5, 0,
1897 doc:
1898 /* Return a character of CHARSET whose position codes are CODEn.
1899
1900 CODE1 through CODE4 are optional, but if you don't supply sufficient
1901 position codes, it is assumed that the minimum code in each dimension
1902 is specified. */)
1903 (Lisp_Object charset, Lisp_Object code1, Lisp_Object code2, Lisp_Object code3, Lisp_Object code4)
1904 {
1905 int id, dimension;
1906 struct charset *charsetp;
1907 unsigned code;
1908 int c;
1909
1910 CHECK_CHARSET_GET_ID (charset, id);
1911 charsetp = CHARSET_FROM_ID (id);
1912
1913 dimension = CHARSET_DIMENSION (charsetp);
1914 if (NILP (code1))
1915 code = (CHARSET_ASCII_COMPATIBLE_P (charsetp)
1916 ? 0 : CHARSET_MIN_CODE (charsetp));
1917 else
1918 {
1919 CHECK_NATNUM (code1);
1920 if (XFASTINT (code1) >= 0x100)
1921 args_out_of_range (make_number (0xFF), code1);
1922 code = XFASTINT (code1);
1923
1924 if (dimension > 1)
1925 {
1926 code <<= 8;
1927 if (NILP (code2))
1928 code |= charsetp->code_space[(dimension - 2) * 4];
1929 else
1930 {
1931 CHECK_NATNUM (code2);
1932 if (XFASTINT (code2) >= 0x100)
1933 args_out_of_range (make_number (0xFF), code2);
1934 code |= XFASTINT (code2);
1935 }
1936
1937 if (dimension > 2)
1938 {
1939 code <<= 8;
1940 if (NILP (code3))
1941 code |= charsetp->code_space[(dimension - 3) * 4];
1942 else
1943 {
1944 CHECK_NATNUM (code3);
1945 if (XFASTINT (code3) >= 0x100)
1946 args_out_of_range (make_number (0xFF), code3);
1947 code |= XFASTINT (code3);
1948 }
1949
1950 if (dimension > 3)
1951 {
1952 code <<= 8;
1953 if (NILP (code4))
1954 code |= charsetp->code_space[0];
1955 else
1956 {
1957 CHECK_NATNUM (code4);
1958 if (XFASTINT (code4) >= 0x100)
1959 args_out_of_range (make_number (0xFF), code4);
1960 code |= XFASTINT (code4);
1961 }
1962 }
1963 }
1964 }
1965 }
1966
1967 if (CHARSET_ISO_FINAL (charsetp) >= 0)
1968 code &= 0x7F7F7F7F;
1969 c = DECODE_CHAR (charsetp, code);
1970 if (c < 0)
1971 error ("Invalid code(s)");
1972 return make_number (c);
1973 }
1974
1975
1976 /* Return the first charset in CHARSET_LIST that contains C.
1977 CHARSET_LIST is a list of charset IDs. If it is nil, use
1978 Vcharset_ordered_list. */
1979
1980 struct charset *
1981 char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
1982 {
1983 bool maybe_null = 0;
1984
1985 if (NILP (charset_list))
1986 charset_list = Vcharset_ordered_list;
1987 else
1988 maybe_null = 1;
1989
1990 while (CONSP (charset_list))
1991 {
1992 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (charset_list)));
1993 unsigned code = ENCODE_CHAR (charset, c);
1994
1995 if (code != CHARSET_INVALID_CODE (charset))
1996 {
1997 if (code_return)
1998 *code_return = code;
1999 return charset;
2000 }
2001 charset_list = XCDR (charset_list);
2002 if (! maybe_null
2003 && c <= MAX_UNICODE_CHAR
2004 && EQ (charset_list, Vcharset_non_preferred_head))
2005 return CHARSET_FROM_ID (charset_unicode);
2006 }
2007 return (maybe_null ? NULL
2008 : c <= MAX_5_BYTE_CHAR ? CHARSET_FROM_ID (charset_emacs)
2009 : CHARSET_FROM_ID (charset_eight_bit));
2010 }
2011
2012
2013 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
2014 doc:
2015 /*Return list of charset and one to four position-codes of CH.
2016 The charset is decided by the current priority order of charsets.
2017 A position-code is a byte value of each dimension of the code-point of
2018 CH in the charset. */)
2019 (Lisp_Object ch)
2020 {
2021 struct charset *charset;
2022 int c, dimension;
2023 unsigned code;
2024 Lisp_Object val;
2025
2026 CHECK_CHARACTER (ch);
2027 c = XFASTINT (ch);
2028 charset = CHAR_CHARSET (c);
2029 if (! charset)
2030 emacs_abort ();
2031 code = ENCODE_CHAR (charset, c);
2032 if (code == CHARSET_INVALID_CODE (charset))
2033 emacs_abort ();
2034 dimension = CHARSET_DIMENSION (charset);
2035 for (val = Qnil; dimension > 0; dimension--)
2036 {
2037 val = Fcons (make_number (code & 0xFF), val);
2038 code >>= 8;
2039 }
2040 return Fcons (CHARSET_NAME (charset), val);
2041 }
2042
2043
2044 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 2, 0,
2045 doc: /* Return the charset of highest priority that contains CH.
2046 ASCII characters are an exception: for them, this function always
2047 returns `ascii'.
2048 If optional 2nd arg RESTRICTION is non-nil, it is a list of charsets
2049 from which to find the charset. It may also be a coding system. In
2050 that case, find the charset from what supported by that coding system. */)
2051 (Lisp_Object ch, Lisp_Object restriction)
2052 {
2053 struct charset *charset;
2054
2055 CHECK_CHARACTER (ch);
2056 if (NILP (restriction))
2057 charset = CHAR_CHARSET (XINT (ch));
2058 else
2059 {
2060 if (CONSP (restriction))
2061 {
2062 int c = XFASTINT (ch);
2063
2064 for (; CONSP (restriction); restriction = XCDR (restriction))
2065 {
2066 struct charset *rcharset;
2067
2068 CHECK_CHARSET_GET_CHARSET (XCAR (restriction), rcharset);
2069 if (ENCODE_CHAR (rcharset, c) != CHARSET_INVALID_CODE (rcharset))
2070 return XCAR (restriction);
2071 }
2072 return Qnil;
2073 }
2074 restriction = coding_system_charset_list (restriction);
2075 charset = char_charset (XINT (ch), restriction, NULL);
2076 if (! charset)
2077 return Qnil;
2078 }
2079 return (CHARSET_NAME (charset));
2080 }
2081
2082
2083 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
2084 doc: /*
2085 Return charset of a character in the current buffer at position POS.
2086 If POS is nil, it defaults to the current point.
2087 If POS is out of range, the value is nil. */)
2088 (Lisp_Object pos)
2089 {
2090 Lisp_Object ch;
2091 struct charset *charset;
2092
2093 ch = Fchar_after (pos);
2094 if (! INTEGERP (ch))
2095 return ch;
2096 charset = CHAR_CHARSET (XINT (ch));
2097 return (CHARSET_NAME (charset));
2098 }
2099
2100
2101 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
2102 doc: /*
2103 Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
2104
2105 ISO 2022's designation sequence (escape sequence) distinguishes charsets
2106 by their DIMENSION, CHARS, and FINAL-CHAR,
2107 whereas Emacs distinguishes them by charset symbol.
2108 See the documentation of the function `charset-info' for the meanings of
2109 DIMENSION, CHARS, and FINAL-CHAR. */)
2110 (Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
2111 {
2112 bool chars_flag = check_iso_charset_parameter (dimension, chars, final_char);
2113 int id = ISO_CHARSET_TABLE (XINT (dimension), chars_flag,
2114 XFASTINT (final_char));
2115 return (id >= 0 ? CHARSET_NAME (CHARSET_FROM_ID (id)) : Qnil);
2116 }
2117
2118
2119 DEFUN ("clear-charset-maps", Fclear_charset_maps, Sclear_charset_maps,
2120 0, 0, 0,
2121 doc: /*
2122 Internal use only.
2123 Clear temporary charset mapping tables.
2124 It should be called only from temacs invoked for dumping. */)
2125 (void)
2126 {
2127 if (temp_charset_work)
2128 {
2129 xfree (temp_charset_work);
2130 temp_charset_work = NULL;
2131 }
2132
2133 if (CHAR_TABLE_P (Vchar_unify_table))
2134 Foptimize_char_table (Vchar_unify_table, Qnil);
2135
2136 return Qnil;
2137 }
2138
2139 DEFUN ("charset-priority-list", Fcharset_priority_list,
2140 Scharset_priority_list, 0, 1, 0,
2141 doc: /* Return the list of charsets ordered by priority.
2142 HIGHESTP non-nil means just return the highest priority one. */)
2143 (Lisp_Object highestp)
2144 {
2145 Lisp_Object val = Qnil, list = Vcharset_ordered_list;
2146
2147 if (!NILP (highestp))
2148 return CHARSET_NAME (CHARSET_FROM_ID (XINT (Fcar (list))));
2149
2150 while (!NILP (list))
2151 {
2152 val = Fcons (CHARSET_NAME (CHARSET_FROM_ID (XINT (XCAR (list)))), val);
2153 list = XCDR (list);
2154 }
2155 return Fnreverse (val);
2156 }
2157
2158 DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
2159 1, MANY, 0,
2160 doc: /* Assign higher priority to the charsets given as arguments.
2161 usage: (set-charset-priority &rest charsets) */)
2162 (ptrdiff_t nargs, Lisp_Object *args)
2163 {
2164 Lisp_Object new_head, old_list, arglist[2];
2165 Lisp_Object list_2022, list_emacs_mule;
2166 ptrdiff_t i;
2167 int id;
2168
2169 old_list = Fcopy_sequence (Vcharset_ordered_list);
2170 new_head = Qnil;
2171 for (i = 0; i < nargs; i++)
2172 {
2173 CHECK_CHARSET_GET_ID (args[i], id);
2174 if (! NILP (Fmemq (make_number (id), old_list)))
2175 {
2176 old_list = Fdelq (make_number (id), old_list);
2177 new_head = Fcons (make_number (id), new_head);
2178 }
2179 }
2180 arglist[0] = Fnreverse (new_head);
2181 arglist[1] = Vcharset_non_preferred_head = old_list;
2182 Vcharset_ordered_list = Fnconc (2, arglist);
2183 charset_ordered_list_tick++;
2184
2185 charset_unibyte = -1;
2186 for (old_list = Vcharset_ordered_list, list_2022 = list_emacs_mule = Qnil;
2187 CONSP (old_list); old_list = XCDR (old_list))
2188 {
2189 if (! NILP (Fmemq (XCAR (old_list), Viso_2022_charset_list)))
2190 list_2022 = Fcons (XCAR (old_list), list_2022);
2191 if (! NILP (Fmemq (XCAR (old_list), Vemacs_mule_charset_list)))
2192 list_emacs_mule = Fcons (XCAR (old_list), list_emacs_mule);
2193 if (charset_unibyte < 0)
2194 {
2195 struct charset *charset = CHARSET_FROM_ID (XINT (XCAR (old_list)));
2196
2197 if (CHARSET_DIMENSION (charset) == 1
2198 && CHARSET_ASCII_COMPATIBLE_P (charset)
2199 && CHARSET_MAX_CHAR (charset) >= 0x80)
2200 charset_unibyte = CHARSET_ID (charset);
2201 }
2202 }
2203 Viso_2022_charset_list = Fnreverse (list_2022);
2204 Vemacs_mule_charset_list = Fnreverse (list_emacs_mule);
2205 if (charset_unibyte < 0)
2206 charset_unibyte = charset_iso_8859_1;
2207
2208 return Qnil;
2209 }
2210
2211 DEFUN ("charset-id-internal", Fcharset_id_internal, Scharset_id_internal,
2212 0, 1, 0,
2213 doc: /* Internal use only.
2214 Return charset identification number of CHARSET. */)
2215 (Lisp_Object charset)
2216 {
2217 int id;
2218
2219 CHECK_CHARSET_GET_ID (charset, id);
2220 return make_number (id);
2221 }
2222
2223 struct charset_sort_data
2224 {
2225 Lisp_Object charset;
2226 int id;
2227 ptrdiff_t priority;
2228 };
2229
2230 static int
2231 charset_compare (const void *d1, const void *d2)
2232 {
2233 const struct charset_sort_data *data1 = d1, *data2 = d2;
2234 if (data1->priority != data2->priority)
2235 return data1->priority < data2->priority ? -1 : 1;
2236 return 0;
2237 }
2238
2239 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
2240 doc: /* Sort charset list CHARSETS by a priority of each charset.
2241 Return the sorted list. CHARSETS is modified by side effects.
2242 See also `charset-priority-list' and `set-charset-priority'. */)
2243 (Lisp_Object charsets)
2244 {
2245 Lisp_Object len = Flength (charsets);
2246 ptrdiff_t n = XFASTINT (len), i, j;
2247 int done;
2248 Lisp_Object tail, elt, attrs;
2249 struct charset_sort_data *sort_data;
2250 int id, min_id = INT_MAX, max_id = INT_MIN;
2251 USE_SAFE_ALLOCA;
2252
2253 if (n == 0)
2254 return Qnil;
2255 SAFE_NALLOCA (sort_data, 1, n);
2256 for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
2257 {
2258 elt = XCAR (tail);
2259 CHECK_CHARSET_GET_ATTR (elt, attrs);
2260 sort_data[i].charset = elt;
2261 sort_data[i].id = id = XINT (CHARSET_ATTR_ID (attrs));
2262 if (id < min_id)
2263 min_id = id;
2264 if (id > max_id)
2265 max_id = id;
2266 }
2267 for (done = 0, tail = Vcharset_ordered_list, i = 0;
2268 done < n && CONSP (tail); tail = XCDR (tail), i++)
2269 {
2270 elt = XCAR (tail);
2271 id = XFASTINT (elt);
2272 if (id >= min_id && id <= max_id)
2273 for (j = 0; j < n; j++)
2274 if (sort_data[j].id == id)
2275 {
2276 sort_data[j].priority = i;
2277 done++;
2278 }
2279 }
2280 qsort (sort_data, n, sizeof *sort_data, charset_compare);
2281 for (i = 0, tail = charsets; CONSP (tail); tail = XCDR (tail), i++)
2282 XSETCAR (tail, sort_data[i].charset);
2283 SAFE_FREE ();
2284 return charsets;
2285 }
2286
2287 \f
2288 void
2289 init_charset (void)
2290 {
2291 Lisp_Object tempdir;
2292 tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
2293 if (! file_accessible_directory_p (tempdir))
2294 {
2295 /* This used to be non-fatal (dir_warning), but it should not
2296 happen, and if it does sooner or later it will cause some
2297 obscure problem (eg bug#6401), so better abort. */
2298 fprintf (stderr, "Error: charsets directory not found:\n\
2299 %s\n\
2300 Emacs will not function correctly without the character map files.\n%s\
2301 Please check your installation!\n",
2302 SDATA (tempdir),
2303 egetenv("EMACSDATA") ? "The EMACSDATA environment \
2304 variable is set, maybe it has the wrong value?\n" : "");
2305 exit (1);
2306 }
2307
2308 Vcharset_map_path = list1 (tempdir);
2309 }
2310
2311
2312 void
2313 init_charset_once (void)
2314 {
2315 int i, j, k;
2316
2317 for (i = 0; i < ISO_MAX_DIMENSION; i++)
2318 for (j = 0; j < ISO_MAX_CHARS; j++)
2319 for (k = 0; k < ISO_MAX_FINAL; k++)
2320 iso_charset_table[i][j][k] = -1;
2321
2322 for (i = 0; i < 256; i++)
2323 emacs_mule_charset[i] = -1;
2324
2325 charset_jisx0201_roman = -1;
2326 charset_jisx0208_1978 = -1;
2327 charset_jisx0208 = -1;
2328 charset_ksc5601 = -1;
2329 }
2330
2331 #ifdef emacs
2332
2333 /* Allocate an initial charset table that is large enough to handle
2334 Emacs while it is bootstrapping. As of September 2011, the size
2335 needs to be at least 166; make it a bit bigger to allow for future
2336 expansion.
2337
2338 Don't make the value so small that the table is reallocated during
2339 bootstrapping, as glibc malloc calls larger than just under 64 KiB
2340 during an initial bootstrap wreak havoc after dumping; see the
2341 M_MMAP_THRESHOLD value in alloc.c, plus there is a extra overhead
2342 internal to glibc malloc and perhaps to Emacs malloc debugging. */
2343 static struct charset charset_table_init[180];
2344
2345 void
2346 syms_of_charset (void)
2347 {
2348 DEFSYM (Qcharsetp, "charsetp");
2349
2350 DEFSYM (Qascii, "ascii");
2351 DEFSYM (Qunicode, "unicode");
2352 DEFSYM (Qemacs, "emacs");
2353 DEFSYM (Qeight_bit, "eight-bit");
2354 DEFSYM (Qiso_8859_1, "iso-8859-1");
2355
2356 DEFSYM (Qgl, "gl");
2357 DEFSYM (Qgr, "gr");
2358
2359 staticpro (&Vcharset_ordered_list);
2360 Vcharset_ordered_list = Qnil;
2361
2362 staticpro (&Viso_2022_charset_list);
2363 Viso_2022_charset_list = Qnil;
2364
2365 staticpro (&Vemacs_mule_charset_list);
2366 Vemacs_mule_charset_list = Qnil;
2367
2368 /* Don't staticpro them here. It's done in syms_of_fns. */
2369 QCtest = intern_c_string (":test");
2370 Qeq = intern_c_string ("eq");
2371
2372 staticpro (&Vcharset_hash_table);
2373 {
2374 Lisp_Object args[2];
2375 args[0] = QCtest;
2376 args[1] = Qeq;
2377 Vcharset_hash_table = Fmake_hash_table (2, args);
2378 }
2379
2380 charset_table = charset_table_init;
2381 charset_table_size = ARRAYELTS (charset_table_init);
2382 charset_table_used = 0;
2383
2384 defsubr (&Scharsetp);
2385 defsubr (&Smap_charset_chars);
2386 defsubr (&Sdefine_charset_internal);
2387 defsubr (&Sdefine_charset_alias);
2388 defsubr (&Scharset_plist);
2389 defsubr (&Sset_charset_plist);
2390 defsubr (&Sunify_charset);
2391 defsubr (&Sget_unused_iso_final_char);
2392 defsubr (&Sdeclare_equiv_charset);
2393 defsubr (&Sfind_charset_region);
2394 defsubr (&Sfind_charset_string);
2395 defsubr (&Sdecode_char);
2396 defsubr (&Sencode_char);
2397 defsubr (&Ssplit_char);
2398 defsubr (&Smake_char);
2399 defsubr (&Schar_charset);
2400 defsubr (&Scharset_after);
2401 defsubr (&Siso_charset);
2402 defsubr (&Sclear_charset_maps);
2403 defsubr (&Scharset_priority_list);
2404 defsubr (&Sset_charset_priority);
2405 defsubr (&Scharset_id_internal);
2406 defsubr (&Ssort_charsets);
2407
2408 DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
2409 doc: /* List of directories to search for charset map files. */);
2410 Vcharset_map_path = Qnil;
2411
2412 DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,
2413 doc: /* Inhibit loading of charset maps. Used when dumping Emacs. */);
2414 inhibit_load_charset_map = 0;
2415
2416 DEFVAR_LISP ("charset-list", Vcharset_list,
2417 doc: /* List of all charsets ever defined. */);
2418 Vcharset_list = Qnil;
2419
2420 DEFVAR_LISP ("current-iso639-language", Vcurrent_iso639_language,
2421 doc: /* ISO639 language mnemonic symbol for the current language environment.
2422 If the current language environment is for multiple languages (e.g. "Latin-1"),
2423 the value may be a list of mnemonics. */);
2424 Vcurrent_iso639_language = Qnil;
2425
2426 charset_ascii
2427 = define_charset_internal (Qascii, 1, "\x00\x7F\0\0\0\0\0",
2428 0, 127, 'B', -1, 0, 1, 0, 0);
2429 charset_iso_8859_1
2430 = define_charset_internal (Qiso_8859_1, 1, "\x00\xFF\0\0\0\0\0",
2431 0, 255, -1, -1, -1, 1, 0, 0);
2432 charset_unicode
2433 = define_charset_internal (Qunicode, 3, "\x00\xFF\x00\xFF\x00\x10\0",
2434 0, MAX_UNICODE_CHAR, -1, 0, -1, 1, 0, 0);
2435 charset_emacs
2436 = define_charset_internal (Qemacs, 3, "\x00\xFF\x00\xFF\x00\x3F\0",
2437 0, MAX_5_BYTE_CHAR, -1, 0, -1, 1, 1, 0);
2438 charset_eight_bit
2439 = define_charset_internal (Qeight_bit, 1, "\x80\xFF\0\0\0\0\0",
2440 128, 255, -1, 0, -1, 0, 1,
2441 MAX_5_BYTE_CHAR + 1);
2442 charset_unibyte = charset_iso_8859_1;
2443 }
2444
2445 #endif /* emacs */