]> code.delx.au - gnu-emacs/blob - src/charset.c
(string_to_non_ascii_char): Change the check for the
[gnu-emacs] / src / charset.c
1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 /* At first, see the document in `charset.h' to understand the code in
23 this file. */
24
25 #include <stdio.h>
26
27 #ifdef emacs
28
29 #include <sys/types.h>
30 #include <config.h>
31 #include "lisp.h"
32 #include "buffer.h"
33 #include "charset.h"
34 #include "coding.h"
35 #include "disptab.h"
36
37 #else /* not emacs */
38
39 #include "mulelib.h"
40
41 #endif /* emacs */
42
43 Lisp_Object Qcharset, Qascii, Qcomposition;
44
45 /* Declaration of special leading-codes. */
46 int leading_code_composition; /* for composite characters */
47 int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48 int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49 int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50 int leading_code_private_22; /* for private DIMENSION2 of 2-column */
51
52 /* Declaration of special charsets. */
53 int charset_ascii; /* ASCII */
54 int charset_composition; /* for a composite character */
55 int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56 int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57 int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58 int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59 int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60 int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61 int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
62
63 int min_composite_char;
64
65 Lisp_Object Qcharset_table;
66
67 /* A char-table containing information of each character set. */
68 Lisp_Object Vcharset_table;
69
70 /* A vector of charset symbols indexed by charset-id. This is used
71 only for returning a charset symbol from C functions. */
72 Lisp_Object Vcharset_symbol_table;
73
74 /* A list of charset symbols ever defined. */
75 Lisp_Object Vcharset_list;
76
77 /* Vector of all translation tables ever defined.
78 The ID of a translation table is used to index this vector. */
79 Lisp_Object Vtranslation_table_vector;
80
81 /* A char-table for characters which may invoke auto-filling. */
82 Lisp_Object Vauto_fill_chars;
83
84 Lisp_Object Qauto_fill_chars;
85
86 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
87 int bytes_by_char_head[256];
88 int width_by_char_head[256];
89
90 /* Mapping table from ISO2022's charset (specified by DIMENSION,
91 CHARS, and FINAL-CHAR) to Emacs' charset. */
92 int iso_charset_table[2][2][128];
93
94 /* Table of pointers to the structure `cmpchar_info' indexed by
95 CMPCHAR-ID. */
96 struct cmpchar_info **cmpchar_table;
97 /* The current size of `cmpchar_table'. */
98 static int cmpchar_table_size;
99 /* Number of the current composite characters. */
100 int n_cmpchars;
101
102 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
103 unsigned char *_fetch_multibyte_char_p;
104 int _fetch_multibyte_char_len;
105
106 /* Offset to add to a non-ASCII value when inserting it. */
107 int nonascii_insert_offset;
108
109 /* Translation table for converting non-ASCII unibyte characters
110 to multibyte codes, or nil. */
111 Lisp_Object Vnonascii_translation_table;
112
113 /* List of all possible generic characters. */
114 Lisp_Object Vgeneric_character_list;
115
116 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
117 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
118 \f
/* Signal an error reporting that C is not a valid character.  The
   message shows C in octal, decimal, and hexadecimal notation.  */

void
invalid_character (c)
     int c;
{
  error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
}
125
126
127 /* Set STR a pointer to the multi-byte form of the character C. If C
128 is not a composite character, the multi-byte form is set in WORKBUF
129 and STR points WORKBUF. The caller should allocate at least 4-byte
130 area at WORKBUF in advance. Returns the length of the multi-byte
131 form. If C is an invalid character to have a multi-byte form,
132 signal an error.
133
134 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
135 function directly if C can be an ASCII character. */
136
137 int
138 non_ascii_char_to_string (c, workbuf, str)
139 int c;
140 unsigned char *workbuf, **str;
141 {
142 int charset, c1, c2;
143
144 if (COMPOSITE_CHAR_P (c))
145 {
146 int cmpchar_id = COMPOSITE_CHAR_ID (c);
147
148 if (cmpchar_id < n_cmpchars)
149 {
150 *str = cmpchar_table[cmpchar_id]->data;
151 return cmpchar_table[cmpchar_id]->len;
152 }
153 else
154 {
155 invalid_character (c);
156 }
157 }
158
159 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
160 if (!charset
161 || ! CHARSET_DEFINED_P (charset)
162 || c1 >= 0 && c1 < 32
163 || c2 >= 0 && c2 < 32)
164 invalid_character (c);
165
166 *str = workbuf;
167 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
168 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
169 workbuf++;
170 *workbuf++ = c1 | 0x80;
171 if (c2 >= 0)
172 *workbuf++ = c2 | 0x80;
173
174 return (workbuf - *str);
175 }
176
177 /* Return a non-ASCII character of which multi-byte form is at STR of
178 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
179 multibyte form is set to the address ACTUAL_LEN.
180
181 If exclude_tail_garbage is nonzero, ACTUAL_LEN excludes gabage
182 bytes following the non-ASCII character.
183
184 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
185 directly if STR can hold an ASCII character. */
186
187 int
188 string_to_non_ascii_char (str, len, actual_len, exclude_tail_garbage)
189 const unsigned char *str;
190 int len, *actual_len, exclude_tail_garbage;
191 {
192 int charset;
193 unsigned char c1, c2;
194 register int c, bytes;
195
196 c = *str;
197 bytes = 1;
198
199 if (BASE_LEADING_CODE_P (c))
200 {
201 while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++;
202
203 if (c == LEADING_CODE_COMPOSITION)
204 {
205 int cmpchar_id = str_cmpchar_id (str, bytes);
206
207 if (cmpchar_id >= 0)
208 {
209 c = MAKE_COMPOSITE_CHAR (cmpchar_id);
210 if (exclude_tail_garbage)
211 bytes = cmpchar_table[cmpchar_id]->len;
212 }
213 }
214 else
215 {
216 int charset = c, c1, c2 = 0;
217 int char_bytes = BYTES_BY_CHAR_HEAD (c);
218
219 str++;
220 if (c >= LEADING_CODE_PRIVATE_11)
221 charset = *str++;
222 if (CHARSET_DEFINED_P (charset) && char_bytes <= bytes)
223 {
224 c1 = *str++ & 0x7f;
225 if (CHARSET_DIMENSION (charset) == 2)
226 c2 = *str & 0x7F;
227 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
228 if (exclude_tail_garbage)
229 bytes = char_bytes;
230 }
231 }
232 }
233
234 if (actual_len)
235 *actual_len = bytes;
236 return c;
237 }
238
/* Return the length in bytes of the multi-byte form at the head of
   string STR of length LEN.  The value is 1 unless STR starts with a
   base leading-code, in which case the following non-head bytes (up
   to LEN) are counted too.  */

int
multibyte_form_length (str, len)
     const unsigned char *str;
     int len;
{
  int bytes;

  if (! BASE_LEADING_CODE_P (*str))
    return 1;

  for (bytes = 1; bytes < len; bytes++)
    if (CHAR_HEAD_P (str[bytes]))
      break;

  return bytes;
}
252
253 /* Check if string STR of length LEN contains valid multi-byte form of
254 a character. If valid, charset and position codes of the character
255 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
256 return -1. This should be used only in the macro SPLIT_STRING
257 which checks range of STR in advance. */
258
259 int
260 split_non_ascii_string (str, len, charset, c1, c2)
261 register const unsigned char *str;
262 register unsigned char *c1, *c2;
263 register int len, *charset;
264 {
265 register unsigned int cs = *str++;
266
267 if (cs == LEADING_CODE_COMPOSITION)
268 {
269 int cmpchar_id = str_cmpchar_id (str - 1, len);
270
271 if (cmpchar_id < 0)
272 return -1;
273 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
274 }
275 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
276 && CHARSET_DEFINED_P (cs))
277 {
278 *charset = cs;
279 if (*str < 0xA0)
280 return -1;
281 *c1 = (*str++) & 0x7F;
282 if (CHARSET_DIMENSION (cs) == 2)
283 {
284 if (*str < 0xA0)
285 return -1;
286 *c2 = (*str++) & 0x7F;
287 }
288 }
289 else
290 return -1;
291 return 0;
292 }
293
294 /* Translate character C by translation table TABLE. If C
295 is negative, translate a character specified by CHARSET, C1, and C2
296 (C1 and C2 are code points of the character). If no translation is
297 found in TABLE, return C. */
298 int
299 translate_char (table, c, charset, c1, c2)
300 Lisp_Object table;
301 int c, charset, c1, c2;
302 {
303 Lisp_Object ch;
304 int alt_charset, alt_c1, alt_c2, dimension;
305
306 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
307 if (!CHAR_TABLE_P (table)
308 || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
309 || XINT (ch) < 0)
310 return c;
311
312 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
313 dimension = CHARSET_DIMENSION (alt_charset);
314 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
315 /* CH is not a generic character, just return it. */
316 return XFASTINT (ch);
317
318 /* Since CH is a generic character, we must return a specific
319 charater which has the same position codes as C from CH. */
320 if (charset < 0)
321 SPLIT_CHAR (c, charset, c1, c2);
322 if (dimension != CHARSET_DIMENSION (charset))
323 /* We can't make such a character because of dimension mismatch. */
324 return c;
325 return MAKE_CHAR (alt_charset, c1, c2);
326 }
327
328 /* Convert the unibyte character C to multibyte based on
329 Vnonascii_translation_table or nonascii_insert_offset. If they can't
330 convert C to a valid multibyte character, convert it based on
331 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
332
333 int
334 unibyte_char_to_multibyte (c)
335 int c;
336 {
337 if (c < 0400)
338 {
339 int c_save = c;
340
341 if (! NILP (Vnonascii_translation_table))
342 {
343 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
344 if (c >= 0400 && ! VALID_MULTIBYTE_CHAR_P (c))
345 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
346 }
347 else if (c >= 0240 && nonascii_insert_offset > 0)
348 {
349 c += nonascii_insert_offset;
350 if (c < 0400 || ! VALID_MULTIBYTE_CHAR_P (c))
351 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
352 }
353 else if (c >= 0240)
354 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
355 }
356 return c;
357 }
358
359
360 /* Convert the multibyte character C to unibyte 8-bit character based
361 on Vnonascii_translation_table or nonascii_insert_offset. If
362 REV_TBL is non-nil, it should be a reverse table of
363 Vnonascii_translation_table, i.e. what given by:
364 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
365
366 int
367 multibyte_char_to_unibyte (c, rev_tbl)
368 int c;
369 Lisp_Object rev_tbl;
370 {
371 if (!SINGLE_BYTE_CHAR_P (c))
372 {
373 int c_save = c;
374
375 if (! CHAR_TABLE_P (rev_tbl)
376 && CHAR_TABLE_P (Vnonascii_translation_table))
377 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
378 make_number (0));
379 if (CHAR_TABLE_P (rev_tbl))
380 {
381 Lisp_Object temp;
382 temp = Faref (rev_tbl, make_number (c));
383 if (INTEGERP (temp))
384 c = XINT (temp);
385 if (c >= 256)
386 c = (c_save & 0177) + 0200;
387 }
388 else
389 {
390 if (nonascii_insert_offset > 0)
391 c -= nonascii_insert_offset;
392 if (c < 128 || c >= 256)
393 c = (c_save & 0177) + 0200;
394 }
395 }
396
397 return c;
398 }
399
400 \f
401 /* Update the table Vcharset_table with the given arguments (see the
402 document of `define-charset' for the meaning of each argument).
403 Several other table contents are also updated. The caller should
404 check the validity of CHARSET-ID and the remaining arguments in
405 advance. */
406
407 void
408 update_charset_table (charset_id, dimension, chars, width, direction,
409 iso_final_char, iso_graphic_plane,
410 short_name, long_name, description)
411 Lisp_Object charset_id, dimension, chars, width, direction;
412 Lisp_Object iso_final_char, iso_graphic_plane;
413 Lisp_Object short_name, long_name, description;
414 {
415 int charset = XINT (charset_id);
416 int bytes;
417 unsigned char leading_code_base, leading_code_ext;
418
419 if (NILP (CHARSET_TABLE_ENTRY (charset)))
420 CHARSET_TABLE_ENTRY (charset)
421 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
422
423 /* Get byte length of multibyte form, base leading-code, and
424 extended leading-code of the charset. See the comment under the
425 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
426 bytes = XINT (dimension);
427 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
428 {
429 /* Official charset, it doesn't have an extended leading-code. */
430 if (charset != CHARSET_ASCII)
431 bytes += 1; /* For a base leading-code. */
432 leading_code_base = charset;
433 leading_code_ext = 0;
434 }
435 else
436 {
437 /* Private charset. */
438 bytes += 2; /* For base and extended leading-codes. */
439 leading_code_base
440 = (charset < LEADING_CODE_EXT_12
441 ? LEADING_CODE_PRIVATE_11
442 : (charset < LEADING_CODE_EXT_21
443 ? LEADING_CODE_PRIVATE_12
444 : (charset < LEADING_CODE_EXT_22
445 ? LEADING_CODE_PRIVATE_21
446 : LEADING_CODE_PRIVATE_22)));
447 leading_code_ext = charset;
448 }
449
450 if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
451 error ("Invalid dimension for the charset-ID %d", charset);
452
453 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
454 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
455 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
456 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
457 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
458 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
459 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
460 = make_number (leading_code_base);
461 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
462 = make_number (leading_code_ext);
463 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
464 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
465 = iso_graphic_plane;
466 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
467 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
468 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
469 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
470
471 {
472 /* If we have already defined a charset which has the same
473 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
474 DIRECTION, we must update the entry REVERSE-CHARSET of both
475 charsets. If there's no such charset, the value of the entry
476 is set to nil. */
477 int i;
478
479 for (i = 0; i <= MAX_CHARSET; i++)
480 if (!NILP (CHARSET_TABLE_ENTRY (i)))
481 {
482 if (CHARSET_DIMENSION (i) == XINT (dimension)
483 && CHARSET_CHARS (i) == XINT (chars)
484 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
485 && CHARSET_DIRECTION (i) != XINT (direction))
486 {
487 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
488 = make_number (i);
489 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
490 break;
491 }
492 }
493 if (i > MAX_CHARSET)
494 /* No such a charset. */
495 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
496 = make_number (-1);
497 }
498
499 if (charset != CHARSET_ASCII
500 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
501 {
502 width_by_char_head[leading_code_base] = XINT (width);
503
504 /* Update table emacs_code_class. */
505 emacs_code_class[charset] = (bytes == 2
506 ? EMACS_leading_code_2
507 : (bytes == 3
508 ? EMACS_leading_code_3
509 : EMACS_leading_code_4));
510 }
511
512 /* Update table iso_charset_table. */
513 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
514 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
515 }
516
517 #ifdef emacs
518
519 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
520 is invalid. */
521 int
522 get_charset_id (charset_symbol)
523 Lisp_Object charset_symbol;
524 {
525 Lisp_Object val;
526 int charset;
527
528 return ((SYMBOLP (charset_symbol)
529 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
530 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
531 CHARSET_VALID_P (charset)))
532 ? charset : -1);
533 }
534
535 /* Return an identification number for a new private charset of
536 DIMENSION and WIDTH. If there's no more room for the new charset,
537 return 0. */
538 Lisp_Object
539 get_new_private_charset_id (dimension, width)
540 int dimension, width;
541 {
542 int charset, from, to;
543
544 if (dimension == 1)
545 {
546 if (width == 1)
547 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
548 else
549 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
550 }
551 else
552 {
553 if (width == 1)
554 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
555 else
556 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX + 1;
557 }
558
559 for (charset = from; charset < to; charset++)
560 if (!CHARSET_DEFINED_P (charset)) break;
561
562 return make_number (charset < to ? charset : 0);
563 }
564
565 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
566 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
567 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
568 treated as a private charset.\n\
569 INFO-VECTOR is a vector of the format:\n\
570 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
571 SHORT-NAME LONG-NAME DESCRIPTION]\n\
572 The meanings of each elements is as follows:\n\
573 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
574 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
575 WIDTH (integer) is the number of columns a character in the charset\n\
576 occupies on the screen: one of 0, 1, and 2.\n\
577 \n\
578 DIRECTION (integer) is the rendering direction of characters in the\n\
579 charset when rendering. If 0, render from left to right, else\n\
580 render from right to left.\n\
581 \n\
582 ISO-FINAL-CHAR (character) is the final character of the\n\
583 corresponding ISO 2022 charset.\n\
584 \n\
585 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
586 while encoding to variants of ISO 2022 coding system, one of the\n\
587 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
588 \n\
589 SHORT-NAME (string) is the short name to refer to the charset.\n\
590 \n\
591 LONG-NAME (string) is the long name to refer to the charset.\n\
592 \n\
593 DESCRIPTION (string) is the description string of the charset.")
594 (charset_id, charset_symbol, info_vector)
595 Lisp_Object charset_id, charset_symbol, info_vector;
596 {
597 Lisp_Object *vec;
598
599 if (!NILP (charset_id))
600 CHECK_NUMBER (charset_id, 0);
601 CHECK_SYMBOL (charset_symbol, 1);
602 CHECK_VECTOR (info_vector, 2);
603
604 if (! NILP (charset_id))
605 {
606 if (! CHARSET_VALID_P (XINT (charset_id)))
607 error ("Invalid CHARSET: %d", XINT (charset_id));
608 else if (CHARSET_DEFINED_P (XINT (charset_id)))
609 error ("Already defined charset: %d", XINT (charset_id));
610 }
611
612 vec = XVECTOR (info_vector)->contents;
613 if (XVECTOR (info_vector)->size != 9
614 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
615 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
616 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
617 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
618 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
619 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
620 || !STRINGP (vec[6])
621 || !STRINGP (vec[7])
622 || !STRINGP (vec[8]))
623 error ("Invalid info-vector argument for defining charset %s",
624 XSYMBOL (charset_symbol)->name->data);
625
626 if (NILP (charset_id))
627 {
628 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
629 if (XINT (charset_id) == 0)
630 error ("There's no room for a new private charset %s",
631 XSYMBOL (charset_symbol)->name->data);
632 }
633
634 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
635 vec[4], vec[5], vec[6], vec[7], vec[8]);
636 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
637 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
638 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
639 return Qnil;
640 }
641
642 DEFUN ("generic-character-list", Fgeneric_character_list,
643 Sgeneric_character_list, 0, 0, 0,
644 "Return a list of all possible generic characters.\n\
645 It includes a generic character for a charset not yet defined.")
646 ()
647 {
648 return Vgeneric_character_list;
649 }
650
651 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
652 Sget_unused_iso_final_char, 2, 2, 0,
653 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
654 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
655 CHARS is the number of characters in a dimension: 94 or 96.\n\
656 \n\
657 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
658 If there's no unused final char for the specified kind of charset,\n\
659 return nil.")
660 (dimension, chars)
661 Lisp_Object dimension, chars;
662 {
663 int final_char;
664
665 CHECK_NUMBER (dimension, 0);
666 CHECK_NUMBER (chars, 1);
667 if (XINT (dimension) != 1 && XINT (dimension) != 2)
668 error ("Invalid charset dimension %d, it should be 1 or 2",
669 XINT (dimension));
670 if (XINT (chars) != 94 && XINT (chars) != 96)
671 error ("Invalid charset chars %d, it should be 94 or 96",
672 XINT (chars));
673 for (final_char = '0'; final_char <= '?'; final_char++)
674 {
675 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
676 break;
677 }
678 return (final_char <= '?' ? make_number (final_char) : Qnil);
679 }
680
681 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
682 4, 4, 0,
683 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
684 CHARSET should be defined by `defined-charset' in advance.")
685 (dimension, chars, final_char, charset_symbol)
686 Lisp_Object dimension, chars, final_char, charset_symbol;
687 {
688 int charset;
689
690 CHECK_NUMBER (dimension, 0);
691 CHECK_NUMBER (chars, 1);
692 CHECK_NUMBER (final_char, 2);
693 CHECK_SYMBOL (charset_symbol, 3);
694
695 if (XINT (dimension) != 1 && XINT (dimension) != 2)
696 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
697 if (XINT (chars) != 94 && XINT (chars) != 96)
698 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
699 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
700 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
701 if ((charset = get_charset_id (charset_symbol)) < 0)
702 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
703
704 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
705 return Qnil;
706 }
707
708 /* Return number of different charsets in STR of length LEN. In
709 addition, for each found charset N, CHARSETS[N] is set 1. The
710 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
711 It may lookup a translation table TABLE if supplied.
712
713 If CMPCHARP is nonzero and some composite character is found,
714 CHARSETS[128] is also set 1 and the returned number is incremented
715 by 1. */
716
717 int
718 find_charset_in_str (str, len, charsets, table, cmpcharp)
719 unsigned char *str;
720 int len, *charsets;
721 Lisp_Object table;
722 int cmpcharp;
723 {
724 register int num = 0, c;
725
726 if (! CHAR_TABLE_P (table))
727 table = Qnil;
728
729 while (len > 0)
730 {
731 int bytes, charset;
732 c = *str;
733
734 if (c == LEADING_CODE_COMPOSITION)
735 {
736 int cmpchar_id = str_cmpchar_id (str, len);
737 GLYPH *glyph;
738
739 if (cmpchar_id >= 0)
740 {
741 struct cmpchar_info *cmp_p = cmpchar_table[cmpchar_id];
742 int i;
743
744 for (i = 0; i < cmp_p->glyph_len; i++)
745 {
746 c = cmp_p->glyph[i];
747 if (!NILP (table))
748 {
749 if ((c = translate_char (table, c, 0, 0, 0)) < 0)
750 c = cmp_p->glyph[i];
751 }
752 if ((charset = CHAR_CHARSET (c)) < 0)
753 charset = CHARSET_ASCII;
754 if (!charsets[charset])
755 {
756 charsets[charset] = 1;
757 num += 1;
758 }
759 }
760 str += cmp_p->len;
761 len -= cmp_p->len;
762 if (cmpcharp && !charsets[CHARSET_COMPOSITION])
763 {
764 charsets[CHARSET_COMPOSITION] = 1;
765 num += 1;
766 }
767 continue;
768 }
769
770 charset = CHARSET_ASCII;
771 bytes = 1;
772 }
773 else
774 {
775 c = STRING_CHAR_AND_LENGTH (str, len, bytes);
776 if (! NILP (table))
777 {
778 int c1 = translate_char (table, c, 0, 0, 0);
779 if (c1 >= 0)
780 c = c1;
781 }
782 charset = CHAR_CHARSET (c);
783 }
784
785 if (!charsets[charset])
786 {
787 charsets[charset] = 1;
788 num += 1;
789 }
790 str += bytes;
791 len -= bytes;
792 }
793 return num;
794 }
795
796 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
797 2, 3, 0,
798 "Return a list of charsets in the region between BEG and END.\n\
799 BEG and END are buffer positions.\n\
800 If the region contains any composite character,\n\
801 `composition' is included in the returned list.\n\
802 Optional arg TABLE if non-nil is a translation table to look up.")
803 (beg, end, table)
804 Lisp_Object beg, end, table;
805 {
806 int charsets[MAX_CHARSET + 1];
807 int from, from_byte, to, stop, stop_byte, i;
808 Lisp_Object val;
809
810 validate_region (&beg, &end);
811 from = XFASTINT (beg);
812 stop = to = XFASTINT (end);
813
814 if (NILP (current_buffer->enable_multibyte_characters))
815 return (from == to
816 ? Qnil
817 : Fcons (Qascii, Qnil));
818
819 if (from < GPT && GPT < to)
820 {
821 stop = GPT;
822 stop_byte = GPT_BYTE;
823 }
824 else
825 stop_byte = CHAR_TO_BYTE (stop);
826
827 from_byte = CHAR_TO_BYTE (from);
828
829 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
830 while (1)
831 {
832 find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte,
833 charsets, table, 1);
834 if (stop < to)
835 {
836 from = stop, from_byte = stop_byte;
837 stop = to, stop_byte = CHAR_TO_BYTE (stop);
838 }
839 else
840 break;
841 }
842
843 val = Qnil;
844 for (i = MAX_CHARSET; i >= 0; i--)
845 if (charsets[i])
846 val = Fcons (CHARSET_SYMBOL (i), val);
847 return val;
848 }
849
850 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
851 1, 2, 0,
852 "Return a list of charsets in STR.\n\
853 If the string contains any composite characters,\n\
854 `composition' is included in the returned list.\n\
855 Optional arg TABLE if non-nil is a translation table to look up.")
856 (str, table)
857 Lisp_Object str, table;
858 {
859 int charsets[MAX_CHARSET + 1];
860 int i;
861 Lisp_Object val;
862
863 CHECK_STRING (str, 0);
864
865 if (! STRING_MULTIBYTE (str))
866 return (XSTRING (str)->size == 0
867 ? Qnil
868 : Fcons (Qascii, Qnil));
869
870 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
871 find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)),
872 charsets, table, 1);
873 val = Qnil;
874 for (i = MAX_CHARSET; i >= 0; i--)
875 if (charsets[i])
876 val = Fcons (CHARSET_SYMBOL (i), val);
877 return val;
878 }
879 \f
880 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
881 "")
882 (charset, code1, code2)
883 Lisp_Object charset, code1, code2;
884 {
885 CHECK_NUMBER (charset, 0);
886
887 if (NILP (code1))
888 XSETFASTINT (code1, 0);
889 else
890 CHECK_NUMBER (code1, 1);
891 if (NILP (code2))
892 XSETFASTINT (code2, 0);
893 else
894 CHECK_NUMBER (code2, 2);
895
896 if (!CHARSET_DEFINED_P (XINT (charset)))
897 error ("Invalid charset: %d", XINT (charset));
898
899 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
900 }
901
902 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
903 "Return list of charset and one or two position-codes of CHAR.")
904 (ch)
905 Lisp_Object ch;
906 {
907 Lisp_Object val;
908 int charset, c1, c2;
909
910 CHECK_NUMBER (ch, 0);
911 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
912 return (c2 >= 0
913 ? Fcons (CHARSET_SYMBOL (charset),
914 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
915 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
916 }
917
918 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
919 "Return charset of CHAR.")
920 (ch)
921 Lisp_Object ch;
922 {
923 CHECK_NUMBER (ch, 0);
924
925 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
926 }
927
928 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
929 "Return charset of a character in current buffer at position POS.\n\
930 If POS is nil, it defauls to the current point.")
931 (pos)
932 Lisp_Object pos;
933 {
934 register int pos_byte, c, charset;
935 register unsigned char *p;
936
937 if (NILP (pos))
938 pos_byte = PT_BYTE;
939 else if (MARKERP (pos))
940 pos_byte = marker_byte_position (pos);
941 else
942 {
943 CHECK_NUMBER (pos, 0);
944 pos_byte = CHAR_TO_BYTE (XINT (pos));
945 }
946 p = BYTE_POS_ADDR (pos_byte);
947 c = STRING_CHAR (p, Z_BYTE - pos_byte);
948 charset = CHAR_CHARSET (c);
949 return CHARSET_SYMBOL (charset);
950 }
951
952 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
953 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
954 \n\
955 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
956 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
957 where as Emacs distinguishes them by charset symbol.\n\
958 See the documentation of the function `charset-info' for the meanings of\n\
959 DIMENSION, CHARS, and FINAL-CHAR.")
960 (dimension, chars, final_char)
961 Lisp_Object dimension, chars, final_char;
962 {
963 int charset;
964
965 CHECK_NUMBER (dimension, 0);
966 CHECK_NUMBER (chars, 1);
967 CHECK_NUMBER (final_char, 2);
968
969 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
970 return Qnil;
971 return CHARSET_SYMBOL (charset);
972 }
973
974 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
975 generic character. If GENERICP is zero, return nonzero iff C is a
976 valid normal character. Do not call this function directly,
977 instead use macro CHAR_VALID_P. */
978 int
979 char_valid_p (c, genericp)
980 int c, genericp;
981 {
982 int charset, c1, c2;
983
984 if (c < 0)
985 return 0;
986 if (SINGLE_BYTE_CHAR_P (c))
987 return 1;
988 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
989 if (!CHARSET_VALID_P (charset))
990 return 0;
991 return (c < MIN_CHAR_COMPOSITION
992 ? ((c & CHAR_FIELD1_MASK) /* i.e. dimension of C is two. */
993 ? (genericp && c1 == 0 && c2 == 0
994 || c1 >= 32 && c2 >= 32)
995 : (genericp && c1 == 0
996 || c1 >= 32))
997 : c < MIN_CHAR_COMPOSITION + n_cmpchars);
998 }
999
1000 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1001 "Return t if OBJECT is a valid normal character.\n\
1002 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1003 a valid generic character.")
1004 (object, genericp)
1005 Lisp_Object object, genericp;
1006 {
1007 if (! NATNUMP (object))
1008 return Qnil;
1009 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1010 }
1011
1012 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1013 Sunibyte_char_to_multibyte, 1, 1, 0,
1014 "Convert the unibyte character CH to multibyte character.\n\
1015 The conversion is done based on `nonascii-translation-table' (which see)\n\
1016 or `nonascii-insert-offset' (which see).")
1017 (ch)
1018 Lisp_Object ch;
1019 {
1020 int c;
1021
1022 CHECK_NUMBER (ch, 0);
1023 c = XINT (ch);
1024 if (c < 0 || c >= 0400)
1025 error ("Invalid unibyte character: %d", c);
1026 c = unibyte_char_to_multibyte (c);
1027 if (c < 0)
1028 error ("Can't convert to multibyte character: %d", XINT (ch));
1029 return make_number (c);
1030 }
1031
1032 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1033 Smultibyte_char_to_unibyte, 1, 1, 0,
1034 "Convert the multibyte character CH to unibyte character.\n\
1035 The conversion is done based on `nonascii-translation-table' (which see)\n\
1036 or `nonascii-insert-offset' (which see).")
1037 (ch)
1038 Lisp_Object ch;
1039 {
1040 int c;
1041
1042 CHECK_NUMBER (ch, 0);
1043 c = XINT (ch);
1044 if (c < 0)
1045 error ("Invalid multibyte character: %d", c);
1046 c = multibyte_char_to_unibyte (c, Qnil);
1047 if (c < 0)
1048 error ("Can't convert to unibyte character: %d", XINT (ch));
1049 return make_number (c);
1050 }
1051
1052 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1053 "Return 1 regardless of the argument CHAR.\n\
1054 This is now an obsolete function. We keep it just for backward compatibility.")
1055 (ch)
1056 Lisp_Object ch;
1057 {
1058 Lisp_Object val;
1059
1060 CHECK_NUMBER (ch, 0);
1061 return make_number (1);
1062 }
1063
1064 /* Return how many bytes C will occupy in a multibyte buffer.
1065 Don't call this function directly, instead use macro CHAR_BYTES. */
1066 int
1067 char_bytes (c)
1068 int c;
1069 {
1070 int bytes;
1071
1072 if (COMPOSITE_CHAR_P (c))
1073 {
1074 unsigned int id = COMPOSITE_CHAR_ID (c);
1075
1076 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
1077 }
1078 else
1079 {
1080 int charset = CHAR_CHARSET (c);
1081
1082 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
1083 }
1084
1085 return bytes;
1086 }
1087
/* Return the width of the character whose multi-byte form starts
   with byte C.  The width is measured by how many columns the
   character occupies on the screen when displayed in the current
   buffer.

   C is fully parenthesized in the expansion because callers pass
   expressions such as `*str'.  C is still evaluated several times,
   so it must not have side effects.  */

#define ONE_BYTE_CHAR_WIDTH(c) \
  ((c) < 0x20 \
   ? ((c) == '\t' \
      ? XFASTINT (current_buffer->tab_width) \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f \
      ? 1 \
      : ((c) == 0x7F \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
         : ((! NILP (current_buffer->enable_multibyte_characters) \
             && BASE_LEADING_CODE_P (c)) \
            ? WIDTH_BY_CHAR_HEAD (c) \
            : 4))))
1105
1106 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1107 "Return width of CHAR when displayed in the current buffer.\n\
1108 The width is measured by how many columns it occupies on the screen.")
1109 (ch)
1110 Lisp_Object ch;
1111 {
1112 Lisp_Object val, disp;
1113 int c;
1114 struct Lisp_Char_Table *dp = buffer_display_table ();
1115
1116 CHECK_NUMBER (ch, 0);
1117
1118 c = XINT (ch);
1119
1120 /* Get the way the display table would display it. */
1121 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1122
1123 if (VECTORP (disp))
1124 XSETINT (val, XVECTOR (disp)->size);
1125 else if (SINGLE_BYTE_CHAR_P (c))
1126 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1127 else if (COMPOSITE_CHAR_P (c))
1128 {
1129 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
1130 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
1131 }
1132 else
1133 {
1134 int charset = CHAR_CHARSET (c);
1135
1136 XSETFASTINT (val, CHARSET_WIDTH (charset));
1137 }
1138 return val;
1139 }
1140
1141 /* Return width of string STR of length LEN when displayed in the
1142 current buffer. The width is measured by how many columns it
1143 occupies on the screen. */
1144
1145 int
1146 strwidth (str, len)
1147 unsigned char *str;
1148 int len;
1149 {
1150 unsigned char *endp = str + len;
1151 int width = 0;
1152 struct Lisp_Char_Table *dp = buffer_display_table ();
1153
1154 while (str < endp)
1155 {
1156 if (*str == LEADING_CODE_COMPOSITION)
1157 {
1158 int id = str_cmpchar_id (str, endp - str);
1159
1160 if (id < 0)
1161 {
1162 width += 4;
1163 str++;
1164 }
1165 else
1166 {
1167 width += cmpchar_table[id]->width;
1168 str += cmpchar_table[id]->len;
1169 }
1170 }
1171 else
1172 {
1173 Lisp_Object disp;
1174 int thislen;
1175 int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen);
1176
1177 /* Get the way the display table would display it. */
1178 if (dp)
1179 disp = DISP_CHAR_VECTOR (dp, c);
1180 else
1181 disp = Qnil;
1182
1183 if (VECTORP (disp))
1184 width += XVECTOR (disp)->size;
1185 else
1186 width += ONE_BYTE_CHAR_WIDTH (*str);
1187
1188 str += thislen;
1189 }
1190 }
1191 return width;
1192 }
1193
1194 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1195 "Return width of STRING when displayed in the current buffer.\n\
1196 Width is measured by how many columns it occupies on the screen.\n\
1197 When calculating width of a multibyte character in STRING,\n\
1198 only the base leading-code is considered; the validity of\n\
1199 the following bytes is not checked.")
1200 (str)
1201 Lisp_Object str;
1202 {
1203 Lisp_Object val;
1204
1205 CHECK_STRING (str, 0);
1206 XSETFASTINT (val, strwidth (XSTRING (str)->data,
1207 STRING_BYTES (XSTRING (str))));
1208 return val;
1209 }
1210
1211 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1212 "Return the direction of CHAR.\n\
1213 The returned value is 0 for left-to-right and 1 for right-to-left.")
1214 (ch)
1215 Lisp_Object ch;
1216 {
1217 int charset;
1218
1219 CHECK_NUMBER (ch, 0);
1220 charset = CHAR_CHARSET (XFASTINT (ch));
1221 if (!CHARSET_DEFINED_P (charset))
1222 invalid_character (XINT (ch));
1223 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1224 }
1225
1226 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1227 "Return number of characters between BEG and END.")
1228 (beg, end)
1229 Lisp_Object beg, end;
1230 {
1231 int from, to;
1232
1233 CHECK_NUMBER_COERCE_MARKER (beg, 0);
1234 CHECK_NUMBER_COERCE_MARKER (end, 1);
1235
1236 from = min (XFASTINT (beg), XFASTINT (end));
1237 to = max (XFASTINT (beg), XFASTINT (end));
1238
1239 return make_number (to - from);
1240 }
1241
1242 /* Return the number of characters in the NBYTES bytes at PTR.
1243 This works by looking at the contents and checking for multibyte sequences.
1244 However, if the current buffer has enable-multibyte-characters = nil,
1245 we treat each byte as a character. */
1246
1247 int
1248 chars_in_text (ptr, nbytes)
1249 unsigned char *ptr;
1250 int nbytes;
1251 {
1252 unsigned char *endp, c;
1253 int chars;
1254
1255 /* current_buffer is null at early stages of Emacs initialization. */
1256 if (current_buffer == 0
1257 || NILP (current_buffer->enable_multibyte_characters))
1258 return nbytes;
1259
1260 endp = ptr + nbytes;
1261 chars = 0;
1262
1263 while (ptr < endp)
1264 {
1265 c = *ptr++;
1266
1267 if (BASE_LEADING_CODE_P (c))
1268 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
1269 chars++;
1270 }
1271
1272 return chars;
1273 }
1274
/* Count the characters in the NBYTES bytes at PTR by scanning the
   bytes for multibyte sequences.  Unlike chars_in_text, this does
   not consult enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count;

  for (count = 0; ptr < limit; count++)
    {
      unsigned char lead = *ptr++;

      /* After a base leading-code, skip every byte that is not a
         character head.  */
      if (BASE_LEADING_CODE_P (lead))
        while (ptr < limit && ! CHAR_HEAD_P (*ptr))
          ptr++;
    }

  return count;
}
1301
1302 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1303 "Concatenate all the argument characters and make the result a string.")
1304 (n, args)
1305 int n;
1306 Lisp_Object *args;
1307 {
1308 int i;
1309 unsigned char *buf
1310 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
1311 unsigned char *p = buf;
1312 Lisp_Object val;
1313
1314 for (i = 0; i < n; i++)
1315 {
1316 int c, len;
1317 unsigned char *str;
1318
1319 if (!INTEGERP (args[i]))
1320 CHECK_NUMBER (args[i], 0);
1321 c = XINT (args[i]);
1322 len = CHAR_STRING (c, p, str);
1323 if (p != str)
1324 /* C is a composite character. */
1325 bcopy (str, p, len);
1326 p += len;
1327 }
1328
1329 /* Here, we can't use make_string_from_bytes because of byte
1330 combining problem. */
1331 val = make_string (buf, p - buf);
1332 return val;
1333 }
1334
1335 #endif /* emacs */
1336 \f
1337 /*** Composite character stuff ***/
1338
1339 /* Each composite character is identified by CMPCHAR-ID which is
1340 assigned when Emacs needs the character code of the composite
1341 character (e.g. when displaying it on the screen). See the
1342 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' for how a
1343 composite character is represented in Emacs. */
1344
/* If `static' is defined, it means that it is defined to null string;
   the definition below is skipped in that case.  */
#ifndef static
/* Return a hash value for the LEN bytes at PTR.  The result is
   masked to the low 30 bits, so it is never negative.
   The function is copied from lread.c.  */
static int
hash_string (ptr, len)
     unsigned char *ptr;
     int len;
{
  unsigned char *limit = ptr + len;
  int hash = 0;

  for (; ptr != limit; ptr++)
    {
      unsigned char c = *ptr;

      /* Bytes at or above octal 140 are lowered by (decimal) 40
         before being mixed in.  */
      if (c >= 0140)
        c -= 40;
      hash = (hash << 3) + (hash >> 28) + c;
    }

  return hash & 07777777777;
}
#endif
1367
/* Number of buckets in `cmpchar_hash_table'.  */
#define CMPCHAR_HASH_TABLE_SIZE 0xFFF

static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];

/* Each element of `cmpchar_hash_table' is a pointer to an array of
   integer, where the 1st element is the size of the array, the 2nd
   element is how many elements are actually used in the array, and
   the remaining elements are CMPCHAR-IDs of composite characters of
   the same hash value.

   The accessors below expand to lvalues.  Their arguments are
   parenthesized so that expressions may be passed safely.  */
#define CMPCHAR_HASH_SIZE(table) ((table)[0])
#define CMPCHAR_HASH_USED(table) ((table)[1])
#define CMPCHAR_HASH_CMPCHAR_ID(table, i) ((table)[(i)])
1380
1381 /* Return CMPCHAR-ID of the composite character in STR of the length
1382 LEN. If the composite character has not yet been registered,
1383 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1384 is the sole function for assigning CMPCHAR-ID. */
1385 int
1386 str_cmpchar_id (str, len)
1387 const unsigned char *str;
1388 int len;
1389 {
1390 int hash_idx, *hashp;
1391 unsigned char *buf;
1392 int embedded_rule; /* 1 if composition rule is embedded. */
1393 int chars; /* number of components. */
1394 int i;
1395 struct cmpchar_info *cmpcharp;
1396
1397 /* The second byte 0xFF means compostion rule is embedded. */
1398 embedded_rule = (str[1] == 0xFF);
1399
1400 /* At first, get the actual length of the composite character. */
1401 {
1402 const unsigned char *p, *endp = str + 1, *lastp = str + len;
1403 int bytes;
1404
1405 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++;
1406 if (endp - str < 5)
1407 /* Any composite char have at least 5-byte length. */
1408 return -1;
1409
1410 chars = 0;
1411 p = str + 1;
1412 while (p < endp)
1413 {
1414 if (embedded_rule) p++;
1415 /* No need of checking if *P is 0xA0 because
1416 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1417 p += BYTES_BY_CHAR_HEAD (*p - 0x20);
1418 chars++;
1419 }
1420 if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT)
1421 /* Invalid components. */
1422 return -1;
1423 len = p - str;
1424 }
1425 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1426 hashp = cmpchar_hash_table[hash_idx];
1427
1428 /* Then, look into the hash table. */
1429 if (hashp != NULL)
1430 /* Find the correct one among composite characters of the same
1431 hash value. */
1432 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1433 {
1434 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1435 if (len == cmpcharp->len
1436 && ! bcmp (str, cmpcharp->data, len))
1437 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1438 }
1439
1440 /* We have to register the composite character in cmpchar_table. */
1441 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1442 /* No, we have no more room for a new composite character. */
1443 return -1;
1444
1445 /* Make the entry in hash table. */
1446 if (hashp == NULL)
1447 {
1448 /* Make a table for 8 composite characters initially. */
1449 hashp = (cmpchar_hash_table[hash_idx]
1450 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1451 CMPCHAR_HASH_SIZE (hashp) = 10;
1452 CMPCHAR_HASH_USED (hashp) = 2;
1453 }
1454 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1455 {
1456 CMPCHAR_HASH_SIZE (hashp) += 8;
1457 hashp = (cmpchar_hash_table[hash_idx]
1458 = (int *) xrealloc (hashp,
1459 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1460 }
1461 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1462 CMPCHAR_HASH_USED (hashp)++;
1463
1464 /* Set information of the composite character in cmpchar_table. */
1465 if (cmpchar_table_size == 0)
1466 {
1467 /* This is the first composite character to be registered. */
1468 cmpchar_table_size = 256;
1469 cmpchar_table
1470 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1471 * cmpchar_table_size);
1472 }
1473 else if (cmpchar_table_size <= n_cmpchars)
1474 {
1475 cmpchar_table_size += 256;
1476 cmpchar_table
1477 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1478 sizeof (cmpchar_table[0])
1479 * cmpchar_table_size);
1480 }
1481
1482 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1483
1484 cmpcharp->len = len;
1485 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1486 bcopy (str, cmpcharp->data, len);
1487 cmpcharp->data[len] = 0;
1488 cmpcharp->glyph_len = chars;
1489 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1490 if (embedded_rule)
1491 {
1492 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1493 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1494 }
1495 else
1496 {
1497 cmpcharp->cmp_rule = NULL;
1498 cmpcharp->col_offset = NULL;
1499 }
1500
1501 /* Setup GLYPH data and composition rules (if any) so as not to make
1502 them every time on displaying. */
1503 {
1504 unsigned char *bufp;
1505 int width;
1506 float leftmost = 0.0, rightmost = 1.0;
1507
1508 if (embedded_rule)
1509 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1510 cmpcharp->col_offset[0] = 0;
1511
1512 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1513 {
1514 if (embedded_rule)
1515 cmpcharp->cmp_rule[i] = *bufp++;
1516
1517 if (*bufp == 0xA0) /* This is an ASCII character. */
1518 {
1519 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1520 width = 1;
1521 bufp++;
1522 }
1523 else /* Multibyte character. */
1524 {
1525 /* Make `bufp' point normal multi-byte form temporally. */
1526 *bufp -= 0x20;
1527 cmpcharp->glyph[i]
1528 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0, 0), 0);
1529 width = WIDTH_BY_CHAR_HEAD (*bufp);
1530 *bufp += 0x20;
1531 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1532 }
1533
1534 if (embedded_rule && i > 0)
1535 {
1536 /* Reference points (global_ref and new_ref) are
1537 encoded as below:
1538
1539 0--1--2 -- ascent
1540 | |
1541 | |
1542 | 4 -+--- center
1543 -- 3 5 -- baseline
1544 | |
1545 6--7--8 -- descent
1546
1547 Now, we calculate the column offset of the new glyph
1548 from the left edge of the first glyph. This can avoid
1549 the same calculation everytime displaying this
1550 composite character. */
1551
1552 /* Reference points of global glyph and new glyph. */
1553 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1554 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1555 /* Column offset relative to the first glyph. */
1556 float left = (leftmost
1557 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1558 - (new_ref % 3) * width / 2.0);
1559
1560 cmpcharp->col_offset[i] = left;
1561 if (left < leftmost)
1562 leftmost = left;
1563 if (left + width > rightmost)
1564 rightmost = left + width;
1565 }
1566 else
1567 {
1568 if (width > rightmost)
1569 rightmost = width;
1570 }
1571 }
1572 if (embedded_rule)
1573 {
1574 /* Now col_offset[N] are relative to the left edge of the
1575 first component. Make them relative to the left edge of
1576 overall glyph. */
1577 for (i = 0; i < chars; i++)
1578 cmpcharp->col_offset[i] -= leftmost;
1579 /* Make rightmost holds width of overall glyph. */
1580 rightmost -= leftmost;
1581 }
1582
1583 cmpcharp->width = rightmost;
1584 if (cmpcharp->width < rightmost)
1585 /* To get a ceiling integer value. */
1586 cmpcharp->width++;
1587 }
1588
1589 cmpchar_table[n_cmpchars] = cmpcharp;
1590
1591 return n_cmpchars++;
1592 }
1593
1594 /* Return the Nth element of the composite character C. */
1595 int
1596 cmpchar_component (c, n)
1597 unsigned int c, n;
1598 {
1599 int id = COMPOSITE_CHAR_ID (c);
1600
1601 if (id >= n_cmpchars /* C is not a valid composite character. */
1602 || n >= cmpchar_table[id]->glyph_len) /* No such component. */
1603 return -1;
1604 /* No face data is stored in glyph code. */
1605 return ((int) (cmpchar_table[id]->glyph[n]));
1606 }
1607
1608 DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1609 "T if CHAR is a composite character.")
1610 (ch)
1611 Lisp_Object ch;
1612 {
1613 CHECK_NUMBER (ch, 0);
1614 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1615 }
1616
1617 DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1618 2, 2, 0,
1619 "Return the IDXth component character of composite character CHARACTER.")
1620 (character, idx)
1621 Lisp_Object character, idx;
1622 {
1623 int c;
1624
1625 CHECK_NUMBER (character, 0);
1626 CHECK_NUMBER (idx, 1);
1627
1628 if ((c = cmpchar_component (XINT (character), XINT (idx))) < 0)
1629 args_out_of_range (character, idx);
1630
1631 return make_number (c);
1632 }
1633
1634 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1635 2, 2, 0,
1636 "Return the Nth composition rule embedded in composite character CHARACTER.\n\
1637 The returned rule is for composing the Nth component\n\
1638 on the (N-1)th component. If N is 0, the returned value is always 255.")
1639 (character, n)
1640 Lisp_Object character, n;
1641 {
1642 int id, i;
1643
1644 CHECK_NUMBER (character, 0);
1645 CHECK_NUMBER (n, 1);
1646
1647 id = COMPOSITE_CHAR_ID (XINT (character));
1648 if (id < 0 || id >= n_cmpchars)
1649 error ("Invalid composite character: %d", XINT (character));
1650 i = XINT (n);
1651 if (i > cmpchar_table[id]->glyph_len)
1652 args_out_of_range (character, n);
1653
1654 return make_number (cmpchar_table[id]->cmp_rule[i]);
1655 }
1656
1657 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1658 Scmpchar_cmp_rule_p, 1, 1, 0,
1659 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1660 (character)
1661 Lisp_Object character;
1662 {
1663 int id;
1664
1665 CHECK_NUMBER (character, 0);
1666 id = COMPOSITE_CHAR_ID (XINT (character));
1667 if (id < 0 || id >= n_cmpchars)
1668 error ("Invalid composite character: %d", XINT (character));
1669
1670 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1671 }
1672
1673 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1674 Scmpchar_cmp_count, 1, 1, 0,
1675 "Return number of compoents of composite character CHARACTER.")
1676 (character)
1677 Lisp_Object character;
1678 {
1679 int id;
1680
1681 CHECK_NUMBER (character, 0);
1682 id = COMPOSITE_CHAR_ID (XINT (character));
1683 if (id < 0 || id >= n_cmpchars)
1684 error ("Invalid composite character: %d", XINT (character));
1685
1686 return (make_number (cmpchar_table[id]->glyph_len));
1687 }
1688
1689 DEFUN ("compose-string", Fcompose_string, Scompose_string,
1690 1, 1, 0,
1691 "Return one char string composed from all characters in STRING.")
1692 (str)
1693 Lisp_Object str;
1694 {
1695 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1696 int len, i;
1697
1698 CHECK_STRING (str, 0);
1699
1700 buf[0] = LEADING_CODE_COMPOSITION;
1701 p = XSTRING (str)->data;
1702 pend = p + STRING_BYTES (XSTRING (str));
1703 i = 1;
1704 while (p < pend)
1705 {
1706 if (*p < 0x20 || *p == 127) /* control code */
1707 error ("Invalid component character: %d", *p);
1708 else if (*p < 0x80) /* ASCII */
1709 {
1710 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1711 error ("Too long string to be composed: %s", XSTRING (str)->data);
1712 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1713 code itself. */
1714 buf[i++] = 0xA0;
1715 buf[i++] = *p++ + 0x80;
1716 }
1717 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1718 {
1719 /* Already composed. Eliminate the heading
1720 LEADING_CODE_COMPOSITION, keep the remaining bytes
1721 unchanged. */
1722 p++;
1723 ptemp = p;
1724 while (! CHAR_HEAD_P (*p)) p++;
1725 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1726 error ("Too long string to be composed: %s", XSTRING (str)->data);
1727 bcopy (ptemp, buf + i, p - ptemp);
1728 i += p - ptemp;
1729 }
1730 else /* multibyte char */
1731 {
1732 /* Add 0x20 to the base leading-code, keep the remaining
1733 bytes unchanged. */
1734 len = BYTES_BY_CHAR_HEAD (*p);
1735 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1736 error ("Too long string to be composed: %s", XSTRING (str)->data);
1737 bcopy (p, buf + i, len);
1738 buf[i] += 0x20;
1739 p += len, i += len;
1740 }
1741 }
1742
1743 if (i < 5)
1744 /* STR contains only one character, which can't be composed. */
1745 error ("Too short string to be composed: %s", XSTRING (str)->data);
1746
1747 return make_string_from_bytes (buf, 1, i);
1748 }
1749
1750 \f
1751 int
1752 charset_id_internal (charset_name)
1753 char *charset_name;
1754 {
1755 Lisp_Object val;
1756
1757 val= Fget (intern (charset_name), Qcharset);
1758 if (!VECTORP (val))
1759 error ("Charset %s is not defined", charset_name);
1760
1761 return (XINT (XVECTOR (val)->contents[0]));
1762 }
1763
1764 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1765 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1766 ()
1767 {
1768 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1769 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1770 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1771 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1772 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1773 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1774 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1775 return Qnil;
1776 }
1777
1778 void
1779 init_charset_once ()
1780 {
1781 int i, j, k;
1782
1783 staticpro (&Vcharset_table);
1784 staticpro (&Vcharset_symbol_table);
1785 staticpro (&Vgeneric_character_list);
1786
1787 /* This has to be done here, before we call Fmake_char_table. */
1788 Qcharset_table = intern ("charset-table");
1789 staticpro (&Qcharset_table);
1790
1791 /* Intern this now in case it isn't already done.
1792 Setting this variable twice is harmless.
1793 But don't staticpro it here--that is done in alloc.c. */
1794 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1795
1796 /* Now we are ready to set up this property, so we can
1797 create the charset table. */
1798 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1799 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1800
1801 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
1802
1803 /* Setup tables. */
1804 for (i = 0; i < 2; i++)
1805 for (j = 0; j < 2; j++)
1806 for (k = 0; k < 128; k++)
1807 iso_charset_table [i][j][k] = -1;
1808
1809 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1810 cmpchar_table_size = n_cmpchars = 0;
1811
1812 for (i = 0; i < 128; i++)
1813 BYTES_BY_CHAR_HEAD (i) = 1;
1814 for (i = MIN_CHARSET_OFFICIAL_DIMENSION1;
1815 i <= MAX_CHARSET_OFFICIAL_DIMENSION1; i++)
1816 BYTES_BY_CHAR_HEAD (i) = 2;
1817 for (i = MIN_CHARSET_OFFICIAL_DIMENSION2;
1818 i <= MAX_CHARSET_OFFICIAL_DIMENSION2; i++)
1819 BYTES_BY_CHAR_HEAD (i) = 3;
1820 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1821 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1822 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1823 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1824 /* The followings don't reflect the actual bytes, but just to tell
1825 that it is a start of a multibyte character. */
1826 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1827 BYTES_BY_CHAR_HEAD (0x9E) = 2;
1828 BYTES_BY_CHAR_HEAD (0x9F) = 2;
1829
1830 for (i = 0; i < 128; i++)
1831 WIDTH_BY_CHAR_HEAD (i) = 1;
1832 for (; i < 256; i++)
1833 WIDTH_BY_CHAR_HEAD (i) = 4;
1834 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1835 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1836 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1837 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1838
1839 {
1840 Lisp_Object val;
1841
1842 val = Qnil;
1843 for (i = 0x81; i < 0x90; i++)
1844 val = Fcons (make_number ((i - 0x70) << 7), val);
1845 for (; i < 0x9A; i++)
1846 val = Fcons (make_number ((i - 0x8F) << 14), val);
1847 for (i = 0xA0; i < 0xF0; i++)
1848 val = Fcons (make_number ((i - 0x70) << 7), val);
1849 for (; i < 0xFF; i++)
1850 val = Fcons (make_number ((i - 0xE0) << 14), val);
1851 val = Fcons (make_number (GENERIC_COMPOSITION_CHAR), val);
1852 Vgeneric_character_list = Fnreverse (val);
1853 }
1854
1855 nonascii_insert_offset = 0;
1856 Vnonascii_translation_table = Qnil;
1857 }
1858
1859 #ifdef emacs
1860
1861 void
1862 syms_of_charset ()
1863 {
1864 Qascii = intern ("ascii");
1865 staticpro (&Qascii);
1866
1867 Qcharset = intern ("charset");
1868 staticpro (&Qcharset);
1869
1870 /* Define ASCII charset now. */
1871 update_charset_table (make_number (CHARSET_ASCII),
1872 make_number (1), make_number (94),
1873 make_number (1),
1874 make_number (0),
1875 make_number ('B'),
1876 make_number (0),
1877 build_string ("ASCII"),
1878 build_string ("ASCII"),
1879 build_string ("ASCII (ISO646 IRV)"));
1880 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1881 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1882
1883 Qcomposition = intern ("composition");
1884 staticpro (&Qcomposition);
1885 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1886
1887 Qauto_fill_chars = intern ("auto-fill-chars");
1888 staticpro (&Qauto_fill_chars);
1889 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1890
1891 defsubr (&Sdefine_charset);
1892 defsubr (&Sgeneric_character_list);
1893 defsubr (&Sget_unused_iso_final_char);
1894 defsubr (&Sdeclare_equiv_charset);
1895 defsubr (&Sfind_charset_region);
1896 defsubr (&Sfind_charset_string);
1897 defsubr (&Smake_char_internal);
1898 defsubr (&Ssplit_char);
1899 defsubr (&Schar_charset);
1900 defsubr (&Scharset_after);
1901 defsubr (&Siso_charset);
1902 defsubr (&Schar_valid_p);
1903 defsubr (&Sunibyte_char_to_multibyte);
1904 defsubr (&Smultibyte_char_to_unibyte);
1905 defsubr (&Schar_bytes);
1906 defsubr (&Schar_width);
1907 defsubr (&Sstring_width);
1908 defsubr (&Schar_direction);
1909 defsubr (&Schars_in_region);
1910 defsubr (&Sstring);
1911 defsubr (&Scmpcharp);
1912 defsubr (&Scmpchar_component);
1913 defsubr (&Scmpchar_cmp_rule);
1914 defsubr (&Scmpchar_cmp_rule_p);
1915 defsubr (&Scmpchar_cmp_count);
1916 defsubr (&Scompose_string);
1917 defsubr (&Ssetup_special_charsets);
1918
1919 DEFVAR_LISP ("charset-list", &Vcharset_list,
1920 "List of charsets ever defined.");
1921 Vcharset_list = Fcons (Qascii, Qnil);
1922
1923 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
1924 "Vector of cons cell of a symbol and translation table ever defined.\n\
1925 An ID of a translation table is an index of this vector.");
1926 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1927
1928 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1929 "Leading-code of composite characters.");
1930 leading_code_composition = LEADING_CODE_COMPOSITION;
1931
1932 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1933 "Leading-code of private TYPE9N charset of column-width 1.");
1934 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1935
1936 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1937 "Leading-code of private TYPE9N charset of column-width 2.");
1938 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1939
1940 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1941 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1942 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1943
1944 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1945 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1946 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1947
1948 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1949 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1950 This is used for converting unibyte text to multibyte,\n\
1951 and for inserting character codes specified by number.\n\n\
1952 This serves to convert a Latin-1 or similar 8-bit character code\n\
1953 to the corresponding Emacs multibyte character code.\n\
1954 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1955 for your choice of character set.\n\
1956 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1957 nonascii_insert_offset = 0;
1958
1959 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1960 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1961 This is used for converting unibyte text to multibyte,\n\
1962 and for inserting character codes specified by number.\n\n\
1963 Conversion is performed only when multibyte characters are enabled,\n\
1964 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1965 to the corresponding Emacs character code.\n\n\
1966 If this is nil, `nonascii-insert-offset' is used instead.\n\
1967 See also the docstring of `make-translation-table'.");
1968 Vnonascii_translation_table = Qnil;
1969
1970 DEFVAR_INT ("min-composite-char", &min_composite_char,
1971 "Minimum character code of a composite character.");
1972 min_composite_char = MIN_CHAR_COMPOSITION;
1973
1974 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1975 "A char-table for characters which invoke auto-filling.\n\
1976 Such characters has value t in this table.");
1977 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1978 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1979 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1980 }
1981
1982 #endif /* emacs */