]> code.delx.au - gnu-emacs/blob - src/charset.c
(non_ascii_char_to_string): If C is negative, signal error.
[gnu-emacs] / src / charset.c
1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
4
5 This file is part of GNU Emacs.
6
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
21
22 /* At first, see the document in `charset.h' to understand the code in
23 this file. */
24
25 #include <stdio.h>
26
27 #ifdef emacs
28
29 #include <sys/types.h>
30 #include <config.h>
31 #include "lisp.h"
32 #include "buffer.h"
33 #include "charset.h"
34 #include "coding.h"
35 #include "disptab.h"
36
37 #else /* not emacs */
38
39 #include "mulelib.h"
40
41 #endif /* emacs */
42
43 Lisp_Object Qcharset, Qascii, Qcomposition;
44
45 /* Declaration of special leading-codes. */
46 int leading_code_composition; /* for composite characters */
47 int leading_code_private_11; /* for private DIMENSION1 of 1-column */
48 int leading_code_private_12; /* for private DIMENSION1 of 2-column */
49 int leading_code_private_21; /* for private DIMENSION2 of 1-column */
50 int leading_code_private_22; /* for private DIMENSION2 of 2-column */
51
52 /* Declaration of special charsets. */
53 int charset_ascii; /* ASCII */
54 int charset_composition; /* for a composite character */
55 int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
56 int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
57 int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
58 int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
59 int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
60 int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
61 int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
62
63 int min_composite_char;
64
65 Lisp_Object Qcharset_table;
66
67 /* A char-table containing information of each character set. */
68 Lisp_Object Vcharset_table;
69
70 /* A vector of charset symbol indexed by charset-id. This is used
71 only for returning charset symbol from C functions. */
72 Lisp_Object Vcharset_symbol_table;
73
74 /* A list of charset symbols ever defined. */
75 Lisp_Object Vcharset_list;
76
77 /* Vector of translation table ever defined.
78 ID of a translation table is used to index this vector. */
79 Lisp_Object Vtranslation_table_vector;
80
81 /* A char-table for characters which may invoke auto-filling. */
82 Lisp_Object Vauto_fill_chars;
83
84 Lisp_Object Qauto_fill_chars;
85
86 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
87 int bytes_by_char_head[256];
88 int width_by_char_head[256];
89
90 /* Mapping table from ISO2022's charset (specified by DIMENSION,
91 CHARS, and FINAL-CHAR) to Emacs' charset. */
92 int iso_charset_table[2][2][128];
93
94 /* Table of pointers to the structure `cmpchar_info' indexed by
95 CMPCHAR-ID. */
96 struct cmpchar_info **cmpchar_table;
97 /* The current size of `cmpchar_table'. */
98 static int cmpchar_table_size;
99 /* Number of the current composite characters. */
100 int n_cmpchars;
101
102 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
103 unsigned char *_fetch_multibyte_char_p;
104 int _fetch_multibyte_char_len;
105
106 /* Offset to add to a non-ASCII value when inserting it. */
107 int nonascii_insert_offset;
108
109 /* Translation table for converting non-ASCII unibyte characters
110 to multibyte codes, or nil. */
111 Lisp_Object Vnonascii_translation_table;
112
113 /* List of all possible generic characters. */
114 Lisp_Object Vgeneric_character_list;
115
116 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
117 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
118 \f
119 void
120 invalid_character (c)
121 int c;
122 {
123 error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
124 }
125
126
127 /* Set STR a pointer to the multi-byte form of the character C. If C
128 is not a composite character, the multi-byte form is set in WORKBUF
129 and STR points WORKBUF. The caller should allocate at least 4-byte
130 area at WORKBUF in advance. Returns the length of the multi-byte
131 form. If C is an invalid character to have a multi-byte form,
132 signal an error.
133
134 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
135 function directly if C can be an ASCII character. */
136
137 int
138 non_ascii_char_to_string (c, workbuf, str)
139 int c;
140 unsigned char *workbuf, **str;
141 {
142 int charset, c1, c2;
143
144 if (c < 0)
145 invalid_character (c);
146
147 if (COMPOSITE_CHAR_P (c))
148 {
149 int cmpchar_id = COMPOSITE_CHAR_ID (c);
150
151 if (cmpchar_id < n_cmpchars)
152 {
153 *str = cmpchar_table[cmpchar_id]->data;
154 return cmpchar_table[cmpchar_id]->len;
155 }
156 else
157 {
158 invalid_character (c);
159 }
160 }
161
162 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
163 if (!charset
164 || ! CHARSET_DEFINED_P (charset)
165 || c1 >= 0 && c1 < 32
166 || c2 >= 0 && c2 < 32)
167 invalid_character (c);
168
169 *str = workbuf;
170 *workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
171 if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
172 workbuf++;
173 *workbuf++ = c1 | 0x80;
174 if (c2 >= 0)
175 *workbuf++ = c2 | 0x80;
176
177 return (workbuf - *str);
178 }
179
180 /* Return a non-ASCII character of which multi-byte form is at STR of
181 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
182 multibyte form is set to the address ACTUAL_LEN.
183
184 If exclude_tail_garbage is nonzero, ACTUAL_LEN excludes gabage
185 bytes following the non-ASCII character.
186
187 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
188 directly if STR can hold an ASCII character. */
189
190 int
191 string_to_non_ascii_char (str, len, actual_len, exclude_tail_garbage)
192 const unsigned char *str;
193 int len, *actual_len, exclude_tail_garbage;
194 {
195 int charset;
196 unsigned char c1, c2;
197 int c, bytes;
198 const unsigned char *begp = str;
199
200 c = *str++;
201 bytes = 1;
202
203 if (BASE_LEADING_CODE_P (c))
204 do {
205 while (bytes < len && ! CHAR_HEAD_P (begp[bytes])) bytes++;
206
207 if (c == LEADING_CODE_COMPOSITION)
208 {
209 int cmpchar_id = str_cmpchar_id (begp, bytes);
210
211 if (cmpchar_id >= 0)
212 {
213 c = MAKE_COMPOSITE_CHAR (cmpchar_id);
214 str += cmpchar_table[cmpchar_id]->len - 1;
215 }
216 else
217 str += bytes - 1;
218 }
219 else
220 {
221 const unsigned char *endp = begp + bytes;
222 int charset = c, c1, c2 = 0;
223
224 if (str >= endp) break;
225 if (c >= LEADING_CODE_PRIVATE_11 && c <= LEADING_CODE_PRIVATE_22)
226 {
227 charset = *str++;
228 if (str < endp)
229 c1 = *str++ & 0x7F;
230 else
231 c1 = charset, charset = c;
232 }
233 else
234 c1 = *str++ & 0x7f;
235 if (CHARSET_DEFINED_P (charset)
236 && CHARSET_DIMENSION (charset) == 2
237 && str < endp)
238 c2 = *str++ & 0x7F;
239 c = MAKE_NON_ASCII_CHAR (charset, c1, c2);
240 }
241 } while (0);
242
243 if (actual_len)
244 *actual_len = exclude_tail_garbage ? str - begp : bytes;
245 return c;
246 }
247
248 /* Return the length of the multi-byte form at string STR of length LEN. */
249 int
250 multibyte_form_length (str, len)
251 const unsigned char *str;
252 int len;
253 {
254 int bytes = 1;
255
256 if (BASE_LEADING_CODE_P (*str))
257 while (bytes < len && ! CHAR_HEAD_P (str[bytes])) bytes++;
258
259 return bytes;
260 }
261
262 /* Check if string STR of length LEN contains valid multi-byte form of
263 a character. If valid, charset and position codes of the character
264 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
265 return -1. This should be used only in the macro SPLIT_STRING
266 which checks range of STR in advance. */
267
268 int
269 split_non_ascii_string (str, len, charset, c1, c2)
270 register const unsigned char *str;
271 register unsigned char *c1, *c2;
272 register int len, *charset;
273 {
274 register unsigned int cs = *str++;
275
276 if (cs == LEADING_CODE_COMPOSITION)
277 {
278 int cmpchar_id = str_cmpchar_id (str - 1, len);
279
280 if (cmpchar_id < 0)
281 return -1;
282 *charset = cs, *c1 = cmpchar_id >> 7, *c2 = cmpchar_id & 0x7F;
283 }
284 else if ((cs < LEADING_CODE_PRIVATE_11 || (cs = *str++) >= 0xA0)
285 && CHARSET_DEFINED_P (cs))
286 {
287 *charset = cs;
288 if (*str < 0xA0)
289 return -1;
290 *c1 = (*str++) & 0x7F;
291 if (CHARSET_DIMENSION (cs) == 2)
292 {
293 if (*str < 0xA0)
294 return -1;
295 *c2 = (*str++) & 0x7F;
296 }
297 }
298 else
299 return -1;
300 return 0;
301 }
302
303 /* Translate character C by translation table TABLE. If C
304 is negative, translate a character specified by CHARSET, C1, and C2
305 (C1 and C2 are code points of the character). If no translation is
306 found in TABLE, return C. */
307 int
308 translate_char (table, c, charset, c1, c2)
309 Lisp_Object table;
310 int c, charset, c1, c2;
311 {
312 Lisp_Object ch;
313 int alt_charset, alt_c1, alt_c2, dimension;
314
315 if (c < 0) c = MAKE_CHAR (charset, c1, c2);
316 if (!CHAR_TABLE_P (table)
317 || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
318 || XINT (ch) < 0)
319 return c;
320
321 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
322 dimension = CHARSET_DIMENSION (alt_charset);
323 if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
324 /* CH is not a generic character, just return it. */
325 return XFASTINT (ch);
326
327 /* Since CH is a generic character, we must return a specific
328 charater which has the same position codes as C from CH. */
329 if (charset < 0)
330 SPLIT_CHAR (c, charset, c1, c2);
331 if (dimension != CHARSET_DIMENSION (charset))
332 /* We can't make such a character because of dimension mismatch. */
333 return c;
334 return MAKE_CHAR (alt_charset, c1, c2);
335 }
336
337 /* Convert the unibyte character C to multibyte based on
338 Vnonascii_translation_table or nonascii_insert_offset. If they can't
339 convert C to a valid multibyte character, convert it based on
340 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
341
342 int
343 unibyte_char_to_multibyte (c)
344 int c;
345 {
346 if (c < 0400)
347 {
348 int c_save = c;
349
350 if (! NILP (Vnonascii_translation_table))
351 {
352 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
353 if (c >= 0400 && ! VALID_MULTIBYTE_CHAR_P (c))
354 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
355 }
356 else if (c >= 0240 && nonascii_insert_offset > 0)
357 {
358 c += nonascii_insert_offset;
359 if (c < 0400 || ! VALID_MULTIBYTE_CHAR_P (c))
360 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
361 }
362 else if (c >= 0240)
363 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
364 }
365 return c;
366 }
367
368
369 /* Convert the multibyte character C to unibyte 8-bit character based
370 on Vnonascii_translation_table or nonascii_insert_offset. If
371 REV_TBL is non-nil, it should be a reverse table of
372 Vnonascii_translation_table, i.e. what given by:
373 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
374
375 int
376 multibyte_char_to_unibyte (c, rev_tbl)
377 int c;
378 Lisp_Object rev_tbl;
379 {
380 if (!SINGLE_BYTE_CHAR_P (c))
381 {
382 int c_save = c;
383
384 if (! CHAR_TABLE_P (rev_tbl)
385 && CHAR_TABLE_P (Vnonascii_translation_table))
386 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
387 make_number (0));
388 if (CHAR_TABLE_P (rev_tbl))
389 {
390 Lisp_Object temp;
391 temp = Faref (rev_tbl, make_number (c));
392 if (INTEGERP (temp))
393 c = XINT (temp);
394 if (c >= 256)
395 c = (c_save & 0177) + 0200;
396 }
397 else
398 {
399 if (nonascii_insert_offset > 0)
400 c -= nonascii_insert_offset;
401 if (c < 128 || c >= 256)
402 c = (c_save & 0177) + 0200;
403 }
404 }
405
406 return c;
407 }
408
409 \f
410 /* Update the table Vcharset_table with the given arguments (see the
411 document of `define-charset' for the meaning of each argument).
412 Several other table contents are also updated. The caller should
413 check the validity of CHARSET-ID and the remaining arguments in
414 advance. */
415
416 void
417 update_charset_table (charset_id, dimension, chars, width, direction,
418 iso_final_char, iso_graphic_plane,
419 short_name, long_name, description)
420 Lisp_Object charset_id, dimension, chars, width, direction;
421 Lisp_Object iso_final_char, iso_graphic_plane;
422 Lisp_Object short_name, long_name, description;
423 {
424 int charset = XINT (charset_id);
425 int bytes;
426 unsigned char leading_code_base, leading_code_ext;
427
428 if (NILP (CHARSET_TABLE_ENTRY (charset)))
429 CHARSET_TABLE_ENTRY (charset)
430 = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
431
432 /* Get byte length of multibyte form, base leading-code, and
433 extended leading-code of the charset. See the comment under the
434 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
435 bytes = XINT (dimension);
436 if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
437 {
438 /* Official charset, it doesn't have an extended leading-code. */
439 if (charset != CHARSET_ASCII)
440 bytes += 1; /* For a base leading-code. */
441 leading_code_base = charset;
442 leading_code_ext = 0;
443 }
444 else
445 {
446 /* Private charset. */
447 bytes += 2; /* For base and extended leading-codes. */
448 leading_code_base
449 = (charset < LEADING_CODE_EXT_12
450 ? LEADING_CODE_PRIVATE_11
451 : (charset < LEADING_CODE_EXT_21
452 ? LEADING_CODE_PRIVATE_12
453 : (charset < LEADING_CODE_EXT_22
454 ? LEADING_CODE_PRIVATE_21
455 : LEADING_CODE_PRIVATE_22)));
456 leading_code_ext = charset;
457 }
458
459 if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
460 error ("Invalid dimension for the charset-ID %d", charset);
461
462 CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
463 CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
464 CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
465 CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
466 CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
467 CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
468 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
469 = make_number (leading_code_base);
470 CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
471 = make_number (leading_code_ext);
472 CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
473 CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
474 = iso_graphic_plane;
475 CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
476 CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
477 CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
478 CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
479
480 {
481 /* If we have already defined a charset which has the same
482 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
483 DIRECTION, we must update the entry REVERSE-CHARSET of both
484 charsets. If there's no such charset, the value of the entry
485 is set to nil. */
486 int i;
487
488 for (i = 0; i <= MAX_CHARSET; i++)
489 if (!NILP (CHARSET_TABLE_ENTRY (i)))
490 {
491 if (CHARSET_DIMENSION (i) == XINT (dimension)
492 && CHARSET_CHARS (i) == XINT (chars)
493 && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
494 && CHARSET_DIRECTION (i) != XINT (direction))
495 {
496 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
497 = make_number (i);
498 CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
499 break;
500 }
501 }
502 if (i > MAX_CHARSET)
503 /* No such a charset. */
504 CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
505 = make_number (-1);
506 }
507
508 if (charset != CHARSET_ASCII
509 && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
510 {
511 width_by_char_head[leading_code_base] = XINT (width);
512
513 /* Update table emacs_code_class. */
514 emacs_code_class[charset] = (bytes == 2
515 ? EMACS_leading_code_2
516 : (bytes == 3
517 ? EMACS_leading_code_3
518 : EMACS_leading_code_4));
519 }
520
521 /* Update table iso_charset_table. */
522 if (ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
523 ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
524 }
525
526 #ifdef emacs
527
528 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
529 is invalid. */
530 int
531 get_charset_id (charset_symbol)
532 Lisp_Object charset_symbol;
533 {
534 Lisp_Object val;
535 int charset;
536
537 return ((SYMBOLP (charset_symbol)
538 && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
539 && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
540 CHARSET_VALID_P (charset)))
541 ? charset : -1);
542 }
543
544 /* Return an identification number for a new private charset of
545 DIMENSION and WIDTH. If there's no more room for the new charset,
546 return 0. */
547 Lisp_Object
548 get_new_private_charset_id (dimension, width)
549 int dimension, width;
550 {
551 int charset, from, to;
552
553 if (dimension == 1)
554 {
555 if (width == 1)
556 from = LEADING_CODE_EXT_11, to = LEADING_CODE_EXT_12;
557 else
558 from = LEADING_CODE_EXT_12, to = LEADING_CODE_EXT_21;
559 }
560 else
561 {
562 if (width == 1)
563 from = LEADING_CODE_EXT_21, to = LEADING_CODE_EXT_22;
564 else
565 from = LEADING_CODE_EXT_22, to = LEADING_CODE_EXT_MAX + 1;
566 }
567
568 for (charset = from; charset < to; charset++)
569 if (!CHARSET_DEFINED_P (charset)) break;
570
571 return make_number (charset < to ? charset : 0);
572 }
573
574 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
575 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
576 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
577 treated as a private charset.\n\
578 INFO-VECTOR is a vector of the format:\n\
579 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
580 SHORT-NAME LONG-NAME DESCRIPTION]\n\
581 The meanings of each elements is as follows:\n\
582 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
583 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
584 WIDTH (integer) is the number of columns a character in the charset\n\
585 occupies on the screen: one of 0, 1, and 2.\n\
586 \n\
587 DIRECTION (integer) is the rendering direction of characters in the\n\
588 charset when rendering. If 0, render from left to right, else\n\
589 render from right to left.\n\
590 \n\
591 ISO-FINAL-CHAR (character) is the final character of the\n\
592 corresponding ISO 2022 charset.\n\
593 \n\
594 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
595 while encoding to variants of ISO 2022 coding system, one of the\n\
596 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
597 \n\
598 SHORT-NAME (string) is the short name to refer to the charset.\n\
599 \n\
600 LONG-NAME (string) is the long name to refer to the charset.\n\
601 \n\
602 DESCRIPTION (string) is the description string of the charset.")
603 (charset_id, charset_symbol, info_vector)
604 Lisp_Object charset_id, charset_symbol, info_vector;
605 {
606 Lisp_Object *vec;
607
608 if (!NILP (charset_id))
609 CHECK_NUMBER (charset_id, 0);
610 CHECK_SYMBOL (charset_symbol, 1);
611 CHECK_VECTOR (info_vector, 2);
612
613 if (! NILP (charset_id))
614 {
615 if (! CHARSET_VALID_P (XINT (charset_id)))
616 error ("Invalid CHARSET: %d", XINT (charset_id));
617 else if (CHARSET_DEFINED_P (XINT (charset_id)))
618 error ("Already defined charset: %d", XINT (charset_id));
619 }
620
621 vec = XVECTOR (info_vector)->contents;
622 if (XVECTOR (info_vector)->size != 9
623 || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
624 || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
625 || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
626 || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
627 || !INTEGERP (vec[4]) || !(XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
628 || !INTEGERP (vec[5]) || !(XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
629 || !STRINGP (vec[6])
630 || !STRINGP (vec[7])
631 || !STRINGP (vec[8]))
632 error ("Invalid info-vector argument for defining charset %s",
633 XSYMBOL (charset_symbol)->name->data);
634
635 if (NILP (charset_id))
636 {
637 charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
638 if (XINT (charset_id) == 0)
639 error ("There's no room for a new private charset %s",
640 XSYMBOL (charset_symbol)->name->data);
641 }
642
643 update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
644 vec[4], vec[5], vec[6], vec[7], vec[8]);
645 Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
646 CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
647 Vcharset_list = Fcons (charset_symbol, Vcharset_list);
648 return Qnil;
649 }
650
651 DEFUN ("generic-character-list", Fgeneric_character_list,
652 Sgeneric_character_list, 0, 0, 0,
653 "Return a list of all possible generic characters.\n\
654 It includes a generic character for a charset not yet defined.")
655 ()
656 {
657 return Vgeneric_character_list;
658 }
659
660 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
661 Sget_unused_iso_final_char, 2, 2, 0,
662 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
663 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
664 CHARS is the number of characters in a dimension: 94 or 96.\n\
665 \n\
666 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
667 If there's no unused final char for the specified kind of charset,\n\
668 return nil.")
669 (dimension, chars)
670 Lisp_Object dimension, chars;
671 {
672 int final_char;
673
674 CHECK_NUMBER (dimension, 0);
675 CHECK_NUMBER (chars, 1);
676 if (XINT (dimension) != 1 && XINT (dimension) != 2)
677 error ("Invalid charset dimension %d, it should be 1 or 2",
678 XINT (dimension));
679 if (XINT (chars) != 94 && XINT (chars) != 96)
680 error ("Invalid charset chars %d, it should be 94 or 96",
681 XINT (chars));
682 for (final_char = '0'; final_char <= '?'; final_char++)
683 {
684 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
685 break;
686 }
687 return (final_char <= '?' ? make_number (final_char) : Qnil);
688 }
689
690 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
691 4, 4, 0,
692 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
693 CHARSET should be defined by `defined-charset' in advance.")
694 (dimension, chars, final_char, charset_symbol)
695 Lisp_Object dimension, chars, final_char, charset_symbol;
696 {
697 int charset;
698
699 CHECK_NUMBER (dimension, 0);
700 CHECK_NUMBER (chars, 1);
701 CHECK_NUMBER (final_char, 2);
702 CHECK_SYMBOL (charset_symbol, 3);
703
704 if (XINT (dimension) != 1 && XINT (dimension) != 2)
705 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
706 if (XINT (chars) != 94 && XINT (chars) != 96)
707 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
708 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
709 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
710 if ((charset = get_charset_id (charset_symbol)) < 0)
711 error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
712
713 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
714 return Qnil;
715 }
716
717 /* Return number of different charsets in STR of length LEN. In
718 addition, for each found charset N, CHARSETS[N] is set 1. The
719 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
720 It may lookup a translation table TABLE if supplied.
721
722 If CMPCHARP is nonzero and some composite character is found,
723 CHARSETS[128] is also set 1 and the returned number is incremented
724 by 1. */
725
726 int
727 find_charset_in_str (str, len, charsets, table, cmpcharp)
728 unsigned char *str;
729 int len, *charsets;
730 Lisp_Object table;
731 int cmpcharp;
732 {
733 register int num = 0, c;
734
735 if (! CHAR_TABLE_P (table))
736 table = Qnil;
737
738 while (len > 0)
739 {
740 int bytes, charset;
741 c = *str;
742
743 if (c == LEADING_CODE_COMPOSITION)
744 {
745 int cmpchar_id = str_cmpchar_id (str, len);
746 GLYPH *glyph;
747
748 if (cmpchar_id >= 0)
749 {
750 struct cmpchar_info *cmp_p = cmpchar_table[cmpchar_id];
751 int i;
752
753 for (i = 0; i < cmp_p->glyph_len; i++)
754 {
755 c = cmp_p->glyph[i];
756 if (!NILP (table))
757 {
758 if ((c = translate_char (table, c, 0, 0, 0)) < 0)
759 c = cmp_p->glyph[i];
760 }
761 if ((charset = CHAR_CHARSET (c)) < 0)
762 charset = CHARSET_ASCII;
763 if (!charsets[charset])
764 {
765 charsets[charset] = 1;
766 num += 1;
767 }
768 }
769 str += cmp_p->len;
770 len -= cmp_p->len;
771 if (cmpcharp && !charsets[CHARSET_COMPOSITION])
772 {
773 charsets[CHARSET_COMPOSITION] = 1;
774 num += 1;
775 }
776 continue;
777 }
778
779 charset = CHARSET_ASCII;
780 bytes = 1;
781 }
782 else
783 {
784 c = STRING_CHAR_AND_LENGTH (str, len, bytes);
785 if (! NILP (table))
786 {
787 int c1 = translate_char (table, c, 0, 0, 0);
788 if (c1 >= 0)
789 c = c1;
790 }
791 charset = CHAR_CHARSET (c);
792 }
793
794 if (!charsets[charset])
795 {
796 charsets[charset] = 1;
797 num += 1;
798 }
799 str += bytes;
800 len -= bytes;
801 }
802 return num;
803 }
804
805 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
806 2, 3, 0,
807 "Return a list of charsets in the region between BEG and END.\n\
808 BEG and END are buffer positions.\n\
809 If the region contains any composite character,\n\
810 `composition' is included in the returned list.\n\
811 Optional arg TABLE if non-nil is a translation table to look up.")
812 (beg, end, table)
813 Lisp_Object beg, end, table;
814 {
815 int charsets[MAX_CHARSET + 1];
816 int from, from_byte, to, stop, stop_byte, i;
817 Lisp_Object val;
818
819 validate_region (&beg, &end);
820 from = XFASTINT (beg);
821 stop = to = XFASTINT (end);
822
823 if (NILP (current_buffer->enable_multibyte_characters))
824 return (from == to
825 ? Qnil
826 : Fcons (Qascii, Qnil));
827
828 if (from < GPT && GPT < to)
829 {
830 stop = GPT;
831 stop_byte = GPT_BYTE;
832 }
833 else
834 stop_byte = CHAR_TO_BYTE (stop);
835
836 from_byte = CHAR_TO_BYTE (from);
837
838 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
839 while (1)
840 {
841 find_charset_in_str (BYTE_POS_ADDR (from_byte), stop_byte - from_byte,
842 charsets, table, 1);
843 if (stop < to)
844 {
845 from = stop, from_byte = stop_byte;
846 stop = to, stop_byte = CHAR_TO_BYTE (stop);
847 }
848 else
849 break;
850 }
851
852 val = Qnil;
853 for (i = MAX_CHARSET; i >= 0; i--)
854 if (charsets[i])
855 val = Fcons (CHARSET_SYMBOL (i), val);
856 return val;
857 }
858
859 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
860 1, 2, 0,
861 "Return a list of charsets in STR.\n\
862 If the string contains any composite characters,\n\
863 `composition' is included in the returned list.\n\
864 Optional arg TABLE if non-nil is a translation table to look up.")
865 (str, table)
866 Lisp_Object str, table;
867 {
868 int charsets[MAX_CHARSET + 1];
869 int i;
870 Lisp_Object val;
871
872 CHECK_STRING (str, 0);
873
874 if (! STRING_MULTIBYTE (str))
875 return (XSTRING (str)->size == 0
876 ? Qnil
877 : Fcons (Qascii, Qnil));
878
879 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
880 find_charset_in_str (XSTRING (str)->data, STRING_BYTES (XSTRING (str)),
881 charsets, table, 1);
882 val = Qnil;
883 for (i = MAX_CHARSET; i >= 0; i--)
884 if (charsets[i])
885 val = Fcons (CHARSET_SYMBOL (i), val);
886 return val;
887 }
888 \f
889 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
890 "")
891 (charset, code1, code2)
892 Lisp_Object charset, code1, code2;
893 {
894 CHECK_NUMBER (charset, 0);
895
896 if (NILP (code1))
897 XSETFASTINT (code1, 0);
898 else
899 CHECK_NUMBER (code1, 1);
900 if (NILP (code2))
901 XSETFASTINT (code2, 0);
902 else
903 CHECK_NUMBER (code2, 2);
904
905 if (!CHARSET_DEFINED_P (XINT (charset)))
906 error ("Invalid charset: %d", XINT (charset));
907
908 return make_number (MAKE_CHAR (XINT (charset), XINT (code1), XINT (code2)));
909 }
910
911 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
912 "Return list of charset and one or two position-codes of CHAR.")
913 (ch)
914 Lisp_Object ch;
915 {
916 Lisp_Object val;
917 int charset, c1, c2;
918
919 CHECK_NUMBER (ch, 0);
920 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
921 return (c2 >= 0
922 ? Fcons (CHARSET_SYMBOL (charset),
923 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
924 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
925 }
926
927 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
928 "Return charset of CHAR.")
929 (ch)
930 Lisp_Object ch;
931 {
932 CHECK_NUMBER (ch, 0);
933
934 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
935 }
936
937 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
938 "Return charset of a character in current buffer at position POS.\n\
939 If POS is nil, it defauls to the current point.")
940 (pos)
941 Lisp_Object pos;
942 {
943 register int pos_byte, c, charset;
944 register unsigned char *p;
945
946 if (NILP (pos))
947 pos_byte = PT_BYTE;
948 else if (MARKERP (pos))
949 pos_byte = marker_byte_position (pos);
950 else
951 {
952 CHECK_NUMBER (pos, 0);
953 pos_byte = CHAR_TO_BYTE (XINT (pos));
954 }
955 p = BYTE_POS_ADDR (pos_byte);
956 c = STRING_CHAR (p, Z_BYTE - pos_byte);
957 charset = CHAR_CHARSET (c);
958 return CHARSET_SYMBOL (charset);
959 }
960
961 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
962 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
963 \n\
964 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
965 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
966 where as Emacs distinguishes them by charset symbol.\n\
967 See the documentation of the function `charset-info' for the meanings of\n\
968 DIMENSION, CHARS, and FINAL-CHAR.")
969 (dimension, chars, final_char)
970 Lisp_Object dimension, chars, final_char;
971 {
972 int charset;
973
974 CHECK_NUMBER (dimension, 0);
975 CHECK_NUMBER (chars, 1);
976 CHECK_NUMBER (final_char, 2);
977
978 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
979 return Qnil;
980 return CHARSET_SYMBOL (charset);
981 }
982
983 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
984 generic character. If GENERICP is zero, return nonzero iff C is a
985 valid normal character. Do not call this function directly,
986 instead use macro CHAR_VALID_P. */
987 int
988 char_valid_p (c, genericp)
989 int c, genericp;
990 {
991 int charset, c1, c2;
992
993 if (c < 0)
994 return 0;
995 if (SINGLE_BYTE_CHAR_P (c))
996 return 1;
997 SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
998 if (charset != CHARSET_COMPOSITION && !CHARSET_DEFINED_P (charset))
999 return 0;
1000 return (c < MIN_CHAR_COMPOSITION
1001 ? ((c & CHAR_FIELD1_MASK) /* i.e. dimension of C is two. */
1002 ? (genericp && c1 == 0 && c2 == 0
1003 || c1 >= 32 && c2 >= 32)
1004 : (genericp && c1 == 0
1005 || c1 >= 32))
1006 : c < MIN_CHAR_COMPOSITION + n_cmpchars);
1007 }
1008
1009 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1010 "Return t if OBJECT is a valid normal character.\n\
1011 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1012 a valid generic character.")
1013 (object, genericp)
1014 Lisp_Object object, genericp;
1015 {
1016 if (! NATNUMP (object))
1017 return Qnil;
1018 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1019 }
1020
1021 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1022 Sunibyte_char_to_multibyte, 1, 1, 0,
1023 "Convert the unibyte character CH to multibyte character.\n\
1024 The conversion is done based on `nonascii-translation-table' (which see)\n\
1025 or `nonascii-insert-offset' (which see).")
1026 (ch)
1027 Lisp_Object ch;
1028 {
1029 int c;
1030
1031 CHECK_NUMBER (ch, 0);
1032 c = XINT (ch);
1033 if (c < 0 || c >= 0400)
1034 error ("Invalid unibyte character: %d", c);
1035 c = unibyte_char_to_multibyte (c);
1036 if (c < 0)
1037 error ("Can't convert to multibyte character: %d", XINT (ch));
1038 return make_number (c);
1039 }
1040
1041 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1042 Smultibyte_char_to_unibyte, 1, 1, 0,
1043 "Convert the multibyte character CH to unibyte character.\n\
1044 The conversion is done based on `nonascii-translation-table' (which see)\n\
1045 or `nonascii-insert-offset' (which see).")
1046 (ch)
1047 Lisp_Object ch;
1048 {
1049 int c;
1050
1051 CHECK_NUMBER (ch, 0);
1052 c = XINT (ch);
1053 if (c < 0)
1054 error ("Invalid multibyte character: %d", c);
1055 c = multibyte_char_to_unibyte (c, Qnil);
1056 if (c < 0)
1057 error ("Can't convert to unibyte character: %d", XINT (ch));
1058 return make_number (c);
1059 }
1060
1061 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1062 "Return 1 regardless of the argument CHAR.\n\
1063 This is now an obsolete function. We keep it just for backward compatibility.")
1064 (ch)
1065 Lisp_Object ch;
1066 {
1067 Lisp_Object val;
1068
1069 CHECK_NUMBER (ch, 0);
1070 return make_number (1);
1071 }
1072
1073 /* Return how many bytes C will occupy in a multibyte buffer.
1074 Don't call this function directly, instead use macro CHAR_BYTES. */
1075 int
1076 char_bytes (c)
1077 int c;
1078 {
1079 int bytes;
1080
1081 if (COMPOSITE_CHAR_P (c))
1082 {
1083 unsigned int id = COMPOSITE_CHAR_ID (c);
1084
1085 bytes = (id < n_cmpchars ? cmpchar_table[id]->len : 1);
1086 }
1087 else
1088 {
1089 int charset = CHAR_CHARSET (c);
1090
1091 bytes = CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1;
1092 }
1093
1094 return bytes;
1095 }
1096
1097 /* Return the width of character of which multi-byte form starts with
1098 C. The width is measured by how many columns occupied on the
1099 screen when displayed in the current buffer. */
1100
1101 #define ONE_BYTE_CHAR_WIDTH(c) \
1102 (c < 0x20 \
1103 ? (c == '\t' \
1104 ? XFASTINT (current_buffer->tab_width) \
1105 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1106 : (c < 0x7f \
1107 ? 1 \
1108 : (c == 0x7F \
1109 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1110 : ((! NILP (current_buffer->enable_multibyte_characters) \
1111 && BASE_LEADING_CODE_P (c)) \
1112 ? WIDTH_BY_CHAR_HEAD (c) \
1113 : 4))))
1114
1115 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1116 "Return width of CHAR when displayed in the current buffer.\n\
1117 The width is measured by how many columns it occupies on the screen.")
1118 (ch)
1119 Lisp_Object ch;
1120 {
1121 Lisp_Object val, disp;
1122 int c;
1123 struct Lisp_Char_Table *dp = buffer_display_table ();
1124
1125 CHECK_NUMBER (ch, 0);
1126
1127 c = XINT (ch);
1128
1129 /* Get the way the display table would display it. */
1130 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1131
1132 if (VECTORP (disp))
1133 XSETINT (val, XVECTOR (disp)->size);
1134 else if (SINGLE_BYTE_CHAR_P (c))
1135 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1136 else if (COMPOSITE_CHAR_P (c))
1137 {
1138 int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
1139 XSETFASTINT (val, (id < n_cmpchars ? cmpchar_table[id]->width : 0));
1140 }
1141 else
1142 {
1143 int charset = CHAR_CHARSET (c);
1144
1145 XSETFASTINT (val, CHARSET_WIDTH (charset));
1146 }
1147 return val;
1148 }
1149
1150 /* Return width of string STR of length LEN when displayed in the
1151 current buffer. The width is measured by how many columns it
1152 occupies on the screen. */
1153
1154 int
1155 strwidth (str, len)
1156 unsigned char *str;
1157 int len;
1158 {
1159 unsigned char *endp = str + len;
1160 int width = 0;
1161 struct Lisp_Char_Table *dp = buffer_display_table ();
1162
1163 while (str < endp)
1164 {
1165 if (*str == LEADING_CODE_COMPOSITION)
1166 {
1167 int id = str_cmpchar_id (str, endp - str);
1168
1169 if (id < 0)
1170 {
1171 width += 4;
1172 str++;
1173 }
1174 else
1175 {
1176 width += cmpchar_table[id]->width;
1177 str += cmpchar_table[id]->len;
1178 }
1179 }
1180 else
1181 {
1182 Lisp_Object disp;
1183 int thislen;
1184 int c = STRING_CHAR_AND_LENGTH (str, endp - str, thislen);
1185
1186 /* Get the way the display table would display it. */
1187 if (dp)
1188 disp = DISP_CHAR_VECTOR (dp, c);
1189 else
1190 disp = Qnil;
1191
1192 if (VECTORP (disp))
1193 width += XVECTOR (disp)->size;
1194 else
1195 width += ONE_BYTE_CHAR_WIDTH (*str);
1196
1197 str += thislen;
1198 }
1199 }
1200 return width;
1201 }
1202
1203 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1204 "Return width of STRING when displayed in the current buffer.\n\
1205 Width is measured by how many columns it occupies on the screen.\n\
1206 When calculating width of a multibyte character in STRING,\n\
1207 only the base leading-code is considered; the validity of\n\
1208 the following bytes is not checked.")
1209 (str)
1210 Lisp_Object str;
1211 {
1212 Lisp_Object val;
1213
1214 CHECK_STRING (str, 0);
1215 XSETFASTINT (val, strwidth (XSTRING (str)->data,
1216 STRING_BYTES (XSTRING (str))));
1217 return val;
1218 }
1219
1220 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1221 "Return the direction of CHAR.\n\
1222 The returned value is 0 for left-to-right and 1 for right-to-left.")
1223 (ch)
1224 Lisp_Object ch;
1225 {
1226 int charset;
1227
1228 CHECK_NUMBER (ch, 0);
1229 charset = CHAR_CHARSET (XFASTINT (ch));
1230 if (!CHARSET_DEFINED_P (charset))
1231 invalid_character (XINT (ch));
1232 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1233 }
1234
1235 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1236 "Return number of characters between BEG and END.")
1237 (beg, end)
1238 Lisp_Object beg, end;
1239 {
1240 int from, to;
1241
1242 CHECK_NUMBER_COERCE_MARKER (beg, 0);
1243 CHECK_NUMBER_COERCE_MARKER (end, 1);
1244
1245 from = min (XFASTINT (beg), XFASTINT (end));
1246 to = max (XFASTINT (beg), XFASTINT (end));
1247
1248 return make_number (to - from);
1249 }
1250
1251 /* Return the number of characters in the NBYTES bytes at PTR.
1252 This works by looking at the contents and checking for multibyte sequences.
1253 However, if the current buffer has enable-multibyte-characters = nil,
1254 we treat each byte as a character. */
1255
1256 int
1257 chars_in_text (ptr, nbytes)
1258 unsigned char *ptr;
1259 int nbytes;
1260 {
1261 unsigned char *endp, c;
1262 int chars;
1263
1264 /* current_buffer is null at early stages of Emacs initialization. */
1265 if (current_buffer == 0
1266 || NILP (current_buffer->enable_multibyte_characters))
1267 return nbytes;
1268
1269 endp = ptr + nbytes;
1270 chars = 0;
1271
1272 while (ptr < endp)
1273 {
1274 c = *ptr++;
1275
1276 if (BASE_LEADING_CODE_P (c))
1277 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
1278 chars++;
1279 }
1280
1281 return chars;
1282 }
1283
1284 /* Return the number of characters in the NBYTES bytes at PTR.
1285 This works by looking at the contents and checking for multibyte sequences.
1286 It ignores enable-multibyte-characters. */
1287
1288 int
1289 multibyte_chars_in_text (ptr, nbytes)
1290 unsigned char *ptr;
1291 int nbytes;
1292 {
1293 unsigned char *endp, c;
1294 int chars;
1295
1296 endp = ptr + nbytes;
1297 chars = 0;
1298
1299 while (ptr < endp)
1300 {
1301 c = *ptr++;
1302
1303 if (BASE_LEADING_CODE_P (c))
1304 while (ptr < endp && ! CHAR_HEAD_P (*ptr)) ptr++;
1305 chars++;
1306 }
1307
1308 return chars;
1309 }
1310
1311 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1312 "Concatenate all the argument characters and make the result a string.")
1313 (n, args)
1314 int n;
1315 Lisp_Object *args;
1316 {
1317 int i;
1318 unsigned char *buf
1319 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
1320 unsigned char *p = buf;
1321 Lisp_Object val;
1322
1323 for (i = 0; i < n; i++)
1324 {
1325 int c, len;
1326 unsigned char *str;
1327
1328 if (!INTEGERP (args[i]))
1329 CHECK_NUMBER (args[i], 0);
1330 c = XINT (args[i]);
1331 len = CHAR_STRING (c, p, str);
1332 if (p != str)
1333 /* C is a composite character. */
1334 bcopy (str, p, len);
1335 p += len;
1336 }
1337
1338 /* Here, we can't use make_string_from_bytes because of byte
1339 combining problem. */
1340 val = make_string (buf, p - buf);
1341 return val;
1342 }
1343
1344 #endif /* emacs */
1345 \f
1346 /*** Composite characters staffs ***/
1347
1348 /* Each composite character is identified by CMPCHAR-ID which is
1349 assigned when Emacs needs the character code of the composite
1350 character (e.g. when displaying it on the screen). See the
1351 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1352 composite character is represented in Emacs. */
1353
1354 /* If `static' is defined, it means that it is defined to null string. */
1355 #ifndef static
1356 /* The following function is copied from lread.c. */
1357 static int
1358 hash_string (ptr, len)
1359 unsigned char *ptr;
1360 int len;
1361 {
1362 register unsigned char *p = ptr;
1363 register unsigned char *end = p + len;
1364 register unsigned char c;
1365 register int hash = 0;
1366
1367 while (p != end)
1368 {
1369 c = *p++;
1370 if (c >= 0140) c -= 40;
1371 hash = ((hash<<3) + (hash>>28) + c);
1372 }
1373 return hash & 07777777777;
1374 }
1375 #endif
1376
1377 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
1378
1379 static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
1380
1381 /* Each element of `cmpchar_hash_table' is a pointer to an array of
1382 integer, where the 1st element is the size of the array, the 2nd
1383 element is how many elements are actually used in the array, and
1384 the remaining elements are CMPCHAR-IDs of composite characters of
1385 the same hash value. */
1386 #define CMPCHAR_HASH_SIZE(table) table[0]
1387 #define CMPCHAR_HASH_USED(table) table[1]
1388 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1389
1390 /* Return CMPCHAR-ID of the composite character in STR of the length
1391 LEN. If the composite character has not yet been registered,
1392 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1393 is the sole function for assigning CMPCHAR-ID. */
1394 int
1395 str_cmpchar_id (str, len)
1396 const unsigned char *str;
1397 int len;
1398 {
1399 int hash_idx, *hashp;
1400 unsigned char *buf;
1401 int embedded_rule; /* 1 if composition rule is embedded. */
1402 int chars; /* number of components. */
1403 int i;
1404 struct cmpchar_info *cmpcharp;
1405
1406 /* The second byte 0xFF means compostion rule is embedded. */
1407 embedded_rule = (str[1] == 0xFF);
1408
1409 /* At first, get the actual length of the composite character. */
1410 {
1411 const unsigned char *p, *endp = str + 1, *lastp = str + len;
1412 int bytes;
1413
1414 while (endp < lastp && ! CHAR_HEAD_P (*endp)) endp++;
1415 if (endp - str < 5)
1416 /* Any composite char have at least 5-byte length. */
1417 return -1;
1418
1419 chars = 0;
1420 p = str + 1;
1421 while (p < endp)
1422 {
1423 if (embedded_rule)
1424 {
1425 p++;
1426 if (p >= endp)
1427 return -1;
1428 }
1429 /* No need of checking if *P is 0xA0 because
1430 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1431 p += BYTES_BY_CHAR_HEAD (*p - 0x20);
1432 chars++;
1433 }
1434 if (p > endp || chars < 2 || chars > MAX_COMPONENT_COUNT)
1435 /* Invalid components. */
1436 return -1;
1437 len = p - str;
1438 }
1439 hash_idx = hash_string (str, len) % CMPCHAR_HASH_TABLE_SIZE;
1440 hashp = cmpchar_hash_table[hash_idx];
1441
1442 /* Then, look into the hash table. */
1443 if (hashp != NULL)
1444 /* Find the correct one among composite characters of the same
1445 hash value. */
1446 for (i = 2; i < CMPCHAR_HASH_USED (hashp); i++)
1447 {
1448 cmpcharp = cmpchar_table[CMPCHAR_HASH_CMPCHAR_ID (hashp, i)];
1449 if (len == cmpcharp->len
1450 && ! bcmp (str, cmpcharp->data, len))
1451 return CMPCHAR_HASH_CMPCHAR_ID (hashp, i);
1452 }
1453
1454 /* We have to register the composite character in cmpchar_table. */
1455 if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
1456 /* No, we have no more room for a new composite character. */
1457 return -1;
1458
1459 /* Make the entry in hash table. */
1460 if (hashp == NULL)
1461 {
1462 /* Make a table for 8 composite characters initially. */
1463 hashp = (cmpchar_hash_table[hash_idx]
1464 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1465 CMPCHAR_HASH_SIZE (hashp) = 10;
1466 CMPCHAR_HASH_USED (hashp) = 2;
1467 }
1468 else if (CMPCHAR_HASH_USED (hashp) >= CMPCHAR_HASH_SIZE (hashp))
1469 {
1470 CMPCHAR_HASH_SIZE (hashp) += 8;
1471 hashp = (cmpchar_hash_table[hash_idx]
1472 = (int *) xrealloc (hashp,
1473 sizeof (int) * CMPCHAR_HASH_SIZE (hashp)));
1474 }
1475 CMPCHAR_HASH_CMPCHAR_ID (hashp, CMPCHAR_HASH_USED (hashp)) = n_cmpchars;
1476 CMPCHAR_HASH_USED (hashp)++;
1477
1478 /* Set information of the composite character in cmpchar_table. */
1479 if (cmpchar_table_size == 0)
1480 {
1481 /* This is the first composite character to be registered. */
1482 cmpchar_table_size = 256;
1483 cmpchar_table
1484 = (struct cmpchar_info **) xmalloc (sizeof (cmpchar_table[0])
1485 * cmpchar_table_size);
1486 }
1487 else if (cmpchar_table_size <= n_cmpchars)
1488 {
1489 cmpchar_table_size += 256;
1490 cmpchar_table
1491 = (struct cmpchar_info **) xrealloc (cmpchar_table,
1492 sizeof (cmpchar_table[0])
1493 * cmpchar_table_size);
1494 }
1495
1496 cmpcharp = (struct cmpchar_info *) xmalloc (sizeof (struct cmpchar_info));
1497
1498 cmpcharp->len = len;
1499 cmpcharp->data = (unsigned char *) xmalloc (len + 1);
1500 bcopy (str, cmpcharp->data, len);
1501 cmpcharp->data[len] = 0;
1502 cmpcharp->glyph_len = chars;
1503 cmpcharp->glyph = (GLYPH *) xmalloc (sizeof (GLYPH) * chars);
1504 if (embedded_rule)
1505 {
1506 cmpcharp->cmp_rule = (unsigned char *) xmalloc (chars);
1507 cmpcharp->col_offset = (float *) xmalloc (sizeof (float) * chars);
1508 }
1509 else
1510 {
1511 cmpcharp->cmp_rule = NULL;
1512 cmpcharp->col_offset = NULL;
1513 }
1514
1515 /* Setup GLYPH data and composition rules (if any) so as not to make
1516 them every time on displaying. */
1517 {
1518 unsigned char *bufp;
1519 int width;
1520 float leftmost = 0.0, rightmost = 1.0;
1521
1522 if (embedded_rule)
1523 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1524 cmpcharp->col_offset[0] = 0;
1525
1526 for (i = 0, bufp = cmpcharp->data + 1; i < chars; i++)
1527 {
1528 if (embedded_rule)
1529 cmpcharp->cmp_rule[i] = *bufp++;
1530
1531 if (*bufp == 0xA0) /* This is an ASCII character. */
1532 {
1533 cmpcharp->glyph[i] = FAST_MAKE_GLYPH ((*++bufp & 0x7F), 0);
1534 width = 1;
1535 bufp++;
1536 }
1537 else /* Multibyte character. */
1538 {
1539 /* Make `bufp' point normal multi-byte form temporally. */
1540 *bufp -= 0x20;
1541 cmpcharp->glyph[i]
1542 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp, 4, 0, 0), 0);
1543 width = WIDTH_BY_CHAR_HEAD (*bufp);
1544 *bufp += 0x20;
1545 bufp += BYTES_BY_CHAR_HEAD (*bufp - 0x20);
1546 }
1547
1548 if (embedded_rule && i > 0)
1549 {
1550 /* Reference points (global_ref and new_ref) are
1551 encoded as below:
1552
1553 0--1--2 -- ascent
1554 | |
1555 | |
1556 | 4 -+--- center
1557 -- 3 5 -- baseline
1558 | |
1559 6--7--8 -- descent
1560
1561 Now, we calculate the column offset of the new glyph
1562 from the left edge of the first glyph. This can avoid
1563 the same calculation everytime displaying this
1564 composite character. */
1565
1566 /* Reference points of global glyph and new glyph. */
1567 int global_ref = (cmpcharp->cmp_rule[i] - 0xA0) / 9;
1568 int new_ref = (cmpcharp->cmp_rule[i] - 0xA0) % 9;
1569 /* Column offset relative to the first glyph. */
1570 float left = (leftmost
1571 + (global_ref % 3) * (rightmost - leftmost) / 2.0
1572 - (new_ref % 3) * width / 2.0);
1573
1574 cmpcharp->col_offset[i] = left;
1575 if (left < leftmost)
1576 leftmost = left;
1577 if (left + width > rightmost)
1578 rightmost = left + width;
1579 }
1580 else
1581 {
1582 if (width > rightmost)
1583 rightmost = width;
1584 }
1585 }
1586 if (embedded_rule)
1587 {
1588 /* Now col_offset[N] are relative to the left edge of the
1589 first component. Make them relative to the left edge of
1590 overall glyph. */
1591 for (i = 0; i < chars; i++)
1592 cmpcharp->col_offset[i] -= leftmost;
1593 /* Make rightmost holds width of overall glyph. */
1594 rightmost -= leftmost;
1595 }
1596
1597 cmpcharp->width = rightmost;
1598 if (cmpcharp->width < rightmost)
1599 /* To get a ceiling integer value. */
1600 cmpcharp->width++;
1601 }
1602
1603 cmpchar_table[n_cmpchars] = cmpcharp;
1604
1605 return n_cmpchars++;
1606 }
1607
1608 /* Return the Nth element of the composite character C. If NOERROR is
1609 nonzero, return 0 on error condition (C is an invalid composite
1610 charcter, or N is out of range). */
1611 int
1612 cmpchar_component (c, n, noerror)
1613 int c, n, noerror;
1614 {
1615 int id = COMPOSITE_CHAR_ID (c);
1616
1617 if (id < 0 || id >= n_cmpchars)
1618 {
1619 /* C is not a valid composite character. */
1620 if (noerror) return 0;
1621 error ("Invalid composite character: %d", c) ;
1622 }
1623 if (n >= cmpchar_table[id]->glyph_len)
1624 {
1625 /* No such component. */
1626 if (noerror) return 0;
1627 args_out_of_range (make_number (c), make_number (n));
1628 }
1629 /* No face data is stored in glyph code. */
1630 return ((int) (cmpchar_table[id]->glyph[n]));
1631 }
1632
1633 DEFUN ("cmpcharp", Fcmpcharp, Scmpcharp, 1, 1, 0,
1634 "T if CHAR is a composite character.")
1635 (ch)
1636 Lisp_Object ch;
1637 {
1638 CHECK_NUMBER (ch, 0);
1639 return (COMPOSITE_CHAR_P (XINT (ch)) ? Qt : Qnil);
1640 }
1641
1642 DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component,
1643 2, 2, 0,
1644 "Return the Nth component character of composite character CHARACTER.")
1645 (character, n)
1646 Lisp_Object character, n;
1647 {
1648 int id;
1649
1650 CHECK_NUMBER (character, 0);
1651 CHECK_NUMBER (n, 1);
1652
1653 return (make_number (cmpchar_component (XINT (character), XINT (n), 0)));
1654 }
1655
1656 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
1657 2, 2, 0,
1658 "Return the Nth composition rule of composite character CHARACTER.\n\
1659 The returned rule is for composing the Nth component\n\
1660 on the (N-1)th component.\n\
1661 If CHARACTER should be composed relatively or N is 0, return 255.")
1662 (character, n)
1663 Lisp_Object character, n;
1664 {
1665 int id;
1666
1667 CHECK_NUMBER (character, 0);
1668 CHECK_NUMBER (n, 1);
1669
1670 id = COMPOSITE_CHAR_ID (XINT (character));
1671 if (id < 0 || id >= n_cmpchars)
1672 error ("Invalid composite character: %d", XINT (character));
1673 if (XINT (n) < 0 || XINT (n) >= cmpchar_table[id]->glyph_len)
1674 args_out_of_range (character, n);
1675
1676 return make_number (cmpchar_table[id]->cmp_rule
1677 ? cmpchar_table[id]->cmp_rule[XINT (n)]
1678 : 255);
1679 }
1680
1681 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p,
1682 Scmpchar_cmp_rule_p, 1, 1, 0,
1683 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1684 (character)
1685 Lisp_Object character;
1686 {
1687 int id;
1688
1689 CHECK_NUMBER (character, 0);
1690 id = COMPOSITE_CHAR_ID (XINT (character));
1691 if (id < 0 || id >= n_cmpchars)
1692 error ("Invalid composite character: %d", XINT (character));
1693
1694 return (cmpchar_table[id]->cmp_rule ? Qt : Qnil);
1695 }
1696
1697 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count,
1698 Scmpchar_cmp_count, 1, 1, 0,
1699 "Return number of compoents of composite character CHARACTER.")
1700 (character)
1701 Lisp_Object character;
1702 {
1703 int id;
1704
1705 CHECK_NUMBER (character, 0);
1706 id = COMPOSITE_CHAR_ID (XINT (character));
1707 if (id < 0 || id >= n_cmpchars)
1708 error ("Invalid composite character: %d", XINT (character));
1709
1710 return (make_number (cmpchar_table[id]->glyph_len));
1711 }
1712
1713 DEFUN ("compose-string", Fcompose_string, Scompose_string,
1714 1, 1, 0,
1715 "Return one char string composed from all characters in STRING.")
1716 (str)
1717 Lisp_Object str;
1718 {
1719 unsigned char buf[MAX_LENGTH_OF_MULTI_BYTE_FORM], *p, *pend, *ptemp;
1720 int len, i;
1721
1722 CHECK_STRING (str, 0);
1723
1724 buf[0] = LEADING_CODE_COMPOSITION;
1725 p = XSTRING (str)->data;
1726 pend = p + STRING_BYTES (XSTRING (str));
1727 i = 1;
1728 while (p < pend)
1729 {
1730 if (*p < 0x20) /* control code */
1731 error ("Invalid component character: %d", *p);
1732 else if (*p < 0x80) /* ASCII */
1733 {
1734 if (i + 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1735 error ("Too long string to be composed: %s", XSTRING (str)->data);
1736 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1737 code itself. */
1738 buf[i++] = 0xA0;
1739 buf[i++] = *p++ + 0x80;
1740 }
1741 else if (*p == LEADING_CODE_COMPOSITION) /* composite char */
1742 {
1743 /* Already composed. Eliminate the heading
1744 LEADING_CODE_COMPOSITION, keep the remaining bytes
1745 unchanged. */
1746 p++;
1747 if (*p == 255)
1748 error ("Can't compose a rule-based composition character");
1749 ptemp = p;
1750 while (! CHAR_HEAD_P (*p)) p++;
1751 if (str_cmpchar_id (ptemp - 1, p - ptemp + 1) < 0)
1752 error ("Can't compose an invalid composition character");
1753 if (i + (p - ptemp) >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1754 error ("Too long string to be composed: %s", XSTRING (str)->data);
1755 bcopy (ptemp, buf + i, p - ptemp);
1756 i += p - ptemp;
1757 }
1758 else /* multibyte char */
1759 {
1760 /* Add 0x20 to the base leading-code, keep the remaining
1761 bytes unchanged. */
1762 int c = STRING_CHAR_AND_CHAR_LENGTH (p, pend - p, len);
1763
1764 if (len <= 1 || ! CHAR_VALID_P (c, 0))
1765 error ("Can't compose an invalid character");
1766 if (i + len >= MAX_LENGTH_OF_MULTI_BYTE_FORM)
1767 error ("Too long string to be composed: %s", XSTRING (str)->data);
1768 bcopy (p, buf + i, len);
1769 buf[i] += 0x20;
1770 p += len, i += len;
1771 }
1772 }
1773
1774 if (i < 5)
1775 /* STR contains only one character, which can't be composed. */
1776 error ("Too short string to be composed: %s", XSTRING (str)->data);
1777
1778 return make_string_from_bytes (buf, 1, i);
1779 }
1780
1781 \f
1782 int
1783 charset_id_internal (charset_name)
1784 char *charset_name;
1785 {
1786 Lisp_Object val;
1787
1788 val= Fget (intern (charset_name), Qcharset);
1789 if (!VECTORP (val))
1790 error ("Charset %s is not defined", charset_name);
1791
1792 return (XINT (XVECTOR (val)->contents[0]));
1793 }
1794
1795 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1796 Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1797 ()
1798 {
1799 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1800 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1801 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1802 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1803 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1804 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1805 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1806 return Qnil;
1807 }
1808
1809 void
1810 init_charset_once ()
1811 {
1812 int i, j, k;
1813
1814 staticpro (&Vcharset_table);
1815 staticpro (&Vcharset_symbol_table);
1816 staticpro (&Vgeneric_character_list);
1817
1818 /* This has to be done here, before we call Fmake_char_table. */
1819 Qcharset_table = intern ("charset-table");
1820 staticpro (&Qcharset_table);
1821
1822 /* Intern this now in case it isn't already done.
1823 Setting this variable twice is harmless.
1824 But don't staticpro it here--that is done in alloc.c. */
1825 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1826
1827 /* Now we are ready to set up this property, so we can
1828 create the charset table. */
1829 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1830 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1831
1832 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
1833
1834 /* Setup tables. */
1835 for (i = 0; i < 2; i++)
1836 for (j = 0; j < 2; j++)
1837 for (k = 0; k < 128; k++)
1838 iso_charset_table [i][j][k] = -1;
1839
1840 bzero (cmpchar_hash_table, sizeof cmpchar_hash_table);
1841 cmpchar_table_size = n_cmpchars = 0;
1842
1843 for (i = 0; i < 256; i++)
1844 BYTES_BY_CHAR_HEAD (i) = 1;
1845 for (i = MIN_CHARSET_OFFICIAL_DIMENSION1;
1846 i <= MAX_CHARSET_OFFICIAL_DIMENSION1; i++)
1847 BYTES_BY_CHAR_HEAD (i) = 2;
1848 for (i = MIN_CHARSET_OFFICIAL_DIMENSION2;
1849 i <= MAX_CHARSET_OFFICIAL_DIMENSION2; i++)
1850 BYTES_BY_CHAR_HEAD (i) = 3;
1851 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 3;
1852 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 3;
1853 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 4;
1854 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 4;
1855 /* The followings don't reflect the actual bytes, but just to tell
1856 that it is a start of a multibyte character. */
1857 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION) = 2;
1858 BYTES_BY_CHAR_HEAD (0x9E) = 2;
1859 BYTES_BY_CHAR_HEAD (0x9F) = 2;
1860
1861 for (i = 0; i < 128; i++)
1862 WIDTH_BY_CHAR_HEAD (i) = 1;
1863 for (; i < 256; i++)
1864 WIDTH_BY_CHAR_HEAD (i) = 4;
1865 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11) = 1;
1866 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12) = 2;
1867 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21) = 1;
1868 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22) = 2;
1869
1870 {
1871 Lisp_Object val;
1872
1873 val = Qnil;
1874 for (i = 0x81; i < 0x90; i++)
1875 val = Fcons (make_number ((i - 0x70) << 7), val);
1876 for (; i < 0x9A; i++)
1877 val = Fcons (make_number ((i - 0x8F) << 14), val);
1878 for (i = 0xA0; i < 0xF0; i++)
1879 val = Fcons (make_number ((i - 0x70) << 7), val);
1880 for (; i < 0xFF; i++)
1881 val = Fcons (make_number ((i - 0xE0) << 14), val);
1882 val = Fcons (make_number (GENERIC_COMPOSITION_CHAR), val);
1883 Vgeneric_character_list = Fnreverse (val);
1884 }
1885
1886 nonascii_insert_offset = 0;
1887 Vnonascii_translation_table = Qnil;
1888 }
1889
1890 #ifdef emacs
1891
1892 void
1893 syms_of_charset ()
1894 {
1895 Qascii = intern ("ascii");
1896 staticpro (&Qascii);
1897
1898 Qcharset = intern ("charset");
1899 staticpro (&Qcharset);
1900
1901 /* Define ASCII charset now. */
1902 update_charset_table (make_number (CHARSET_ASCII),
1903 make_number (1), make_number (94),
1904 make_number (1),
1905 make_number (0),
1906 make_number ('B'),
1907 make_number (0),
1908 build_string ("ASCII"),
1909 build_string ("ASCII"),
1910 build_string ("ASCII (ISO646 IRV)"));
1911 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1912 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1913
1914 Qcomposition = intern ("composition");
1915 staticpro (&Qcomposition);
1916 CHARSET_SYMBOL (CHARSET_COMPOSITION) = Qcomposition;
1917
1918 Qauto_fill_chars = intern ("auto-fill-chars");
1919 staticpro (&Qauto_fill_chars);
1920 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1921
1922 defsubr (&Sdefine_charset);
1923 defsubr (&Sgeneric_character_list);
1924 defsubr (&Sget_unused_iso_final_char);
1925 defsubr (&Sdeclare_equiv_charset);
1926 defsubr (&Sfind_charset_region);
1927 defsubr (&Sfind_charset_string);
1928 defsubr (&Smake_char_internal);
1929 defsubr (&Ssplit_char);
1930 defsubr (&Schar_charset);
1931 defsubr (&Scharset_after);
1932 defsubr (&Siso_charset);
1933 defsubr (&Schar_valid_p);
1934 defsubr (&Sunibyte_char_to_multibyte);
1935 defsubr (&Smultibyte_char_to_unibyte);
1936 defsubr (&Schar_bytes);
1937 defsubr (&Schar_width);
1938 defsubr (&Sstring_width);
1939 defsubr (&Schar_direction);
1940 defsubr (&Schars_in_region);
1941 defsubr (&Sstring);
1942 defsubr (&Scmpcharp);
1943 defsubr (&Scmpchar_component);
1944 defsubr (&Scmpchar_cmp_rule);
1945 defsubr (&Scmpchar_cmp_rule_p);
1946 defsubr (&Scmpchar_cmp_count);
1947 defsubr (&Scompose_string);
1948 defsubr (&Ssetup_special_charsets);
1949
1950 DEFVAR_LISP ("charset-list", &Vcharset_list,
1951 "List of charsets ever defined.");
1952 Vcharset_list = Fcons (Qascii, Qnil);
1953
1954 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
1955 "Vector of cons cell of a symbol and translation table ever defined.\n\
1956 An ID of a translation table is an index of this vector.");
1957 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1958
1959 DEFVAR_INT ("leading-code-composition", &leading_code_composition,
1960 "Leading-code of composite characters.");
1961 leading_code_composition = LEADING_CODE_COMPOSITION;
1962
1963 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1964 "Leading-code of private TYPE9N charset of column-width 1.");
1965 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1966
1967 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1968 "Leading-code of private TYPE9N charset of column-width 2.");
1969 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1970
1971 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1972 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1973 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1974
1975 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1976 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1977 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1978
1979 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1980 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1981 This is used for converting unibyte text to multibyte,\n\
1982 and for inserting character codes specified by number.\n\n\
1983 This serves to convert a Latin-1 or similar 8-bit character code\n\
1984 to the corresponding Emacs multibyte character code.\n\
1985 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1986 for your choice of character set.\n\
1987 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1988 nonascii_insert_offset = 0;
1989
1990 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1991 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1992 This is used for converting unibyte text to multibyte,\n\
1993 and for inserting character codes specified by number.\n\n\
1994 Conversion is performed only when multibyte characters are enabled,\n\
1995 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1996 to the corresponding Emacs character code.\n\n\
1997 If this is nil, `nonascii-insert-offset' is used instead.\n\
1998 See also the docstring of `make-translation-table'.");
1999 Vnonascii_translation_table = Qnil;
2000
2001 DEFVAR_INT ("min-composite-char", &min_composite_char,
2002 "Minimum character code of a composite character.");
2003 min_composite_char = MIN_CHAR_COMPOSITION;
2004
2005 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
2006 "A char-table for characters which invoke auto-filling.\n\
2007 Such characters has value t in this table.");
2008 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
2009 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
2010 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
2011 }
2012
2013 #endif /* emacs */