]> code.delx.au - gnu-emacs/blob - src/charset.c
* alloc.c (make_interval, allocate_string)
[gnu-emacs] / src / charset.c
1 /* Basic multilingual character support.
2 Copyright (C) 2001, 2002, 2003, 2004, 2005 Free Software Foundation, Inc.
3 Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
4 National Institute of Advanced Industrial Science and Technology (AIST)
5 Registration Number H14PRO021
6
7 This file is part of GNU Emacs.
8
9 GNU Emacs is free software; you can redistribute it and/or modify
10 it under the terms of the GNU General Public License as published by
11 the Free Software Foundation; either version 2, or (at your option)
12 any later version.
13
14 GNU Emacs is distributed in the hope that it will be useful,
15 but WITHOUT ANY WARRANTY; without even the implied warranty of
16 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 GNU General Public License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GNU Emacs; see the file COPYING. If not, write to
21 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
22 Boston, MA 02110-1301, USA. */
23
24 /* At first, see the document in `charset.h' to understand the code in
25 this file. */
26
27 #ifdef emacs
28 #include <config.h>
29 #endif
30
31 #include <stdio.h>
32
33 #ifdef emacs
34
35 #include <sys/types.h>
36 #include "lisp.h"
37 #include "buffer.h"
38 #include "charset.h"
39 #include "composite.h"
40 #include "coding.h"
41 #include "disptab.h"
42
43 #else /* not emacs */
44
45 #include "mulelib.h"
46
47 #endif /* emacs */
48
/* Lisp symbols.  Qcharset names the symbol property under which each
   charset symbol keeps its information vector (see get_charset_id and
   Fdefine_charset).  Qascii and Qunknown appear in the lists built by
   Ffind_charset_region and Ffind_charset_string.  */
Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
Lisp_Object Qunknown;

/* Declaration of special leading-codes.  */
EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */

/* Declaration of special charsets.  The values are set by
   Fsetup_special_charsets.  */
int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
int charset_mule_unicode_0100_24ff;
int charset_mule_unicode_2500_33ff;
int charset_mule_unicode_e000_ffff;

Lisp_Object Qcharset_table;

/* A char-table containing information about each character set.  */
Lisp_Object Vcharset_table;

/* A vector of charset symbols indexed by charset-id.  This is used
   only for returning a charset symbol from C functions.  */
Lisp_Object Vcharset_symbol_table;

/* A list of all charset symbols ever defined.  Fdefine_charset pushes
   each newly defined symbol onto the front of this list.  */
Lisp_Object Vcharset_list;

/* Vector of all translation tables ever defined.
   The ID of a translation table is used to index this vector.  */
Lisp_Object Vtranslation_table_vector;

/* A char-table for characters which may invoke auto-filling.  */
Lisp_Object Vauto_fill_chars;

Lisp_Object Qauto_fill_chars;

/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.
   update_charset_table fills in the entries for official charsets,
   indexed by the charset's base leading-code.  */
int bytes_by_char_head[256];
int width_by_char_head[256];

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL-CHAR) to Emacs' charset.  */
int iso_charset_table[2][2][128];

/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
unsigned char *_fetch_multibyte_char_p;
int _fetch_multibyte_char_len;

/* Offset to add to a non-ASCII value when inserting it.  */
EMACS_INT nonascii_insert_offset;

/* Translation table for converting non-ASCII unibyte characters
   to multibyte codes, or nil.  */
Lisp_Object Vnonascii_translation_table;

/* List of all possible generic characters.  */
Lisp_Object Vgeneric_character_list;
113
114 \f
/* Report C as an invalid character code by calling `error'.  The
   message shows C in decimal, octal, and hexadecimal.  */
void
invalid_character (c)
     int c;
{
  error ("Invalid character: %d, #o%o, #x%x", c, c, c);
}
121
/* Parse the multibyte sequence at STR and fetch information about the
   character found there.  Set BYTES to the byte length the character
   occupies (BYTES_BY_CHAR_HEAD of the first byte), and CHARSET, C1,
   C2 to the proper values for the character:

     1 byte:   CHARSET is CHARSET_ASCII or CHARSET_8_BIT_GRAPHIC and
               C1 is the byte itself.
     2 bytes:  if the first byte is LEADING_CODE_8_BIT_CONTROL,
               CHARSET is CHARSET_8_BIT_CONTROL and C1 is the second
               byte minus 0x20; otherwise CHARSET is the first byte
               and C1 is the low 7 bits of the second byte.
     3 bytes:  if the first byte is below LEADING_CODE_PRIVATE_11,
               CHARSET is the first byte and C1, C2 are the low 7 bits
               of the second and third bytes; otherwise CHARSET is the
               second byte and C1 is the low 7 bits of the third byte.
     others:   CHARSET is the second byte and C1, C2 are the low 7
               bits of the third and fourth bytes.

   C2 is left unmodified by the forms that carry only one position
   code.  LENGTH is not referenced anywhere in the expansion, so no
   bounds checking against it is performed here.  STR, BYTES and C1
   are evaluated more than once.  */

#define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
  do {                                                                       \
    (c1) = *(str);                                                           \
    (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
    if ((bytes) == 1)                                                        \
      (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
    else if ((bytes) == 2)                                                   \
      {                                                                      \
        if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
          (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
        else                                                                 \
          (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
      }                                                                      \
    else if ((bytes) == 3)                                                   \
      {                                                                      \
        if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
          (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
        else                                                                 \
          (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
      }                                                                      \
    else                                                                     \
      (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
  } while (0)
149
/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
   Note that this intentionally allows invalid components, such
   as 0xA0 0xA0, because there exist many files that contain
   such invalid byte sequences, especially in EUC-GB.

   The accepted ranges are:
     CHARSET_ASCII:           C1 in 0..0x7F
     CHARSET_8_BIT_CONTROL:   C1 in 0x80..0x9F
     CHARSET_8_BIT_GRAPHIC:   C1 in 0x80..0xFF
     dimension-1 charsets:    C1 in 0x20..0x7F (C2 is not examined)
     all other charsets:      C1 and C2 both in 0x20..0x7F

   CHARSET, C1 and C2 may each be evaluated more than once.  */
#define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
  ((charset) == CHARSET_ASCII                           \
   ? ((c1) >= 0 && (c1) <= 0x7F)                        \
   : ((charset) == CHARSET_8_BIT_CONTROL                \
      ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
      : ((charset) == CHARSET_8_BIT_GRAPHIC             \
         ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
         : (CHARSET_DIMENSION (charset) == 1            \
            ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
            : ((c1) >= 0x20 && (c1) <= 0x7F             \
               && (c2) >= 0x20 && (c2) <= 0x7F)))))
165
/* Store multi-byte form of the character C in STR.  The caller should
   allocate at least 4-byte area at STR in advance.  Returns the
   length of the multi-byte form.  If C is an invalid character code,
   return -1.

   If C carries modifier bits, they are first folded into the
   character code where that is possible (Meta, Shift, Control); any
   modifier bits that remain afterwards are silently dropped.  Note
   that on some of the -1 paths STR may already have been written
   to.  */

int
char_to_string_1 (c, str)
     int c;
     unsigned char *str;
{
  unsigned char *p = str;

  if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative. */
    {
      /* Multibyte character can't have a modifier bit.  */
      if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
        return -1;

      /* For Meta, Shift, and Control modifiers, we need special care.  */
      if (c & CHAR_META)
        {
          /* Move the meta bit to the right place for a string.  */
          c = (c & ~CHAR_META) | 0x80;
        }
      if (c & CHAR_SHIFT)
        {
          /* Shift modifier is valid only with [A-Za-z].  An upper-case
             letter simply loses the modifier; a lower-case letter is
             converted to upper case.  */
          if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
            c &= ~CHAR_SHIFT;
          else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
            c = (c & ~CHAR_SHIFT) - ('a' - 'A');
        }
      if (c & CHAR_CTL)
        {
          /* Simulate the code in lread.c.  */
          /* Allow `\C- ' and `\C-?'.  */
          if (c == (CHAR_CTL | ' '))
            c = 0;
          else if (c == (CHAR_CTL | '?'))
            c = 127;
          /* ASCII control chars are made from letters (both cases),
             as well as the non-letters within 0100...0137.  */
          else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
            c &= (037 | (~0177 & ~CHAR_CTL));
          else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
            c &= (037 | (~0177 & ~CHAR_CTL));
        }

      /* If C still has any modifier bits, just ignore it.  */
      c &= ~CHAR_MODIFIER_MASK;
    }

  if (SINGLE_BYTE_CHAR_P (c))
    {
      /* ASCII bytes and codes from 0xA0 up are stored as they are;
         the remaining single-byte codes are stored as
         LEADING_CODE_8_BIT_CONTROL followed by the code plus 0x20.  */
      if (ASCII_BYTE_P (c) || c >= 0xA0)
        *p++ = c;
      else
        {
          *p++ = LEADING_CODE_8_BIT_CONTROL;
          *p++ = c + 0x20;
        }
    }
  else if (CHAR_VALID_P (c, 0))
    {
      int charset, c1, c2;

      SPLIT_CHAR (c, charset, c1, c2);

      /* Charset IDs at or above LEADING_CODE_EXT_11 are preceded by
         the LEADING_CODE_PRIVATE_xx byte chosen by the range the ID
         falls in; the ID itself follows as the next byte.  */
      if (charset >= LEADING_CODE_EXT_11)
        *p++ = (charset < LEADING_CODE_EXT_12
                ? LEADING_CODE_PRIVATE_11
                : (charset < LEADING_CODE_EXT_21
                   ? LEADING_CODE_PRIVATE_12
                   : (charset < LEADING_CODE_EXT_22
                      ? LEADING_CODE_PRIVATE_21
                      : LEADING_CODE_PRIVATE_22)));
      *p++ = charset;
      /* Position codes in 1..31 are rejected.  The leading code(s)
         have already been stored in STR at this point.  */
      if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
        return -1;
      /* Position codes are stored with the high bit set.  When C1 is
         zero, only the leading code(s) are emitted.  */
      if (c1)
        {
          *p++ = c1 | 0x80;
          if (c2 > 0)
            *p++ = c2 | 0x80;
        }
    }
  else
    return -1;

  return (p - str);
}
257
258
/* Write the multi-byte form of character C at STR and return its
   length in bytes.  The caller must have reserved at least 4 bytes at
   STR.  This is char_to_string_1 with one difference: when C is an
   invalid character code, an error is signaled (through
   invalid_character) instead of -1 being handed back silently.

   If C can be an ASCII character, use the macro `CHAR_STRING (C, STR)'
   rather than calling this function directly.  */

int
char_to_string (c, str)
     int c;
     unsigned char *str;
{
  int nbytes = char_to_string_1 (c, str);

  if (nbytes == -1)
    invalid_character (c);
  return nbytes;
}
278
279
/* Decode the multi-byte form at STR (of length LEN) and return the
   non-ASCII character it represents.  When ACTUAL_LEN is not NULL,
   the byte length of that multibyte form is stored in *ACTUAL_LEN.

   Callers that want to handle ASCII characters as well should use the
   macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
   this function directly.  */

int
string_to_char (str, len, actual_len)
     const unsigned char *str;
     int len, *actual_len;
{
  int nbytes, cs, pos1, pos2;

  SPLIT_MULTIBYTE_SEQ (str, len, nbytes, cs, pos1, pos2);
  if (actual_len)
    *actual_len = nbytes;
  return MAKE_CHAR (cs, pos1, pos2);
}
301
/* Return the byte length of the multi-byte form found at string STR,
   which is LEN bytes long, as computed by PARSE_MULTIBYTE_SEQ.
   Use the macro MULTIBYTE_FORM_LENGTH instead.  */
int
multibyte_form_length (str, len)
     const unsigned char *str;
     int len;
{
  int form_len;

  PARSE_MULTIBYTE_SEQ (str, len, form_len);
  return form_len;
}
314
315 /* Check multibyte form at string STR of length LEN and set variables
316 pointed by CHARSET, C1, and C2 to charset and position codes of the
317 character at STR, and return 0. If there's no multibyte character,
318 return -1. This should be used only in the macro SPLIT_STRING
319 which checks range of STR in advance. */
320
321 int
322 split_string (str, len, charset, c1, c2)
323 const unsigned char *str;
324 unsigned char *c1, *c2;
325 int len, *charset;
326 {
327 register int bytes, cs, code1, code2 = -1;
328
329 SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
330 if (cs == CHARSET_ASCII)
331 return -1;
332 *charset = cs;
333 *c1 = code1;
334 *c2 = code2;
335 return 0;
336 }
337
338 /* Return 1 iff character C has valid printable glyph.
339 Use the macro CHAR_PRINTABLE_P instead. */
340 int
341 char_printable_p (c)
342 int c;
343 {
344 int charset, c1, c2;
345
346 if (ASCII_BYTE_P (c))
347 return 1;
348 else if (SINGLE_BYTE_CHAR_P (c))
349 return 0;
350 else if (c >= MAX_CHAR)
351 return 0;
352
353 SPLIT_CHAR (c, charset, c1, c2);
354 if (! CHARSET_DEFINED_P (charset))
355 return 0;
356 if (CHARSET_CHARS (charset) == 94
357 ? c1 <= 32 || c1 >= 127
358 : c1 < 32)
359 return 0;
360 if (CHARSET_DIMENSION (charset) == 2
361 && (CHARSET_CHARS (charset) == 94
362 ? c2 <= 32 || c2 >= 127
363 : c2 < 32))
364 return 0;
365 return 1;
366 }
367
368 /* Translate character C by translation table TABLE. If C
369 is negative, translate a character specified by CHARSET, C1, and C2
370 (C1 and C2 are code points of the character). If no translation is
371 found in TABLE, return C. */
372 int
373 translate_char (table, c, charset, c1, c2)
374 Lisp_Object table;
375 int c, charset, c1, c2;
376 {
377 Lisp_Object ch;
378 int alt_charset, alt_c1, alt_c2, dimension;
379
380 if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
381 if (!CHAR_TABLE_P (table)
382 || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
383 return c;
384
385 SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
386 dimension = CHARSET_DIMENSION (alt_charset);
387 if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
388 /* CH is not a generic character, just return it. */
389 return XFASTINT (ch);
390
391 /* Since CH is a generic character, we must return a specific
392 charater which has the same position codes as C from CH. */
393 if (charset < 0)
394 SPLIT_CHAR (c, charset, c1, c2);
395 if (dimension != CHARSET_DIMENSION (charset))
396 /* We can't make such a character because of dimension mismatch. */
397 return c;
398 return MAKE_CHAR (alt_charset, c1, c2);
399 }
400
401 /* Convert the unibyte character C to multibyte based on
402 Vnonascii_translation_table or nonascii_insert_offset. If they can't
403 convert C to a valid multibyte character, convert it based on
404 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
405
406 int
407 unibyte_char_to_multibyte (c)
408 int c;
409 {
410 if (c < 0400 && c >= 0200)
411 {
412 int c_save = c;
413
414 if (! NILP (Vnonascii_translation_table))
415 {
416 c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
417 if (c >= 0400 && ! char_valid_p (c, 0))
418 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
419 }
420 else if (c >= 0240 && nonascii_insert_offset > 0)
421 {
422 c += nonascii_insert_offset;
423 if (c < 0400 || ! char_valid_p (c, 0))
424 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
425 }
426 else if (c >= 0240)
427 c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
428 }
429 return c;
430 }
431
432
433 /* Convert the multibyte character C to unibyte 8-bit character based
434 on Vnonascii_translation_table or nonascii_insert_offset. If
435 REV_TBL is non-nil, it should be a reverse table of
436 Vnonascii_translation_table, i.e. what given by:
437 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
438
439 int
440 multibyte_char_to_unibyte (c, rev_tbl)
441 int c;
442 Lisp_Object rev_tbl;
443 {
444 if (!SINGLE_BYTE_CHAR_P (c))
445 {
446 int c_save = c;
447
448 if (! CHAR_TABLE_P (rev_tbl)
449 && CHAR_TABLE_P (Vnonascii_translation_table))
450 rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
451 make_number (0));
452 if (CHAR_TABLE_P (rev_tbl))
453 {
454 Lisp_Object temp;
455 temp = Faref (rev_tbl, make_number (c));
456 if (INTEGERP (temp))
457 c = XINT (temp);
458 if (c >= 256)
459 c = (c_save & 0177) + 0200;
460 }
461 else
462 {
463 if (nonascii_insert_offset > 0)
464 c -= nonascii_insert_offset;
465 if (c < 128 || c >= 256)
466 c = (c_save & 0177) + 0200;
467 }
468 }
469
470 return c;
471 }
472
473 \f
/* Update the table Vcharset_table with the given arguments (see the
   document of `define-charset' for the meaning of each argument).
   Several other table contents are also updated: the
   REVERSE-CHARSET entries of this charset and of its counterpart (if
   any), bytes_by_char_head, width_by_char_head and emacs_code_class
   (official charsets other than ascii and eight-bit-graphic only),
   and the ISO charset table (only when the slot is still unset).
   The caller should check the validity of CHARSET-ID and the
   remaining arguments in advance.

   If LONG_NAME is nil, SHORT_NAME is used for it; if DESCRIPTION is
   nil, the (possibly defaulted) LONG_NAME is used for it.  */

void
update_charset_table (charset_id, dimension, chars, width, direction,
                      iso_final_char, iso_graphic_plane,
                      short_name, long_name, description)
     Lisp_Object charset_id, dimension, chars, width, direction;
     Lisp_Object iso_final_char, iso_graphic_plane;
     Lisp_Object short_name, long_name, description;
{
  int charset = XINT (charset_id);
  int bytes;
  unsigned char leading_code_base, leading_code_ext;

  /* Create the information vector on first use.  */
  if (NILP (CHARSET_TABLE_ENTRY (charset)))
    CHARSET_TABLE_ENTRY (charset)
      = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);

  if (NILP (long_name))
    long_name = short_name;
  if (NILP (description))
    description = long_name;

  /* Get byte length of multibyte form, base leading-code, and
     extended leading-code of the charset.  See the comment under the
     title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
  bytes = XINT (dimension);
  if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
    {
      /* Official charset, it doesn't have an extended leading-code.  */
      if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
        bytes += 1; /* For a base leading-code.  */
      leading_code_base = charset;
      leading_code_ext = 0;
    }
  else
    {
      /* Private charset.  */
      bytes += 2; /* For base and extended leading-codes.  */
      leading_code_base
        = (charset < LEADING_CODE_EXT_12
           ? LEADING_CODE_PRIVATE_11
           : (charset < LEADING_CODE_EXT_21
              ? LEADING_CODE_PRIVATE_12
              : (charset < LEADING_CODE_EXT_22
                 ? LEADING_CODE_PRIVATE_21
                 : LEADING_CODE_PRIVATE_22)));
      leading_code_ext = charset;
      /* The byte length implied by DIMENSION must agree with the one
         already recorded for the private leading-code.  */
      if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
        error ("Invalid dimension for the charset-ID %d", charset);
    }

  CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
  CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
  CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
  CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
  CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
  CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
  CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
    = make_number (leading_code_base);
  CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
    = make_number (leading_code_ext);
  CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
  CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
    = iso_graphic_plane;
  CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
  CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
  CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
  CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;

  {
    /* If we have already defined a charset which has the same
       DIMENSION, CHARS and ISO-FINAL-CHAR but the different
       DIRECTION, we must update the entry REVERSE-CHARSET of both
       charsets.  If there's no such charset, the value of the entry
       is set to -1.  Only the first match found is linked.  */
    int i;

    for (i = 0; i <= MAX_CHARSET; i++)
      if (!NILP (CHARSET_TABLE_ENTRY (i)))
        {
          if (CHARSET_DIMENSION (i) == XINT (dimension)
              && CHARSET_CHARS (i) == XINT (chars)
              && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
              && CHARSET_DIRECTION (i) != XINT (direction))
            {
              CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
                = make_number (i);
              CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
              break;
            }
        }
    if (i > MAX_CHARSET)
      /* No such a charset.  */
      CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
        = make_number (-1);
  }

  /* Only official charsets that have a leading-code of their own
     update the per-leading-byte tables.  */
  if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
      && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
    {
      bytes_by_char_head[leading_code_base] = bytes;
      width_by_char_head[leading_code_base] = XINT (width);

      /* Update table emacs_code_class.  */
      emacs_code_class[charset] = (bytes == 2
                                   ? EMACS_leading_code_2
                                   : (bytes == 3
                                      ? EMACS_leading_code_3
                                      : EMACS_leading_code_4));
    }

  /* Update table iso_charset_table.  An entry that already holds a
     non-negative charset is left alone.  */
  if (XINT (iso_final_char) >= 0
      && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
    ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
}
595
596 #ifdef emacs
597
598 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
599 is invalid. */
600 int
601 get_charset_id (charset_symbol)
602 Lisp_Object charset_symbol;
603 {
604 Lisp_Object val;
605 int charset;
606
607 /* This originally used a ?: operator, but reportedly the HP-UX
608 compiler version HP92453-01 A.10.32.22 miscompiles that. */
609 if (SYMBOLP (charset_symbol)
610 && VECTORP (val = Fget (charset_symbol, Qcharset))
611 && CHARSET_VALID_P (charset =
612 XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
613 return charset;
614 else
615 return -1;
616 }
617
618 /* Return an identification number for a new private charset of
619 DIMENSION and WIDTH. If there's no more room for the new charset,
620 return 0. */
621 Lisp_Object
622 get_new_private_charset_id (dimension, width)
623 int dimension, width;
624 {
625 int charset, from, to;
626
627 if (dimension == 1)
628 {
629 from = LEADING_CODE_EXT_11;
630 to = LEADING_CODE_EXT_21;
631 }
632 else
633 {
634 from = LEADING_CODE_EXT_21;
635 to = LEADING_CODE_EXT_MAX + 1;
636 }
637
638 for (charset = from; charset < to; charset++)
639 if (!CHARSET_DEFINED_P (charset)) break;
640
641 return make_number (charset < to ? charset : 0);
642 }
643
/* Lisp primitive `define-charset'.  Validates its arguments, picks a
   private charset ID when CHARSET-ID is nil, records the charset via
   update_charset_table, stores the resulting information vector as
   the `charset' property of CHARSET-SYMBOL, registers the symbol in
   the symbol table and in Vcharset_list, and finally calls
   Fupdate_coding_systems_internal.  Always returns nil.  */
DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
       doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
treated as a private charset.
INFO-VECTOR is a vector of the format:
[DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
SHORT-NAME LONG-NAME DESCRIPTION]
The meanings of each elements is as follows:
DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
CHARS (integer) is the number of characters in a dimension: 94 or 96.
WIDTH (integer) is the number of columns a character in the charset
occupies on the screen: one of 0, 1, and 2.

DIRECTION (integer) is the rendering direction of characters in the
charset when rendering. If 0, render from left to right, else
render from right to left.

ISO-FINAL-CHAR (character) is the final character of the
corresponding ISO 2022 charset.
It may be -1 if the charset is internal use only.

ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
while encoding to variants of ISO 2022 coding system, one of the
following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
It may be -1 if the charset is internal use only.

SHORT-NAME (string) is the short name to refer to the charset.

LONG-NAME (string) is the long name to refer to the charset.

DESCRIPTION (string) is the description string of the charset. */)
     (charset_id, charset_symbol, info_vector)
     Lisp_Object charset_id, charset_symbol, info_vector;
{
  Lisp_Object *vec;

  if (!NILP (charset_id))
    CHECK_NUMBER (charset_id);
  CHECK_SYMBOL (charset_symbol);
  CHECK_VECTOR (info_vector);

  /* An explicitly given ID must be in the valid range and must not
     name a charset that already exists.  */
  if (! NILP (charset_id))
    {
      if (! CHARSET_VALID_P (XINT (charset_id)))
        error ("Invalid CHARSET: %d", XINT (charset_id));
      else if (CHARSET_DEFINED_P (XINT (charset_id)))
        error ("Already defined charset: %d", XINT (charset_id));
    }

  /* Check the shape of INFO-VECTOR: nine elements, in the order
     DIMENSION, CHARS, WIDTH, DIRECTION, ISO-FINAL-CHAR,
     ISO-GRAPHIC-PLANE, SHORT-NAME, LONG-NAME, DESCRIPTION.
     NOTE(review): the doc string lists 0 as a possible WIDTH, but the
     check on vec[2] below accepts only 1 and 2 -- confirm which of
     the two is intended.  Likewise DIRECTION is restricted to 0 or 1
     here.  */
  vec = XVECTOR (info_vector)->contents;
  if (XVECTOR (info_vector)->size != 9
      || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
      || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
      || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
      || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
      || !INTEGERP (vec[4])
      || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
      || !INTEGERP (vec[5])
      || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
      || !STRINGP (vec[6])
      || !STRINGP (vec[7])
      || !STRINGP (vec[8]))
    error ("Invalid info-vector argument for defining charset %s",
           SDATA (SYMBOL_NAME (charset_symbol)));

  /* No ID given: allocate a private one.  get_new_private_charset_id
     answers 0 when every candidate ID is already taken.  */
  if (NILP (charset_id))
    {
      charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
      if (XINT (charset_id) == 0)
        error ("There's no room for a new private charset %s",
               SDATA (SYMBOL_NAME (charset_symbol)));
    }

  update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
                        vec[4], vec[5], vec[6], vec[7], vec[8]);
  Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
  CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
  Vcharset_list = Fcons (charset_symbol, Vcharset_list);
  Fupdate_coding_systems_internal ();
  return Qnil;
}
725
/* Lisp primitive `generic-character-list': hand back the value of
   Vgeneric_character_list as is (the list is not copied).  */
DEFUN ("generic-character-list", Fgeneric_character_list,
       Sgeneric_character_list, 0, 0, 0,
       doc: /* Return a list of all possible generic characters.
It includes a generic character for a charset not yet defined. */)
     ()
{
  return Vgeneric_character_list;
}
734
735 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
736 Sget_unused_iso_final_char, 2, 2, 0,
737 doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
738 DIMENSION is the number of bytes to represent a character: 1 or 2.
739 CHARS is the number of characters in a dimension: 94 or 96.
740
741 This final char is for private use, thus the range is `0' (48) .. `?' (63).
742 If there's no unused final char for the specified kind of charset,
743 return nil. */)
744 (dimension, chars)
745 Lisp_Object dimension, chars;
746 {
747 int final_char;
748
749 CHECK_NUMBER (dimension);
750 CHECK_NUMBER (chars);
751 if (XINT (dimension) != 1 && XINT (dimension) != 2)
752 error ("Invalid charset dimension %d, it should be 1 or 2",
753 XINT (dimension));
754 if (XINT (chars) != 94 && XINT (chars) != 96)
755 error ("Invalid charset chars %d, it should be 94 or 96",
756 XINT (chars));
757 for (final_char = '0'; final_char <= '?'; final_char++)
758 {
759 if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
760 break;
761 }
762 return (final_char <= '?' ? make_number (final_char) : Qnil);
763 }
764
765 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
766 4, 4, 0,
767 doc: /* Declare an equivalent charset for ISO-2022 decoding.
768
769 On decoding by an ISO-2022 base coding system, when a charset
770 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
771 if CHARSET is designated instead. */)
772 (dimension, chars, final_char, charset)
773 Lisp_Object dimension, chars, final_char, charset;
774 {
775 int charset_id;
776
777 CHECK_NUMBER (dimension);
778 CHECK_NUMBER (chars);
779 CHECK_NUMBER (final_char);
780 CHECK_SYMBOL (charset);
781
782 if (XINT (dimension) != 1 && XINT (dimension) != 2)
783 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
784 if (XINT (chars) != 94 && XINT (chars) != 96)
785 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
786 if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
787 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
788 if ((charset_id = get_charset_id (charset)) < 0)
789 error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
790
791 ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
792 return Qnil;
793 }
794
/* Return information about charsets in the text at PTR of NBYTES
   bytes, which are NCHARS characters.  The value is:

        0: Each character is represented by one byte.  This is always
           true for unibyte text.
        1: No charsets other than ascii, eight-bit-control,
           eight-bit-graphic, and latin-1 are found.
        2: Otherwise.

   In addition, if CHARSETS is nonzero, for each found charset N, set
   CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
   (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
   table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
   1 (note that there's no charset whose ID is 1).

   If CHARSETS is zero, scanning of multibyte text stops as soon as
   the answer is known to be 2.  */

int
find_charset_in_text (ptr, nchars, nbytes, charsets, table)
     const unsigned char *ptr;
     int nchars, nbytes, *charsets;
     Lisp_Object table;
{
  if (nchars == nbytes)
    {
      /* One byte per character.  TABLE is not consulted on this
         path.  */
      if (charsets && nbytes > 0)
        {
          const unsigned char *endp = ptr + nbytes;
          /* Bit 1: a byte below 0x80 was seen; bit 2: a byte in
             0x80..0x9F; bit 4: a byte at or above 0xA0.  */
          int maskbits = 0;

          /* Stop early once all three classes have been seen.  */
          while (ptr < endp && maskbits != 7)
            {
              maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
              ptr++;
            }

          if (maskbits & 1)
            charsets[CHARSET_ASCII] = 1;
          if (maskbits & 2)
            charsets[CHARSET_8_BIT_CONTROL] = 1;
          if (maskbits & 4)
            charsets[CHARSET_8_BIT_GRAPHIC] = 1;
        }
      return 0;
    }
  else
    {
      int return_val = 1;
      int bytes, charset, c1, c2;

      if (! CHAR_TABLE_P (table))
        table = Qnil;

      while (nchars-- > 0)
        {
          /* `len' is not a variable of this function; it is only a
             placeholder for the LENGTH argument, which
             SPLIT_MULTIBYTE_SEQ never references in its expansion.  */
          SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
          ptr += bytes;

          if (!CHARSET_DEFINED_P (charset))
            /* Slot 1 stands for "invalid charset".  */
            charset = 1;
          else if (! NILP (table))
            {
              int c = translate_char (table, -1, charset, c1, c2);
              if (c >= 0)
                charset = CHAR_CHARSET (c);
            }

          if (return_val == 1
              && charset != CHARSET_ASCII
              && charset != CHARSET_8_BIT_CONTROL
              && charset != CHARSET_8_BIT_GRAPHIC
              && charset != charset_latin_iso8859_1)
            return_val = 2;

          if (charsets)
            charsets[charset] = 1;
          else if (return_val == 2)
            /* Nothing to record and the result can't change any
               more.  */
            break;
        }
      return return_val;
    }
}
875
876 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
877 2, 3, 0,
878 doc: /* Return a list of charsets in the region between BEG and END.
879 BEG and END are buffer positions.
880 Optional arg TABLE if non-nil is a translation table to look up.
881
882 If the region contains invalid multibyte characters,
883 `unknown' is included in the returned list.
884
885 If the current buffer is unibyte, the returned list may contain
886 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
887 (beg, end, table)
888 Lisp_Object beg, end, table;
889 {
890 int charsets[MAX_CHARSET + 1];
891 int from, from_byte, to, stop, stop_byte, i;
892 Lisp_Object val;
893
894 validate_region (&beg, &end);
895 from = XFASTINT (beg);
896 stop = to = XFASTINT (end);
897
898 if (from < GPT && GPT < to)
899 {
900 stop = GPT;
901 stop_byte = GPT_BYTE;
902 }
903 else
904 stop_byte = CHAR_TO_BYTE (stop);
905
906 from_byte = CHAR_TO_BYTE (from);
907
908 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
909 while (1)
910 {
911 find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
912 stop_byte - from_byte, charsets, table);
913 if (stop < to)
914 {
915 from = stop, from_byte = stop_byte;
916 stop = to, stop_byte = CHAR_TO_BYTE (stop);
917 }
918 else
919 break;
920 }
921
922 val = Qnil;
923 if (charsets[1])
924 val = Fcons (Qunknown, val);
925 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
926 if (charsets[i])
927 val = Fcons (CHARSET_SYMBOL (i), val);
928 if (charsets[0])
929 val = Fcons (Qascii, val);
930 return val;
931 }
932
933 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
934 1, 2, 0,
935 doc: /* Return a list of charsets in STR.
936 Optional arg TABLE if non-nil is a translation table to look up.
937
938 If the string contains invalid multibyte characters,
939 `unknown' is included in the returned list.
940
941 If STR is unibyte, the returned list may contain
942 only `ascii', `eight-bit-control', and `eight-bit-graphic'. */)
943 (str, table)
944 Lisp_Object str, table;
945 {
946 int charsets[MAX_CHARSET + 1];
947 int i;
948 Lisp_Object val;
949
950 CHECK_STRING (str);
951
952 bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
953 find_charset_in_text (SDATA (str), SCHARS (str),
954 SBYTES (str), charsets, table);
955
956 val = Qnil;
957 if (charsets[1])
958 val = Fcons (Qunknown, val);
959 for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
960 if (charsets[i])
961 val = Fcons (CHARSET_SYMBOL (i), val);
962 if (charsets[0])
963 val = Fcons (Qascii, val);
964 return val;
965 }
966
967 \f
968 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
969 doc: /* Return a character made from arguments.
970 Internal use only. */)
971 (charset, code1, code2)
972 Lisp_Object charset, code1, code2;
973 {
974 int charset_id, c1, c2;
975
976 CHECK_NUMBER (charset);
977 charset_id = XINT (charset);
978 if (!CHARSET_DEFINED_P (charset_id))
979 error ("Invalid charset ID: %d", XINT (charset));
980
981 if (NILP (code1))
982 c1 = 0;
983 else
984 {
985 CHECK_NUMBER (code1);
986 c1 = XINT (code1);
987 }
988 if (NILP (code2))
989 c2 = 0;
990 else
991 {
992 CHECK_NUMBER (code2);
993 c2 = XINT (code2);
994 }
995
996 if (charset_id == CHARSET_ASCII)
997 {
998 if (c1 < 0 || c1 > 0x7F)
999 goto invalid_code_posints;
1000 return make_number (c1);
1001 }
1002 else if (charset_id == CHARSET_8_BIT_CONTROL)
1003 {
1004 if (NILP (code1))
1005 c1 = 0x80;
1006 else if (c1 < 0x80 || c1 > 0x9F)
1007 goto invalid_code_posints;
1008 return make_number (c1);
1009 }
1010 else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1011 {
1012 if (NILP (code1))
1013 c1 = 0xA0;
1014 else if (c1 < 0xA0 || c1 > 0xFF)
1015 goto invalid_code_posints;
1016 return make_number (c1);
1017 }
1018 else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1019 goto invalid_code_posints;
1020 c1 &= 0x7F;
1021 c2 &= 0x7F;
1022 if (c1 == 0
1023 ? c2 != 0
1024 : (c2 == 0
1025 ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1026 : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1027 goto invalid_code_posints;
1028 return make_number (MAKE_CHAR (charset_id, c1, c2));
1029
1030 invalid_code_posints:
1031 error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1032 }
1033
1034 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1035 doc: /* Return list of charset and one or two position-codes of CH.
1036 If CH is invalid as a character code,
1037 return a list of symbol `unknown' and CH. */)
1038 (ch)
1039 Lisp_Object ch;
1040 {
1041 int c, charset, c1, c2;
1042
1043 CHECK_NUMBER (ch);
1044 c = XFASTINT (ch);
1045 if (!CHAR_VALID_P (c, 1))
1046 return Fcons (Qunknown, Fcons (ch, Qnil));
1047 SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1048 return (c2 >= 0
1049 ? Fcons (CHARSET_SYMBOL (charset),
1050 Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1051 : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1052 }
1053
1054 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1055 doc: /* Return charset of CH. */)
1056 (ch)
1057 Lisp_Object ch;
1058 {
1059 CHECK_NUMBER (ch);
1060
1061 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1062 }
1063
1064 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1065 doc: /* Return charset of a character in the current buffer at position POS.
1066 If POS is nil, it defauls to the current point.
1067 If POS is out of range, the value is nil. */)
1068 (pos)
1069 Lisp_Object pos;
1070 {
1071 Lisp_Object ch;
1072 int charset;
1073
1074 ch = Fchar_after (pos);
1075 if (! INTEGERP (ch))
1076 return ch;
1077 charset = CHAR_CHARSET (XINT (ch));
1078 return CHARSET_SYMBOL (charset);
1079 }
1080
1081 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1082 doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1083
1084 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1085 by their DIMENSION, CHARS, and FINAL-CHAR,
1086 where as Emacs distinguishes them by charset symbol.
1087 See the documentation of the function `charset-info' for the meanings of
1088 DIMENSION, CHARS, and FINAL-CHAR. */)
1089 (dimension, chars, final_char)
1090 Lisp_Object dimension, chars, final_char;
1091 {
1092 int charset;
1093
1094 CHECK_NUMBER (dimension);
1095 CHECK_NUMBER (chars);
1096 CHECK_NUMBER (final_char);
1097
1098 if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1099 return Qnil;
1100 return CHARSET_SYMBOL (charset);
1101 }
1102
1103 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1104 generic character. If GENERICP is zero, return nonzero iff C is a
1105 valid normal character. Do not call this function directly,
1106 instead use macro CHAR_VALID_P. */
1107 int
1108 char_valid_p (c, genericp)
1109 int c, genericp;
1110 {
1111 int charset, c1, c2;
1112
1113 if (c < 0 || c >= MAX_CHAR)
1114 return 0;
1115 if (SINGLE_BYTE_CHAR_P (c))
1116 return 1;
1117 SPLIT_CHAR (c, charset, c1, c2);
1118 if (genericp)
1119 {
1120 if (c1)
1121 {
1122 if (c2 <= 0) c2 = 0x20;
1123 }
1124 else
1125 {
1126 if (c2 <= 0) c1 = c2 = 0x20;
1127 }
1128 }
1129 return (CHARSET_DEFINED_P (charset)
1130 && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1131 }
1132
1133 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1134 doc: /* Return t if OBJECT is a valid normal character.
1135 If optional arg GENERICP is non-nil, also return t if OBJECT is
1136 a valid generic character. */)
1137 (object, genericp)
1138 Lisp_Object object, genericp;
1139 {
1140 if (! NATNUMP (object))
1141 return Qnil;
1142 return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1143 }
1144
1145 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1146 Sunibyte_char_to_multibyte, 1, 1, 0,
1147 doc: /* Convert the unibyte character CH to multibyte character.
1148 The conversion is done based on `nonascii-translation-table' (which see)
1149 or `nonascii-insert-offset' (which see). */)
1150 (ch)
1151 Lisp_Object ch;
1152 {
1153 int c;
1154
1155 CHECK_NUMBER (ch);
1156 c = XINT (ch);
1157 if (c < 0 || c >= 0400)
1158 error ("Invalid unibyte character: %d", c);
1159 c = unibyte_char_to_multibyte (c);
1160 if (c < 0)
1161 error ("Can't convert to multibyte character: %d", XINT (ch));
1162 return make_number (c);
1163 }
1164
1165 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1166 Smultibyte_char_to_unibyte, 1, 1, 0,
1167 doc: /* Convert the multibyte character CH to unibyte character.
1168 The conversion is done based on `nonascii-translation-table' (which see)
1169 or `nonascii-insert-offset' (which see). */)
1170 (ch)
1171 Lisp_Object ch;
1172 {
1173 int c;
1174
1175 CHECK_NUMBER (ch);
1176 c = XINT (ch);
1177 if (! CHAR_VALID_P (c, 0))
1178 error ("Invalid multibyte character: %d", c);
1179 c = multibyte_char_to_unibyte (c, Qnil);
1180 if (c < 0)
1181 error ("Can't convert to unibyte character: %d", XINT (ch));
1182 return make_number (c);
1183 }
1184
1185 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1186 doc: /* Return 1 regardless of the argument CH. */)
1187 (ch)
1188 Lisp_Object ch;
1189 {
1190 CHECK_NUMBER (ch);
1191 return make_number (1);
1192 }
1193
1194 /* Return how many bytes C will occupy in a multibyte buffer.
1195 Don't call this function directly, instead use macro CHAR_BYTES. */
1196 int
1197 char_bytes (c)
1198 int c;
1199 {
1200 int charset;
1201
1202 if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1203 return 1;
1204 if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1205 return 1;
1206
1207 charset = CHAR_CHARSET (c);
1208 return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1209 }
1210
/* Return the width of the character whose multibyte form starts with
   byte C.  The width is the number of screen columns the character
   occupies when displayed in the current buffer:

     - TAB takes `tab_width' columns and newline takes none;
     - other control codes below 0x20, and 0x7F, take 2 columns when the
       buffer's `ctl_arrow' is non-nil and 4 otherwise;
     - 0x20..0x7E take 1 column;
     - a byte above 0x7F takes WIDTH_BY_CHAR_HEAD (C) columns if the
       buffer has multibyte characters enabled and C is a base leading
       code, and 4 columns otherwise.

   C is parenthesized at each use so that any expression may be passed,
   but it is evaluated more than once: do not pass an argument with
   side effects.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1228
1229 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1230 doc: /* Return width of CH when displayed in the current buffer.
1231 The width is measured by how many columns it occupies on the screen.
1232 Tab is taken to occupy `tab-width' columns. */)
1233 (ch)
1234 Lisp_Object ch;
1235 {
1236 Lisp_Object val, disp;
1237 int c;
1238 struct Lisp_Char_Table *dp = buffer_display_table ();
1239
1240 CHECK_NUMBER (ch);
1241
1242 c = XINT (ch);
1243
1244 /* Get the way the display table would display it. */
1245 disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1246
1247 if (VECTORP (disp))
1248 XSETINT (val, XVECTOR (disp)->size);
1249 else if (SINGLE_BYTE_CHAR_P (c))
1250 XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1251 else
1252 {
1253 int charset = CHAR_CHARSET (c);
1254
1255 XSETFASTINT (val, CHARSET_WIDTH (charset));
1256 }
1257 return val;
1258 }
1259
1260 /* Return width of string STR of length LEN when displayed in the
1261 current buffer. The width is measured by how many columns it
1262 occupies on the screen. */
1263
1264 int
1265 strwidth (str, len)
1266 unsigned char *str;
1267 int len;
1268 {
1269 return c_string_width (str, len, -1, NULL, NULL);
1270 }
1271
1272 /* Return width of string STR of length LEN when displayed in the
1273 current buffer. The width is measured by how many columns it
1274 occupies on the screen. If PRECISION > 0, return the width of
1275 longest substring that doesn't exceed PRECISION, and set number of
1276 characters and bytes of the substring in *NCHARS and *NBYTES
1277 respectively. */
1278
1279 int
1280 c_string_width (str, len, precision, nchars, nbytes)
1281 const unsigned char *str;
1282 int len, precision, *nchars, *nbytes;
1283 {
1284 int i = 0, i_byte = 0;
1285 int width = 0;
1286 int chars;
1287 struct Lisp_Char_Table *dp = buffer_display_table ();
1288
1289 while (i_byte < len)
1290 {
1291 int bytes, thiswidth;
1292 Lisp_Object val;
1293
1294 if (dp)
1295 {
1296 int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1297
1298 chars = 1;
1299 val = DISP_CHAR_VECTOR (dp, c);
1300 if (VECTORP (val))
1301 thiswidth = XVECTOR (val)->size;
1302 else
1303 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1304 }
1305 else
1306 {
1307 chars = 1;
1308 PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1309 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1310 }
1311
1312 if (precision > 0
1313 && (width + thiswidth > precision))
1314 {
1315 *nchars = i;
1316 *nbytes = i_byte;
1317 return width;
1318 }
1319 i++;
1320 i_byte += bytes;
1321 width += thiswidth;
1322 }
1323
1324 if (precision > 0)
1325 {
1326 *nchars = i;
1327 *nbytes = i_byte;
1328 }
1329
1330 return width;
1331 }
1332
1333 /* Return width of Lisp string STRING when displayed in the current
1334 buffer. The width is measured by how many columns it occupies on
1335 the screen while paying attention to compositions. If PRECISION >
1336 0, return the width of longest substring that doesn't exceed
1337 PRECISION, and set number of characters and bytes of the substring
1338 in *NCHARS and *NBYTES respectively. */
1339
1340 int
1341 lisp_string_width (string, precision, nchars, nbytes)
1342 Lisp_Object string;
1343 int precision, *nchars, *nbytes;
1344 {
1345 int len = SCHARS (string);
1346 int len_byte = SBYTES (string);
1347 /* This set multibyte to 0 even if STRING is multibyte when it
1348 contains only ascii and eight-bit-graphic, but that's
1349 intentional. */
1350 int multibyte = len < len_byte;
1351 const unsigned char *str = SDATA (string);
1352 int i = 0, i_byte = 0;
1353 int width = 0;
1354 struct Lisp_Char_Table *dp = buffer_display_table ();
1355
1356 while (i < len)
1357 {
1358 int chars, bytes, thiswidth;
1359 Lisp_Object val;
1360 int cmp_id;
1361 int ignore, end;
1362
1363 if (find_composition (i, -1, &ignore, &end, &val, string)
1364 && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1365 >= 0))
1366 {
1367 thiswidth = composition_table[cmp_id]->width;
1368 chars = end - i;
1369 bytes = string_char_to_byte (string, end) - i_byte;
1370 }
1371 else if (dp)
1372 {
1373 int c;
1374
1375 if (multibyte)
1376 c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1377 else
1378 c = str[i_byte], bytes = 1;
1379 chars = 1;
1380 val = DISP_CHAR_VECTOR (dp, c);
1381 if (VECTORP (val))
1382 thiswidth = XVECTOR (val)->size;
1383 else
1384 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1385 }
1386 else
1387 {
1388 chars = 1;
1389 if (multibyte)
1390 PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1391 else
1392 bytes = 1;
1393 thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1394 }
1395
1396 if (precision > 0
1397 && (width + thiswidth > precision))
1398 {
1399 *nchars = i;
1400 *nbytes = i_byte;
1401 return width;
1402 }
1403 i += chars;
1404 i_byte += bytes;
1405 width += thiswidth;
1406 }
1407
1408 if (precision > 0)
1409 {
1410 *nchars = i;
1411 *nbytes = i_byte;
1412 }
1413
1414 return width;
1415 }
1416
1417 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1418 doc: /* Return width of STRING when displayed in the current buffer.
1419 Width is measured by how many columns it occupies on the screen.
1420 When calculating width of a multibyte character in STRING,
1421 only the base leading-code is considered; the validity of
1422 the following bytes is not checked. Tabs in STRING are always
1423 taken to occupy `tab-width' columns. */)
1424 (string)
1425 Lisp_Object string;
1426 {
1427 Lisp_Object val;
1428
1429 CHECK_STRING (string);
1430 XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
1431 return val;
1432 }
1433
1434 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1435 doc: /* Return the direction of CH.
1436 The returned value is 0 for left-to-right and 1 for right-to-left. */)
1437 (ch)
1438 Lisp_Object ch;
1439 {
1440 int charset;
1441
1442 CHECK_NUMBER (ch);
1443 charset = CHAR_CHARSET (XFASTINT (ch));
1444 if (!CHARSET_DEFINED_P (charset))
1445 invalid_character (XINT (ch));
1446 return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1447 }
1448
1449 /* Return the number of characters in the NBYTES bytes at PTR.
1450 This works by looking at the contents and checking for multibyte sequences.
1451 However, if the current buffer has enable-multibyte-characters = nil,
1452 we treat each byte as a character. */
1453
1454 int
1455 chars_in_text (ptr, nbytes)
1456 const unsigned char *ptr;
1457 int nbytes;
1458 {
1459 /* current_buffer is null at early stages of Emacs initialization. */
1460 if (current_buffer == 0
1461 || NILP (current_buffer->enable_multibyte_characters))
1462 return nbytes;
1463
1464 return multibyte_chars_in_text (ptr, nbytes);
1465 }
1466
/* Return the number of characters in the NBYTES bytes at PTR, found by
   parsing the bytes as multibyte sequences.  Unlike chars_in_text,
   this does not consult enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     const unsigned char *ptr;
     int nbytes;
{
  const unsigned char *endp = ptr + nbytes;
  int count, seq_len;

  /* Each pass consumes one sequence; PARSE_MULTIBYTE_SEQ stores its
     byte length in SEQ_LEN.  */
  for (count = 0; ptr < endp; count++)
    {
      PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, seq_len);
      ptr += seq_len;
    }

  return count;
}
1491
/* Parse the LEN bytes of unibyte text at STR as if they were multibyte
   text, and store the resulting character count in *NCHARS and byte
   count in *NBYTES.  A byte that does not start a valid multibyte
   sequence is counted as 2 bytes, because 8-bit characters in the range
   0x80..0x9F are represented by 2 bytes in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     const unsigned char *str;
     int len, *nchars, *nbytes;
{
  const unsigned char *endp = str + len;
  int n;
  int char_count, byte_count = 0;

  for (char_count = 0; str < endp; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
        {
          /* A valid sequence of N bytes is kept as it is.  */
          str += n;
          byte_count += n;
        }
      else
        {
          /* A stray byte becomes a 2-byte form.  */
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1516
1517 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1518 It actually converts only 8-bit characters in the range 0x80..0x9F
1519 that don't contruct multibyte characters to multibyte forms. If
1520 NCHARS is nonzero, set *NCHARS to the number of characters in the
1521 text. It is assured that we can use LEN bytes at STR as a work
1522 area and that is enough. Return the number of bytes of the
1523 resulting text. */
1524
1525 int
1526 str_as_multibyte (str, len, nbytes, nchars)
1527 unsigned char *str;
1528 int len, nbytes, *nchars;
1529 {
1530 unsigned char *p = str, *endp = str + nbytes;
1531 unsigned char *to;
1532 int chars = 0;
1533 int n;
1534
1535 while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1536 p += n, chars++;
1537 if (nchars)
1538 *nchars = chars;
1539 if (p == endp)
1540 return nbytes;
1541
1542 to = p;
1543 nbytes = endp - p;
1544 endp = str + len;
1545 safe_bcopy (p, endp - nbytes, nbytes);
1546 p = endp - nbytes;
1547 while (p < endp)
1548 {
1549 if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1550 {
1551 while (n--)
1552 *to++ = *p++;
1553 }
1554 else
1555 {
1556 *to++ = LEADING_CODE_8_BIT_CONTROL;
1557 *to++ = *p++ + 0x20;
1558 }
1559 chars++;
1560 }
1561 if (nchars)
1562 *nchars = chars;
1563 return (to - str);
1564 }
1565
/* Parse the LEN bytes of the unibyte string at STR and return the
   number of bytes it may occupy when converted to a multibyte string
   by `str_to_multibyte'.  Bytes in the range 0x80..0x9F count as two
   bytes; every other byte counts as one.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int idx;
  int total = 0;

  for (idx = 0; idx < len; idx++)
    {
      unsigned char byte = str[idx];

      total++;
      if (byte >= 0x80 && byte < 0xA0)
        total++;
    }
  return total;
}
1582
1583 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1584 that contains the same single-byte characters. It actually
1585 converts all 8-bit characters to multibyte forms. It is assured
1586 that we can use LEN bytes at STR as a work area and that is
1587 enough. */
1588
1589 int
1590 str_to_multibyte (str, len, bytes)
1591 unsigned char *str;
1592 int len, bytes;
1593 {
1594 unsigned char *p = str, *endp = str + bytes;
1595 unsigned char *to;
1596
1597 while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1598 if (p == endp)
1599 return bytes;
1600 to = p;
1601 bytes = endp - p;
1602 endp = str + len;
1603 safe_bcopy (p, endp - bytes, bytes);
1604 p = endp - bytes;
1605 while (p < endp)
1606 {
1607 if (*p < 0x80 || *p >= 0xA0)
1608 *to++ = *p++;
1609 else
1610 *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1611 }
1612 return (to - str);
1613 }
1614
1615 /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
1616 actually converts only 8-bit characters in the range 0x80..0x9F to
1617 unibyte forms. */
1618
1619 int
1620 str_as_unibyte (str, bytes)
1621 unsigned char *str;
1622 int bytes;
1623 {
1624 unsigned char *p = str, *endp = str + bytes;
1625 unsigned char *to = str;
1626
1627 while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1628 to = p;
1629 while (p < endp)
1630 {
1631 if (*p == LEADING_CODE_8_BIT_CONTROL)
1632 *to++ = *(p + 1) - 0x20, p += 2;
1633 else
1634 *to++ = *p++;
1635 }
1636 return (to - str);
1637 }
1638
1639 \f
1640 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1641 doc: /* Concatenate all the argument characters and make the result a string.
1642 usage: (string &rest CHARACTERS) */)
1643 (n, args)
1644 int n;
1645 Lisp_Object *args;
1646 {
1647 int i, bufsize;
1648 unsigned char *buf, *p;
1649 int c;
1650 int multibyte = 0;
1651 Lisp_Object ret;
1652 USE_SAFE_ALLOCA;
1653
1654 bufsize = MAX_MULTIBYTE_LENGTH * n;
1655 SAFE_ALLOCA (buf, unsigned char *, bufsize);
1656 p = buf;
1657
1658 for (i = 0; i < n; i++)
1659 {
1660 CHECK_NUMBER (args[i]);
1661 if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1662 multibyte = 1;
1663 }
1664
1665 for (i = 0; i < n; i++)
1666 {
1667 c = XINT (args[i]);
1668 if (multibyte)
1669 p += CHAR_STRING (c, p);
1670 else
1671 *p++ = c;
1672 }
1673
1674 ret = make_string_from_bytes (buf, n, p - buf);
1675 SAFE_FREE ();
1676
1677 return ret;
1678 }
1679
1680 #endif /* emacs */
1681 \f
1682 int
1683 charset_id_internal (charset_name)
1684 char *charset_name;
1685 {
1686 Lisp_Object val;
1687
1688 val= Fget (intern (charset_name), Qcharset);
1689 if (!VECTORP (val))
1690 error ("Charset %s is not defined", charset_name);
1691
1692 return (XINT (XVECTOR (val)->contents[0]));
1693 }
1694
1695 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1696 Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only. */)
1697 ()
1698 {
1699 charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1700 charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1701 charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1702 charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1703 charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1704 charset_big5_1 = charset_id_internal ("chinese-big5-1");
1705 charset_big5_2 = charset_id_internal ("chinese-big5-2");
1706 charset_mule_unicode_0100_24ff
1707 = charset_id_internal ("mule-unicode-0100-24ff");
1708 charset_mule_unicode_2500_33ff
1709 = charset_id_internal ("mule-unicode-2500-33ff");
1710 charset_mule_unicode_e000_ffff
1711 = charset_id_internal ("mule-unicode-e000-ffff");
1712 return Qnil;
1713 }
1714
1715 void
1716 init_charset_once ()
1717 {
1718 int i, j, k;
1719
1720 staticpro (&Vcharset_table);
1721 staticpro (&Vcharset_symbol_table);
1722 staticpro (&Vgeneric_character_list);
1723
1724 /* This has to be done here, before we call Fmake_char_table. */
1725 Qcharset_table = intern ("charset-table");
1726 staticpro (&Qcharset_table);
1727
1728 /* Intern this now in case it isn't already done.
1729 Setting this variable twice is harmless.
1730 But don't staticpro it here--that is done in alloc.c. */
1731 Qchar_table_extra_slots = intern ("char-table-extra-slots");
1732
1733 /* Now we are ready to set up this property, so we can
1734 create the charset table. */
1735 Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1736 Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1737
1738 Qunknown = intern ("unknown");
1739 staticpro (&Qunknown);
1740 Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1741 Qunknown);
1742
1743 /* Setup tables. */
1744 for (i = 0; i < 2; i++)
1745 for (j = 0; j < 2; j++)
1746 for (k = 0; k < 128; k++)
1747 iso_charset_table [i][j][k] = -1;
1748
1749 for (i = 0; i < 256; i++)
1750 bytes_by_char_head[i] = 1;
1751 bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1752 bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1753 bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1754 bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1755
1756 for (i = 0; i < 128; i++)
1757 width_by_char_head[i] = 1;
1758 for (; i < 256; i++)
1759 width_by_char_head[i] = 4;
1760 width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1761 width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1762 width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1763 width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1764
1765 {
1766 Lisp_Object val;
1767
1768 val = Qnil;
1769 for (i = 0x81; i < 0x90; i++)
1770 val = Fcons (make_number ((i - 0x70) << 7), val);
1771 for (; i < 0x9A; i++)
1772 val = Fcons (make_number ((i - 0x8F) << 14), val);
1773 for (i = 0xA0; i < 0xF0; i++)
1774 val = Fcons (make_number ((i - 0x70) << 7), val);
1775 for (; i < 0xFF; i++)
1776 val = Fcons (make_number ((i - 0xE0) << 14), val);
1777 Vgeneric_character_list = Fnreverse (val);
1778 }
1779
1780 nonascii_insert_offset = 0;
1781 Vnonascii_translation_table = Qnil;
1782 }
1783
1784 #ifdef emacs
1785
1786 void
1787 syms_of_charset ()
1788 {
1789 Qcharset = intern ("charset");
1790 staticpro (&Qcharset);
1791
1792 Qascii = intern ("ascii");
1793 staticpro (&Qascii);
1794
1795 Qeight_bit_control = intern ("eight-bit-control");
1796 staticpro (&Qeight_bit_control);
1797
1798 Qeight_bit_graphic = intern ("eight-bit-graphic");
1799 staticpro (&Qeight_bit_graphic);
1800
1801 /* Define special charsets ascii, eight-bit-control, and
1802 eight-bit-graphic. */
1803 update_charset_table (make_number (CHARSET_ASCII),
1804 make_number (1), make_number (94),
1805 make_number (1),
1806 make_number (0),
1807 make_number ('B'),
1808 make_number (0),
1809 build_string ("ASCII"),
1810 Qnil, /* same as above */
1811 build_string ("ASCII (ISO646 IRV)"));
1812 CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1813 Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1814
1815 update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1816 make_number (1), make_number (96),
1817 make_number (4),
1818 make_number (0),
1819 make_number (-1),
1820 make_number (-1),
1821 build_string ("8-bit control code (0x80..0x9F)"),
1822 Qnil, /* same as above */
1823 Qnil); /* same as above */
1824 CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1825 Fput (Qeight_bit_control, Qcharset,
1826 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1827
1828 update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1829 make_number (1), make_number (96),
1830 make_number (4),
1831 make_number (0),
1832 make_number (-1),
1833 make_number (-1),
1834 build_string ("8-bit graphic char (0xA0..0xFF)"),
1835 Qnil, /* same as above */
1836 Qnil); /* same as above */
1837 CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1838 Fput (Qeight_bit_graphic, Qcharset,
1839 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1840
1841 Qauto_fill_chars = intern ("auto-fill-chars");
1842 staticpro (&Qauto_fill_chars);
1843 Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1844
1845 defsubr (&Sdefine_charset);
1846 defsubr (&Sgeneric_character_list);
1847 defsubr (&Sget_unused_iso_final_char);
1848 defsubr (&Sdeclare_equiv_charset);
1849 defsubr (&Sfind_charset_region);
1850 defsubr (&Sfind_charset_string);
1851 defsubr (&Smake_char_internal);
1852 defsubr (&Ssplit_char);
1853 defsubr (&Schar_charset);
1854 defsubr (&Scharset_after);
1855 defsubr (&Siso_charset);
1856 defsubr (&Schar_valid_p);
1857 defsubr (&Sunibyte_char_to_multibyte);
1858 defsubr (&Smultibyte_char_to_unibyte);
1859 defsubr (&Schar_bytes);
1860 defsubr (&Schar_width);
1861 defsubr (&Sstring_width);
1862 defsubr (&Schar_direction);
1863 defsubr (&Sstring);
1864 defsubr (&Ssetup_special_charsets);
1865
1866 DEFVAR_LISP ("charset-list", &Vcharset_list,
1867 doc: /* List of charsets ever defined. */);
1868 Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1869 Fcons (Qeight_bit_graphic, Qnil)));
1870
1871 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
1872 doc: /* Vector of cons cell of a symbol and translation table ever defined.
1873 An ID of a translation table is an index of this vector. */);
1874 Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1875
1876 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1877 doc: /* Leading-code of private TYPE9N charset of column-width 1. */);
1878 leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1879
1880 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1881 doc: /* Leading-code of private TYPE9N charset of column-width 2. */);
1882 leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1883
1884 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1885 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1. */);
1886 leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1887
1888 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1889 doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2. */);
1890 leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1891
1892 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1893 doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1894 This is used for converting unibyte text to multibyte,
1895 and for inserting character codes specified by number.
1896
1897 This serves to convert a Latin-1 or similar 8-bit character code
1898 to the corresponding Emacs multibyte character code.
1899 Typically the value should be (- (make-char CHARSET 0) 128),
1900 for your choice of character set.
1901 If `nonascii-translation-table' is non-nil, it overrides this variable. */);
1902 nonascii_insert_offset = 0;
1903
1904 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1905 doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1906 This is used for converting unibyte text to multibyte,
1907 and for inserting character codes specified by number.
1908
1909 Conversion is performed only when multibyte characters are enabled,
1910 and it serves to convert a Latin-1 or similar 8-bit character code
1911 to the corresponding Emacs character code.
1912
1913 If this is nil, `nonascii-insert-offset' is used instead.
1914 See also the docstring of `make-translation-table'. */);
1915 Vnonascii_translation_table = Qnil;
1916
1917 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1918 doc: /* A char-table for characters which invoke auto-filling.
1919 Such characters have value t in this table. */);
1920 Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1921 CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1922 CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1923 }
1924
1925 #endif /* emacs */
1926
1927 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1928 (do not change this comment) */