1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* At first, see the document in `charset.h' to understand the code in
29 #include <sys/types.h>
43 Lisp_Object Qcharset
, Qascii
, Qcomposition
;
46 /* Declaration of special leading-codes. */
47 int leading_code_composition
; /* for composite characters */
48 int leading_code_private_11
; /* for private DIMENSION1 of 1-column */
49 int leading_code_private_12
; /* for private DIMENSION1 of 2-column */
50 int leading_code_private_21
; /* for private DIMENSION2 of 1-column */
51 int leading_code_private_22
; /* for private DIMENSION2 of 2-column */
53 /* Declaration of special charsets. */
54 int charset_ascii
; /* ASCII */
55 int charset_composition
; /* for a composite character */
56 int charset_latin_iso8859_1
; /* ISO8859-1 (Latin-1) */
57 int charset_jisx0208_1978
; /* JISX0208.1978 (Japanese Kanji old set) */
58 int charset_jisx0208
; /* JISX0208.1983 (Japanese Kanji) */
59 int charset_katakana_jisx0201
; /* JISX0201.Kana (Japanese Katakana) */
60 int charset_latin_jisx0201
; /* JISX0201.Roman (Japanese Roman) */
61 int charset_big5_1
; /* Big5 Level 1 (Chinese Traditional) */
62 int charset_big5_2
; /* Big5 Level 2 (Chinese Traditional) */
64 int min_composite_char
;
66 Lisp_Object Qcharset_table
;
68 /* A char-table containing information of each character set. */
69 Lisp_Object Vcharset_table
;
71 /* A vector of charset symbol indexed by charset-id. This is used
72 only for returning charset symbol from C functions. */
73 Lisp_Object Vcharset_symbol_table
;
75 /* A list of charset symbols ever defined. */
76 Lisp_Object Vcharset_list
;
78 /* Vector of translation table ever defined.
79 ID of a translation table is used to index this vector. */
80 Lisp_Object Vtranslation_table_vector
;
82 /* A char-table for characters which may invoke auto-filling. */
83 Lisp_Object Vauto_fill_chars
;
85 Lisp_Object Qauto_fill_chars
;
87 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
88 int bytes_by_char_head
[256];
89 int width_by_char_head
[256];
91 /* Mapping table from ISO2022's charset (specified by DIMENSION,
92 CHARS, and FINAL-CHAR) to Emacs' charset. */
93 int iso_charset_table
[2][2][128];
95 /* Table of pointers to the structure `cmpchar_info' indexed by
97 struct cmpchar_info
**cmpchar_table
;
98 /* The current size of `cmpchar_table'. */
99 static int cmpchar_table_size
;
100 /* Number of the current composite characters. */
103 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
104 unsigned char *_fetch_multibyte_char_p
;
105 int _fetch_multibyte_char_len
;
107 /* Offset to add to a non-ASCII value when inserting it. */
108 int nonascii_insert_offset
;
110 /* Translation table for converting non-ASCII unibyte characters
111 to multibyte codes, or nil. */
112 Lisp_Object Vnonascii_translation_table
;
114 /* List of all possible generic characters. */
115 Lisp_Object Vgeneric_character_list
;
117 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
118 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
121 invalid_character (c
)
124 error ("Invalid character: 0%o, %d, 0x%x", c
, c
, c
);
127 /* Parse composite character string STR of length LENGTH (>= 2) and
128 set BYTES, CHARSET, C1, and C2 as below.
130 It is assumed that *STR is LEADING_CODE_COMPOSITION and the
131 following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P.
133 If there is a valid composite character, set CHARSET, C1, and C2 to
134 such values that MAKE_CHAR can make the composite character from
135 them. Otherwise, set CHARSET to CHARSET_COMPOSITION, set C1 to the
136 second byte of the sequence, C2 to -1 so that MAKE_CHAR can make
137 the invalid multibyte character whose string representation is two
138 bytes of STR[0] and STR[1]. In any case, set BYTES to LENGTH.
140 This macro should be called only from SPLIT_MULTIBYTE_SEQ. */
142 #define SPLIT_COMPOSITE_SEQ(str, length, bytes, charset, c1, c2) \
144 int cmpchar_id = str_cmpchar_id ((str), (length)); \
146 (charset) = CHARSET_COMPOSITION; \
147 (bytes) = (length); \
148 if (cmpchar_id >= 0) \
150 (c1) = CHAR_FIELD2 (cmpchar_id); \
151 (c2) = CHAR_FIELD3 (cmpchar_id); \
155 (c1) = (str)[1] & 0x7F; \
160 /* Parse non-composite multibyte character string STR of length LENGTH
161 (>= 2) and set BYTES to the length of actual multibyte sequence,
162 CHARSET, C1, and C2 to such values that MAKE_CHAR can make the
163 multibyte character from them.
165 It is assumed that *STR is one of base leading codes (excluding
166 LEADING_CODE_COMPOSITION) and the following (LENGTH - 1) bytes
167 satisfy !CHAR_HEAD_P.
169 This macro should be called only from SPLIT_MULTIBYTE_SEQ. */
171 #define SPLIT_CHARACTER_SEQ(str, length, bytes, charset, c1, c2) \
174 (charset) = (str)[0]; \
175 if ((charset) >= LEADING_CODE_PRIVATE_11 \
176 && (charset) <= LEADING_CODE_PRIVATE_22) \
177 (charset) = (str)[(bytes)++]; \
178 if ((bytes) < (length)) \
180 (c1) = (str)[(bytes)++] & 0x7F; \
181 if ((bytes) < (length)) \
182 (c2) = (str)[(bytes)++] & 0x7F; \
190 /* Parse string STR of length LENGTH and check if a multibyte
191 characters is at STR. set BYTES to the actual length, CHARSET, C1,
192 C2 to proper values for that character. */
194 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
197 if (ASCII_BYTE_P ((str)[0])) \
200 for (i = 1; i < (length) && ! CHAR_HEAD_P ((str)[i]); i++); \
202 (bytes) = 1, (charset) = CHARSET_ASCII, (c1) = (str)[0] ; \
203 else if ((str)[0] == LEADING_CODE_COMPOSITION) \
204 SPLIT_COMPOSITE_SEQ (str, i, bytes, charset, c1, c2); \
207 if (i > BYTES_BY_CHAR_HEAD ((str)[0])) \
208 i = BYTES_BY_CHAR_HEAD ((str)[0]); \
209 SPLIT_CHARACTER_SEQ (str, i, bytes, charset, c1, c2); \
213 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0. */
214 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
215 (CHARSET_DIMENSION (charset) == 1 \
216 ? ((c1) >= 0x20 && (c1) <= 0x7F) \
217 : ((c1) >= 0x20 && (c1) <= 0x7F && (c2) >= 0x20 && (c2) <= 0x7F))
219 /* Set STR a pointer to the multi-byte form of the character C. If C
220 is not a composite character, the multi-byte form is set in WORKBUF
221 and STR points WORKBUF. The caller should allocate at least 4-byte
222 area at WORKBUF in advance. Returns the length of the multi-byte
223 form. If C is an invalid character, store (C & 0xFF) in WORKBUF[0]
226 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
227 function directly if C can be an ASCII character. */
230 non_ascii_char_to_string (c
, workbuf
, str
)
232 unsigned char *workbuf
, **str
;
234 if (c
& CHAR_MODIFIER_MASK
) /* This includes the case C is negative. */
236 /* Multibyte character can't have a modifier bit. */
237 if (! SINGLE_BYTE_CHAR_P ((c
& ~CHAR_MODIFIER_MASK
)))
238 invalid_character (c
);
240 /* For Meta, Shift, and Control modifiers, we need special care. */
243 /* Move the meta bit to the right place for a string. */
244 c
= (c
& ~CHAR_META
) | 0x80;
248 /* Shift modifier is valid only with [A-Za-z]. */
249 if ((c
& 0377) >= 'A' && (c
& 0377) <= 'Z')
251 else if ((c
& 0377) >= 'a' && (c
& 0377) <= 'z')
252 c
= (c
& ~CHAR_SHIFT
) - ('a' - 'A');
256 /* Simulate the code in lread.c. */
257 /* Allow `\C- ' and `\C-?'. */
258 if (c
== (CHAR_CTL
| ' '))
260 else if (c
== (CHAR_CTL
| '?'))
262 /* ASCII control chars are made from letters (both cases),
263 as well as the non-letters within 0100...0137. */
264 else if ((c
& 0137) >= 0101 && (c
& 0137) <= 0132)
265 c
&= (037 | (~0177 & ~CHAR_CTL
));
266 else if ((c
& 0177) >= 0100 && (c
& 0177) <= 0137)
267 c
&= (037 | (~0177 & ~CHAR_CTL
));
270 /* If C still has any modifier bits, it is an invalid character. */
271 if (c
& CHAR_MODIFIER_MASK
)
272 invalid_character (c
);
281 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
282 if (charset
== CHARSET_COMPOSITION
)
285 invalid_character (c
);
286 if (c
>= MIN_CHAR_COMPOSITION
)
288 /* Valid composite character. */
289 *str
= cmpchar_table
[COMPOSITE_CHAR_ID (c
)]->data
;
290 workbuf
= *str
+ cmpchar_table
[COMPOSITE_CHAR_ID (c
)]->len
;
294 /* Invalid but can have multibyte form. */
296 *workbuf
++ = LEADING_CODE_COMPOSITION
;
297 *workbuf
++ = c1
| 0x80;
300 else if (charset
> CHARSET_COMPOSITION
)
303 if (charset
>= LEADING_CODE_EXT_11
)
304 *workbuf
++ = (charset
< LEADING_CODE_EXT_12
305 ? LEADING_CODE_PRIVATE_11
306 : (charset
< LEADING_CODE_EXT_21
307 ? LEADING_CODE_PRIVATE_12
308 : (charset
< LEADING_CODE_EXT_22
309 ? LEADING_CODE_PRIVATE_21
310 : LEADING_CODE_PRIVATE_22
)));
311 *workbuf
++ = charset
;
312 if (c1
> 0 && c1
< 32 || c2
> 0 && c2
< 32)
313 invalid_character (c
);
316 *workbuf
++ = c1
| 0x80;
318 *workbuf
++ = c2
| 0x80;
321 else if (charset
== CHARSET_ASCII
)
322 *workbuf
++= c
& 0x7F;
324 invalid_character (c
);
327 return (workbuf
- *str
);
330 /* Return the non-ASCII character corresponding to multi-byte form at
331 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
332 length of the multibyte form in *ACTUAL_LEN.
334 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
335 directly if you want ot handle ASCII characters as well. */
338 string_to_non_ascii_char (str
, len
, actual_len
)
339 const unsigned char *str
;
340 int len
, *actual_len
;
342 int c
, bytes
, charset
, c1
, c2
;
344 SPLIT_MULTIBYTE_SEQ (str
, len
, bytes
, charset
, c1
, c2
);
345 c
= MAKE_CHAR (charset
, c1
, c2
);
351 /* Return the length of the multi-byte form at string STR of length LEN.
352 Use the macro MULTIBYTE_FORM_LENGTH instead. */
354 multibyte_form_length (str
, len
)
355 const unsigned char *str
;
360 PARSE_MULTIBYTE_SEQ (str
, len
, bytes
);
364 /* Check multibyte form at string STR of length LEN and set variables
365 pointed by CHARSET, C1, and C2 to charset and position codes of the
366 character at STR, and return 0. If there's no multibyte character,
367 return -1. This should be used only in the macro SPLIT_STRING
368 which checks range of STR in advance. */
371 split_non_ascii_string (str
, len
, charset
, c1
, c2
)
372 const unsigned char *str
;
373 unsigned char *c1
, *c2
;
376 register int bytes
, cs
, code1
, code2
= -1;
378 SPLIT_MULTIBYTE_SEQ (str
, len
, bytes
, cs
, code1
, code2
);
379 if (cs
== CHARSET_ASCII
)
386 /* Return 1 iff character C has valid printable glyph.
387 Use the macro CHAR_PRINTABLE_P instead. */
392 int charset
, c1
, c2
, chars
;
394 if (SINGLE_BYTE_CHAR_P (c
))
396 if (c
>= MIN_CHAR_COMPOSITION
)
397 return (c
< MAX_CHAR
);
399 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
400 if (! CHARSET_DEFINED_P (charset
))
402 if (CHARSET_CHARS (charset
) == 94
403 ? c1
<= 32 || c1
>= 127
406 if (CHARSET_DIMENSION (charset
) == 2
407 && (CHARSET_CHARS (charset
) == 94
408 ? c2
<= 32 || c2
>= 127
414 /* Translate character C by translation table TABLE. If C
415 is negative, translate a character specified by CHARSET, C1, and C2
416 (C1 and C2 are code points of the character). If no translation is
417 found in TABLE, return C. */
419 translate_char (table
, c
, charset
, c1
, c2
)
421 int c
, charset
, c1
, c2
;
424 int alt_charset
, alt_c1
, alt_c2
, dimension
;
426 if (c
< 0) c
= MAKE_CHAR (charset
, (c1
& 0x7F) , (c2
& 0x7F));
427 if (!CHAR_TABLE_P (table
)
428 || (ch
= Faref (table
, make_number (c
)), !NATNUMP (ch
)))
431 SPLIT_CHAR (XFASTINT (ch
), alt_charset
, alt_c1
, alt_c2
);
432 dimension
= CHARSET_DIMENSION (alt_charset
);
433 if (dimension
== 1 && alt_c1
> 0 || dimension
== 2 && alt_c2
> 0)
434 /* CH is not a generic character, just return it. */
435 return XFASTINT (ch
);
437 /* Since CH is a generic character, we must return a specific
438 charater which has the same position codes as C from CH. */
440 SPLIT_CHAR (c
, charset
, c1
, c2
);
441 if (dimension
!= CHARSET_DIMENSION (charset
))
442 /* We can't make such a character because of dimension mismatch. */
444 return MAKE_CHAR (alt_charset
, c1
, c2
);
447 /* Convert the unibyte character C to multibyte based on
448 Vnonascii_translation_table or nonascii_insert_offset. If they can't
449 convert C to a valid multibyte character, convert it based on
450 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
453 unibyte_char_to_multibyte (c
)
456 if (c
< 0400 && c
>= 0200)
460 if (! NILP (Vnonascii_translation_table
))
462 c
= XINT (Faref (Vnonascii_translation_table
, make_number (c
)));
463 if (c
>= 0400 && ! char_valid_p (c
, 0))
464 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
466 else if (c
>= 0240 && nonascii_insert_offset
> 0)
468 c
+= nonascii_insert_offset
;
469 if (c
< 0400 || ! char_valid_p (c
, 0))
470 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
473 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
479 /* Convert the multibyte character C to unibyte 8-bit character based
480 on Vnonascii_translation_table or nonascii_insert_offset. If
481 REV_TBL is non-nil, it should be a reverse table of
482 Vnonascii_translation_table, i.e. what given by:
483 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
486 multibyte_char_to_unibyte (c
, rev_tbl
)
490 if (!SINGLE_BYTE_CHAR_P (c
))
494 if (! CHAR_TABLE_P (rev_tbl
)
495 && CHAR_TABLE_P (Vnonascii_translation_table
))
496 rev_tbl
= Fchar_table_extra_slot (Vnonascii_translation_table
,
498 if (CHAR_TABLE_P (rev_tbl
))
501 temp
= Faref (rev_tbl
, make_number (c
));
505 c
= (c_save
& 0177) + 0200;
509 if (nonascii_insert_offset
> 0)
510 c
-= nonascii_insert_offset
;
511 if (c
< 128 || c
>= 256)
512 c
= (c_save
& 0177) + 0200;
520 /* Update the table Vcharset_table with the given arguments (see the
521 document of `define-charset' for the meaning of each argument).
522 Several other table contents are also updated. The caller should
523 check the validity of CHARSET-ID and the remaining arguments in
527 update_charset_table (charset_id
, dimension
, chars
, width
, direction
,
528 iso_final_char
, iso_graphic_plane
,
529 short_name
, long_name
, description
)
530 Lisp_Object charset_id
, dimension
, chars
, width
, direction
;
531 Lisp_Object iso_final_char
, iso_graphic_plane
;
532 Lisp_Object short_name
, long_name
, description
;
534 int charset
= XINT (charset_id
);
536 unsigned char leading_code_base
, leading_code_ext
;
538 if (NILP (CHARSET_TABLE_ENTRY (charset
)))
539 CHARSET_TABLE_ENTRY (charset
)
540 = Fmake_vector (make_number (CHARSET_MAX_IDX
), Qnil
);
542 /* Get byte length of multibyte form, base leading-code, and
543 extended leading-code of the charset. See the comment under the
544 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
545 bytes
= XINT (dimension
);
546 if (charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
548 /* Official charset, it doesn't have an extended leading-code. */
549 if (charset
!= CHARSET_ASCII
)
550 bytes
+= 1; /* For a base leading-code. */
551 leading_code_base
= charset
;
552 leading_code_ext
= 0;
556 /* Private charset. */
557 bytes
+= 2; /* For base and extended leading-codes. */
559 = (charset
< LEADING_CODE_EXT_12
560 ? LEADING_CODE_PRIVATE_11
561 : (charset
< LEADING_CODE_EXT_21
562 ? LEADING_CODE_PRIVATE_12
563 : (charset
< LEADING_CODE_EXT_22
564 ? LEADING_CODE_PRIVATE_21
565 : LEADING_CODE_PRIVATE_22
)));
566 leading_code_ext
= charset
;
569 if (BYTES_BY_CHAR_HEAD (leading_code_base
) != bytes
)
570 error ("Invalid dimension for the charset-ID %d", charset
);
572 CHARSET_TABLE_INFO (charset
, CHARSET_ID_IDX
) = charset_id
;
573 CHARSET_TABLE_INFO (charset
, CHARSET_BYTES_IDX
) = make_number (bytes
);
574 CHARSET_TABLE_INFO (charset
, CHARSET_DIMENSION_IDX
) = dimension
;
575 CHARSET_TABLE_INFO (charset
, CHARSET_CHARS_IDX
) = chars
;
576 CHARSET_TABLE_INFO (charset
, CHARSET_WIDTH_IDX
) = width
;
577 CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
) = direction
;
578 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_BASE_IDX
)
579 = make_number (leading_code_base
);
580 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_EXT_IDX
)
581 = make_number (leading_code_ext
);
582 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_FINAL_CHAR_IDX
) = iso_final_char
;
583 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_GRAPHIC_PLANE_IDX
)
585 CHARSET_TABLE_INFO (charset
, CHARSET_SHORT_NAME_IDX
) = short_name
;
586 CHARSET_TABLE_INFO (charset
, CHARSET_LONG_NAME_IDX
) = long_name
;
587 CHARSET_TABLE_INFO (charset
, CHARSET_DESCRIPTION_IDX
) = description
;
588 CHARSET_TABLE_INFO (charset
, CHARSET_PLIST_IDX
) = Qnil
;
591 /* If we have already defined a charset which has the same
592 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
593 DIRECTION, we must update the entry REVERSE-CHARSET of both
594 charsets. If there's no such charset, the value of the entry
598 for (i
= 0; i
<= MAX_CHARSET
; i
++)
599 if (!NILP (CHARSET_TABLE_ENTRY (i
)))
601 if (CHARSET_DIMENSION (i
) == XINT (dimension
)
602 && CHARSET_CHARS (i
) == XINT (chars
)
603 && CHARSET_ISO_FINAL_CHAR (i
) == XINT (iso_final_char
)
604 && CHARSET_DIRECTION (i
) != XINT (direction
))
606 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
608 CHARSET_TABLE_INFO (i
, CHARSET_REVERSE_CHARSET_IDX
) = charset_id
;
613 /* No such a charset. */
614 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
618 if (charset
!= CHARSET_ASCII
619 && charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
621 width_by_char_head
[leading_code_base
] = XINT (width
);
623 /* Update table emacs_code_class. */
624 emacs_code_class
[charset
] = (bytes
== 2
625 ? EMACS_leading_code_2
627 ? EMACS_leading_code_3
628 : EMACS_leading_code_4
));
631 /* Update table iso_charset_table. */
632 if (ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) < 0)
633 ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) = charset
;
638 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
641 get_charset_id (charset_symbol
)
642 Lisp_Object charset_symbol
;
647 return ((SYMBOLP (charset_symbol
)
648 && (val
= Fget (charset_symbol
, Qcharset
), VECTORP (val
))
649 && (charset
= XINT (XVECTOR (val
)->contents
[CHARSET_ID_IDX
]),
650 CHARSET_VALID_P (charset
)))
654 /* Return an identification number for a new private charset of
655 DIMENSION and WIDTH. If there's no more room for the new charset,
658 get_new_private_charset_id (dimension
, width
)
659 int dimension
, width
;
661 int charset
, from
, to
;
666 from
= LEADING_CODE_EXT_11
, to
= LEADING_CODE_EXT_12
;
668 from
= LEADING_CODE_EXT_12
, to
= LEADING_CODE_EXT_21
;
673 from
= LEADING_CODE_EXT_21
, to
= LEADING_CODE_EXT_22
;
675 from
= LEADING_CODE_EXT_22
, to
= LEADING_CODE_EXT_MAX
+ 1;
678 for (charset
= from
; charset
< to
; charset
++)
679 if (!CHARSET_DEFINED_P (charset
)) break;
681 return make_number (charset
< to
? charset
: 0);
684 DEFUN ("define-charset", Fdefine_charset
, Sdefine_charset
, 3, 3, 0,
685 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
686 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
687 treated as a private charset.\n\
688 INFO-VECTOR is a vector of the format:\n\
689 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
690 SHORT-NAME LONG-NAME DESCRIPTION]\n\
691 The meanings of each elements is as follows:\n\
692 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
693 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
694 WIDTH (integer) is the number of columns a character in the charset\n\
695 occupies on the screen: one of 0, 1, and 2.\n\
697 DIRECTION (integer) is the rendering direction of characters in the\n\
698 charset when rendering. If 0, render from left to right, else\n\
699 render from right to left.\n\
701 ISO-FINAL-CHAR (character) is the final character of the\n\
702 corresponding ISO 2022 charset.\n\
704 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
705 while encoding to variants of ISO 2022 coding system, one of the\n\
706 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
708 SHORT-NAME (string) is the short name to refer to the charset.\n\
710 LONG-NAME (string) is the long name to refer to the charset.\n\
712 DESCRIPTION (string) is the description string of the charset.")
713 (charset_id
, charset_symbol
, info_vector
)
714 Lisp_Object charset_id
, charset_symbol
, info_vector
;
718 if (!NILP (charset_id
))
719 CHECK_NUMBER (charset_id
, 0);
720 CHECK_SYMBOL (charset_symbol
, 1);
721 CHECK_VECTOR (info_vector
, 2);
723 if (! NILP (charset_id
))
725 if (! CHARSET_VALID_P (XINT (charset_id
)))
726 error ("Invalid CHARSET: %d", XINT (charset_id
));
727 else if (CHARSET_DEFINED_P (XINT (charset_id
)))
728 error ("Already defined charset: %d", XINT (charset_id
));
731 vec
= XVECTOR (info_vector
)->contents
;
732 if (XVECTOR (info_vector
)->size
!= 9
733 || !INTEGERP (vec
[0]) || !(XINT (vec
[0]) == 1 || XINT (vec
[0]) == 2)
734 || !INTEGERP (vec
[1]) || !(XINT (vec
[1]) == 94 || XINT (vec
[1]) == 96)
735 || !INTEGERP (vec
[2]) || !(XINT (vec
[2]) == 1 || XINT (vec
[2]) == 2)
736 || !INTEGERP (vec
[3]) || !(XINT (vec
[3]) == 0 || XINT (vec
[3]) == 1)
737 || !INTEGERP (vec
[4]) || !(XINT (vec
[4]) >= '0' && XINT (vec
[4]) <= '~')
738 || !INTEGERP (vec
[5]) || !(XINT (vec
[5]) == 0 || XINT (vec
[5]) == 1)
741 || !STRINGP (vec
[8]))
742 error ("Invalid info-vector argument for defining charset %s",
743 XSYMBOL (charset_symbol
)->name
->data
);
745 if (NILP (charset_id
))
747 charset_id
= get_new_private_charset_id (XINT (vec
[0]), XINT (vec
[2]));
748 if (XINT (charset_id
) == 0)
749 error ("There's no room for a new private charset %s",
750 XSYMBOL (charset_symbol
)->name
->data
);
753 update_charset_table (charset_id
, vec
[0], vec
[1], vec
[2], vec
[3],
754 vec
[4], vec
[5], vec
[6], vec
[7], vec
[8]);
755 Fput (charset_symbol
, Qcharset
, CHARSET_TABLE_ENTRY (XINT (charset_id
)));
756 CHARSET_SYMBOL (XINT (charset_id
)) = charset_symbol
;
757 Vcharset_list
= Fcons (charset_symbol
, Vcharset_list
);
761 DEFUN ("generic-character-list", Fgeneric_character_list
,
762 Sgeneric_character_list
, 0, 0, 0,
763 "Return a list of all possible generic characters.\n\
764 It includes a generic character for a charset not yet defined.")
767 return Vgeneric_character_list
;
770 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
771 Sget_unused_iso_final_char
, 2, 2, 0,
772 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
773 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
774 CHARS is the number of characters in a dimension: 94 or 96.\n\
776 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
777 If there's no unused final char for the specified kind of charset,\n\
780 Lisp_Object dimension
, chars
;
784 CHECK_NUMBER (dimension
, 0);
785 CHECK_NUMBER (chars
, 1);
786 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
787 error ("Invalid charset dimension %d, it should be 1 or 2",
789 if (XINT (chars
) != 94 && XINT (chars
) != 96)
790 error ("Invalid charset chars %d, it should be 94 or 96",
792 for (final_char
= '0'; final_char
<= '?'; final_char
++)
794 if (ISO_CHARSET_TABLE (dimension
, chars
, make_number (final_char
)) < 0)
797 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
800 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
802 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
803 CHARSET should be defined by `defined-charset' in advance.")
804 (dimension
, chars
, final_char
, charset_symbol
)
805 Lisp_Object dimension
, chars
, final_char
, charset_symbol
;
809 CHECK_NUMBER (dimension
, 0);
810 CHECK_NUMBER (chars
, 1);
811 CHECK_NUMBER (final_char
, 2);
812 CHECK_SYMBOL (charset_symbol
, 3);
814 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
815 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension
));
816 if (XINT (chars
) != 94 && XINT (chars
) != 96)
817 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
818 if (XINT (final_char
) < '0' || XFASTINT (final_char
) > '~')
819 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
820 if ((charset
= get_charset_id (charset_symbol
)) < 0)
821 error ("Invalid charset %s", XSYMBOL (charset_symbol
)->name
->data
);
823 ISO_CHARSET_TABLE (dimension
, chars
, final_char
) = charset
;
827 /* Return number of different charsets in STR of length LEN. In
828 addition, for each found charset N, CHARSETS[N] is set 1. The
829 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
830 It may lookup a translation table TABLE if supplied.
832 If CMPCHARP is nonzero and some composite character is found,
833 CHARSETS[128] is also set 1 and the returned number is incremented
836 If MULTIBYTE is zero, do not check multibyte characters, i.e. if
837 any ASCII codes (7-bit) are found, CHARSET[0] is set to 1, if any
838 8-bit codes are found CHARSET[1] is set to 1. */
841 find_charset_in_str (str
, len
, charsets
, table
, cmpcharp
, multibyte
)
848 register int num
= 0, c
;
852 unsigned char *endp
= str
+ len
;
855 while (str
< endp
&& maskbits
!= 3)
856 maskbits
|= (*str
++ < 0x80 ? 1 : 2);
870 if (! CHAR_TABLE_P (table
))
878 if (c
== LEADING_CODE_COMPOSITION
)
880 int cmpchar_id
= str_cmpchar_id (str
, len
);
885 struct cmpchar_info
*cmp_p
= cmpchar_table
[cmpchar_id
];
888 for (i
= 0; i
< cmp_p
->glyph_len
; i
++)
893 if ((c
= translate_char (table
, c
, 0, 0, 0)) < 0)
896 if ((charset
= CHAR_CHARSET (c
)) < 0)
897 charset
= CHARSET_ASCII
;
898 if (!charsets
[charset
])
900 charsets
[charset
] = 1;
906 if (cmpcharp
&& !charsets
[CHARSET_COMPOSITION
])
908 charsets
[CHARSET_COMPOSITION
] = 1;
914 charset
= 1; /* This leads to `unknown' charset. */
919 c
= STRING_CHAR_AND_LENGTH (str
, len
, bytes
);
922 int c1
= translate_char (table
, c
, 0, 0, 0);
926 charset
= CHAR_CHARSET (c
);
929 if (!charsets
[charset
])
931 charsets
[charset
] = 1;
940 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
942 "Return a list of charsets in the region between BEG and END.\n\
943 BEG and END are buffer positions.\n\
944 If the region contains any composite character,\n\
945 `composition' is included in the returned list.\n\
946 Optional arg TABLE if non-nil is a translation table to look up.\n\
948 If the region contains invalid multiybte characters,\n\
949 `unknown' is included in the returned list.\n\
951 If the current buffer is unibyte, the returned list contains\n\
952 `ascii' if any 7-bit characters are found,\n\
953 and `unknown' if any 8-bit characters are found.")
955 Lisp_Object beg
, end
, table
;
957 int charsets
[MAX_CHARSET
+ 1];
958 int from
, from_byte
, to
, stop
, stop_byte
, i
;
961 int multibyte
= !NILP (current_buffer
->enable_multibyte_characters
);
963 validate_region (&beg
, &end
);
964 from
= XFASTINT (beg
);
965 stop
= to
= XFASTINT (end
);
967 if (from
< GPT
&& GPT
< to
)
970 stop_byte
= GPT_BYTE
;
973 stop_byte
= CHAR_TO_BYTE (stop
);
975 from_byte
= CHAR_TO_BYTE (from
);
977 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
980 find_charset_in_str (BYTE_POS_ADDR (from_byte
), stop_byte
- from_byte
,
981 charsets
, table
, 1, multibyte
);
984 from
= stop
, from_byte
= stop_byte
;
985 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
993 for (i
= (multibyte
? MAX_CHARSET
: 1); i
>= 0; i
--)
996 if (CHARSET_DEFINED_P (i
) || i
== CHARSET_COMPOSITION
)
997 val
= Fcons (CHARSET_SYMBOL (i
), val
);
1002 val
= Fcons (Qunknown
, val
);
1006 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
1008 "Return a list of charsets in STR.\n\
1009 If the string contains any composite characters,\n\
1010 `composition' is included in the returned list.\n\
1011 Optional arg TABLE if non-nil is a translation table to look up.\n\
1013 If the region contains invalid multiybte characters,\n\
1014 `unknown' is included in the returned list.\n\
1016 If STR is unibyte, the returned list contains\n\
1017 `ascii' if any 7-bit characters are found,\n\
1018 and `unknown' if any 8-bit characters are found.")
1020 Lisp_Object str
, table
;
1022 int charsets
[MAX_CHARSET
+ 1];
1028 CHECK_STRING (str
, 0);
1029 multibyte
= STRING_MULTIBYTE (str
);
1031 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
1032 find_charset_in_str (XSTRING (str
)->data
, STRING_BYTES (XSTRING (str
)),
1033 charsets
, table
, 1, multibyte
);
1036 for (i
= (multibyte
? MAX_CHARSET
: 1); i
>= 0; i
--)
1039 if (CHARSET_DEFINED_P (i
) || i
== CHARSET_COMPOSITION
)
1040 val
= Fcons (CHARSET_SYMBOL (i
), val
);
1045 val
= Fcons (Qunknown
, val
);
1049 DEFUN ("make-char-internal", Fmake_char_internal
, Smake_char_internal
, 1, 3, 0,
1051 (charset
, code1
, code2
)
1052 Lisp_Object charset
, code1
, code2
;
1054 int charset_id
, c1
, c2
;
1056 CHECK_NUMBER (charset
, 0);
1057 charset_id
= XINT (charset
);
1058 if (!CHARSET_DEFINED_P (charset_id
))
1059 error ("Invalid charset ID: %d", XINT (charset
));
1065 CHECK_NUMBER (code1
, 1);
1072 CHECK_NUMBER (code2
, 2);
1076 if (c1
< 0 || c1
> 0xFF || c2
< 0 || c2
> 0xFF)
1077 error ("Invalid code points: %d %d", c1
, c2
);
1083 ? !CHAR_COMPONENTS_VALID_P (charset
, c1
, 0x20)
1084 : !CHAR_COMPONENTS_VALID_P (charset
, c1
, c2
)))
1085 error ("Invalid code points: %d %d", c1
, c2
);
1087 return make_number (MAKE_CHAR (charset_id
, c1
, c2
));
1090 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
1091 "Return list of charset and one or two position-codes of CHAR.\n\
1092 If CHAR is invalid as a character code,\n\
1093 return a list of symbol `unknown' and CHAR.")
1098 int c
, charset
, c1
, c2
;
1100 CHECK_NUMBER (ch
, 0);
1102 if (!CHAR_VALID_P (c
, 1))
1103 return Fcons (Qunknown
, Fcons (ch
, Qnil
));
1104 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
1106 ? Fcons (CHARSET_SYMBOL (charset
),
1107 Fcons (make_number (c1
), Fcons (make_number (c2
), Qnil
)))
1108 : Fcons (CHARSET_SYMBOL (charset
), Fcons (make_number (c1
), Qnil
)));
1111 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
1112 "Return charset of CHAR.")
1116 CHECK_NUMBER (ch
, 0);
1118 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch
)));
1121 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
1122 "Return charset of a character in the current buffer at position POS.\n\
1123 If POS is nil, it defauls to the current point.\n\
1124 If POS is out of range, the value is nil.")
1128 register int pos_byte
, bytes
, charset
, c1
, c2
;
1129 register unsigned char *p
;
1133 else if (MARKERP (pos
))
1135 pos_byte
= marker_byte_position (pos
);
1136 if (pos_byte
< BEGV_BYTE
|| pos_byte
>= ZV_BYTE
)
1141 CHECK_NUMBER (pos
, 0);
1142 if (XINT (pos
) < BEGV
|| XINT (pos
) >= ZV
)
1144 pos_byte
= CHAR_TO_BYTE (XINT (pos
));
1146 p
= BYTE_POS_ADDR (pos_byte
);
1147 if (BASE_LEADING_CODE_P (*p
))
1149 SPLIT_MULTIBYTE_SEQ (p
, Z_BYTE
- pos_byte
, bytes
, charset
, c1
, c2
);
1154 charset
= CHARSET_ASCII
;
1156 return CHARSET_SYMBOL (charset
);
1159 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
1160 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1162 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1163 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1164 where as Emacs distinguishes them by charset symbol.\n\
1165 See the documentation of the function `charset-info' for the meanings of\n\
1166 DIMENSION, CHARS, and FINAL-CHAR.")
1167 (dimension
, chars
, final_char
)
1168 Lisp_Object dimension
, chars
, final_char
;
1172 CHECK_NUMBER (dimension
, 0);
1173 CHECK_NUMBER (chars
, 1);
1174 CHECK_NUMBER (final_char
, 2);
1176 if ((charset
= ISO_CHARSET_TABLE (dimension
, chars
, final_char
)) < 0)
1178 return CHARSET_SYMBOL (charset
);
1181 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1182 generic character. If GENERICP is zero, return nonzero iff C is a
1183 valid normal character. Do not call this function directly,
1184 instead use macro CHAR_VALID_P. */
1186 char_valid_p (c
, genericp
)
1189 int charset
, c1
, c2
;
1193 if (SINGLE_BYTE_CHAR_P (c
))
1195 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
1196 if (charset
== CHARSET_COMPOSITION
)
1197 return ((c
>= MIN_CHAR_COMPOSITION
1198 && c
< MIN_CHAR_COMPOSITION
+ n_cmpchars
)
1199 || (genericp
&& c
== GENERIC_COMPOSITION_CHAR
));
1204 if (c2
<= 0) c2
= 0x20;
1208 if (c2
<= 0) c1
= c2
= 0x20;
1211 return (CHARSET_DEFINED_P (charset
)
1212 && CHAR_COMPONENTS_VALID_P (charset
, c1
, c2
));
1215 DEFUN ("char-valid-p", Fchar_valid_p
, Schar_valid_p
, 1, 2, 0,
1216 "Return t if OBJECT is a valid normal character.\n\
1217 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1218 a valid generic character.")
1220 Lisp_Object object
, genericp
;
1222 if (! NATNUMP (object
))
1224 return (CHAR_VALID_P (XFASTINT (object
), !NILP (genericp
)) ? Qt
: Qnil
);
1227 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte
,
1228 Sunibyte_char_to_multibyte
, 1, 1, 0,
1229 "Convert the unibyte character CH to multibyte character.\n\
1230 The conversion is done based on `nonascii-translation-table' (which see)\n\
1231 or `nonascii-insert-offset' (which see).")
1237 CHECK_NUMBER (ch
, 0);
1239 if (c
< 0 || c
>= 0400)
1240 error ("Invalid unibyte character: %d", c
);
1241 c
= unibyte_char_to_multibyte (c
);
1243 error ("Can't convert to multibyte character: %d", XINT (ch
));
1244 return make_number (c
);
1247 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte
,
1248 Smultibyte_char_to_unibyte
, 1, 1, 0,
1249 "Convert the multibyte character CH to unibyte character.\n\
1250 The conversion is done based on `nonascii-translation-table' (which see)\n\
1251 or `nonascii-insert-offset' (which see).")
1257 CHECK_NUMBER (ch
, 0);
1259 if (! CHAR_VALID_P (c
, 0))
1260 error ("Invalid multibyte character: %d", c
);
1261 c
= multibyte_char_to_unibyte (c
, Qnil
);
1263 error ("Can't convert to unibyte character: %d", XINT (ch
));
1264 return make_number (c
);
1267 DEFUN ("char-bytes", Fchar_bytes
, Schar_bytes
, 1, 1, 0,
1268 "Return 1 regardless of the argument CHAR.\n\
1269 This is now an obsolete function. We keep it just for backward compatibility.")
1275 CHECK_NUMBER (ch
, 0);
1276 return make_number (1);
1279 /* Return how many bytes C will occupy in a multibyte buffer.
1280 Don't call this function directly, instead use macro CHAR_BYTES. */
1287 if (SINGLE_BYTE_CHAR_P (c
) || (c
& ~GLYPH_MASK_CHAR
))
1290 if (COMPOSITE_CHAR_P (c
))
1292 unsigned int id
= COMPOSITE_CHAR_ID (c
);
1294 bytes
= (id
< n_cmpchars
? cmpchar_table
[id
]->len
: 1);
1298 int charset
= CHAR_CHARSET (c
);
1300 bytes
= CHARSET_DEFINED_P (charset
) ? CHARSET_BYTES (charset
) : 1;
1306 /* Return the width of character of which multi-byte form starts with
1307 C. The width is measured by how many columns occupied on the
1308 screen when displayed in the current buffer. */
1310 #define ONE_BYTE_CHAR_WIDTH(c) \
1313 ? XFASTINT (current_buffer->tab_width) \
1314 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1318 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1319 : ((! NILP (current_buffer->enable_multibyte_characters) \
1320 && BASE_LEADING_CODE_P (c)) \
1321 ? WIDTH_BY_CHAR_HEAD (c) \
1324 DEFUN ("char-width", Fchar_width
, Schar_width
, 1, 1, 0,
1325 "Return width of CHAR when displayed in the current buffer.\n\
1326 The width is measured by how many columns it occupies on the screen.")
1330 Lisp_Object val
, disp
;
1332 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1334 CHECK_NUMBER (ch
, 0);
1338 /* Get the way the display table would display it. */
1339 disp
= dp
? DISP_CHAR_VECTOR (dp
, c
) : Qnil
;
1342 XSETINT (val
, XVECTOR (disp
)->size
);
1343 else if (SINGLE_BYTE_CHAR_P (c
))
1344 XSETINT (val
, ONE_BYTE_CHAR_WIDTH (c
));
1345 else if (COMPOSITE_CHAR_P (c
))
1347 int id
= COMPOSITE_CHAR_ID (XFASTINT (ch
));
1348 XSETFASTINT (val
, (id
< n_cmpchars
? cmpchar_table
[id
]->width
: 1));
1352 int charset
= CHAR_CHARSET (c
);
1354 XSETFASTINT (val
, CHARSET_WIDTH (charset
));
1359 /* Return width of string STR of length LEN when displayed in the
1360 current buffer. The width is measured by how many columns it
1361 occupies on the screen. */
1368 unsigned char *endp
= str
+ len
;
1370 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1374 if (*str
== LEADING_CODE_COMPOSITION
)
1376 int id
= str_cmpchar_id (str
, endp
- str
);
1385 width
+= cmpchar_table
[id
]->width
;
1386 str
+= cmpchar_table
[id
]->len
;
1393 int c
= STRING_CHAR_AND_LENGTH (str
, endp
- str
, thislen
);
1395 /* Get the way the display table would display it. */
1397 disp
= DISP_CHAR_VECTOR (dp
, c
);
1402 width
+= XVECTOR (disp
)->size
;
1404 width
+= ONE_BYTE_CHAR_WIDTH (*str
);
1412 DEFUN ("string-width", Fstring_width
, Sstring_width
, 1, 1, 0,
1413 "Return width of STRING when displayed in the current buffer.\n\
1414 Width is measured by how many columns it occupies on the screen.\n\
1415 When calculating width of a multibyte character in STRING,\n\
1416 only the base leading-code is considered; the validity of\n\
1417 the following bytes is not checked.")
1423 CHECK_STRING (str
, 0);
1424 XSETFASTINT (val
, strwidth (XSTRING (str
)->data
,
1425 STRING_BYTES (XSTRING (str
))));
1429 DEFUN ("char-direction", Fchar_direction
, Schar_direction
, 1, 1, 0,
1430 "Return the direction of CHAR.\n\
1431 The returned value is 0 for left-to-right and 1 for right-to-left.")
1437 CHECK_NUMBER (ch
, 0);
1438 charset
= CHAR_CHARSET (XFASTINT (ch
));
1439 if (!CHARSET_DEFINED_P (charset
))
1440 invalid_character (XINT (ch
));
1441 return CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
);
1444 DEFUN ("chars-in-region", Fchars_in_region
, Schars_in_region
, 2, 2, 0,
1445 "Return number of characters between BEG and END.")
1447 Lisp_Object beg
, end
;
1451 CHECK_NUMBER_COERCE_MARKER (beg
, 0);
1452 CHECK_NUMBER_COERCE_MARKER (end
, 1);
1454 from
= min (XFASTINT (beg
), XFASTINT (end
));
1455 to
= max (XFASTINT (beg
), XFASTINT (end
));
1457 return make_number (to
- from
);
1460 /* Return the number of characters in the NBYTES bytes at PTR.
1461 This works by looking at the contents and checking for multibyte sequences.
1462 However, if the current buffer has enable-multibyte-characters = nil,
1463 we treat each byte as a character. */
1466 chars_in_text (ptr
, nbytes
)
1470 /* current_buffer is null at early stages of Emacs initialization. */
1471 if (current_buffer
== 0
1472 || NILP (current_buffer
->enable_multibyte_characters
))
1475 return multibyte_chars_in_text (ptr
, nbytes
);
1478 /* Return the number of characters in the NBYTES bytes at PTR.
1479 This works by looking at the contents and checking for multibyte sequences.
1480 It ignores enable-multibyte-characters. */
1483 multibyte_chars_in_text (ptr
, nbytes
)
1487 unsigned char *endp
;
1490 endp
= ptr
+ nbytes
;
1495 if (BASE_LEADING_CODE_P (*ptr
))
1497 PARSE_MULTIBYTE_SEQ (ptr
, nbytes
, bytes
);
1512 DEFUN ("string", Fstring
, Sstring
, 1, MANY
, 0,
1513 "Concatenate all the argument characters and make the result a string.")
1520 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM
* n
);
1521 unsigned char *p
= buf
;
1524 for (i
= 0; i
< n
; i
++)
1529 if (!INTEGERP (args
[i
]))
1530 CHECK_NUMBER (args
[i
], 0);
1532 len
= CHAR_STRING (c
, p
, str
);
1534 /* C is a composite character. */
1535 bcopy (str
, p
, len
);
1539 /* Here, we can't use make_string_from_bytes because of byte
1540 combining problem. */
1541 val
= make_string (buf
, p
- buf
);
1547 /*** Composite characters staffs ***/
1549 /* Each composite character is identified by CMPCHAR-ID which is
1550 assigned when Emacs needs the character code of the composite
1551 character (e.g. when displaying it on the screen). See the
1552 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1553 composite character is represented in Emacs. */
1555 /* If `static' is defined, it means that it is defined to null string. */
1557 /* The following function is copied from lread.c. */
1559 hash_string (ptr
, len
)
1563 register unsigned char *p
= ptr
;
1564 register unsigned char *end
= p
+ len
;
1565 register unsigned char c
;
1566 register int hash
= 0;
1571 if (c
>= 0140) c
-= 40;
1572 hash
= ((hash
<<3) + (hash
>>28) + c
);
1574 return hash
& 07777777777;
1578 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
1580 static int *cmpchar_hash_table
[CMPCHAR_HASH_TABLE_SIZE
];
1582 /* Each element of `cmpchar_hash_table' is a pointer to an array of
1583 integer, where the 1st element is the size of the array, the 2nd
1584 element is how many elements are actually used in the array, and
1585 the remaining elements are CMPCHAR-IDs of composite characters of
1586 the same hash value. */
1587 #define CMPCHAR_HASH_SIZE(table) table[0]
1588 #define CMPCHAR_HASH_USED(table) table[1]
1589 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1591 /* Return CMPCHAR-ID of the composite character in STR of the length
1592 LEN. If the composite character has not yet been registered,
1593 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1594 is the sole function for assigning CMPCHAR-ID. */
1596 str_cmpchar_id (str
, len
)
1597 const unsigned char *str
;
1600 int hash_idx
, *hashp
;
1602 int embedded_rule
; /* 1 if composition rule is embedded. */
1603 int chars
; /* number of components. */
1605 struct cmpchar_info
*cmpcharp
;
1607 /* The second byte 0xFF means COMPOSITION rule is embedded. */
1608 embedded_rule
= (str
[1] == 0xFF);
1610 /* At first, get the actual length of the composite character. */
1612 const unsigned char *p
, *endp
= str
+ 1, *lastp
= str
+ len
;
1615 while (endp
< lastp
&& ! CHAR_HEAD_P (*endp
)) endp
++;
1617 /* Any composite char have at least 5-byte length. */
1630 /* No need of checking if *P is 0xA0 because
1631 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1632 p
+= BYTES_BY_CHAR_HEAD (*p
- 0x20);
1635 if (p
> endp
|| chars
< 2 || chars
> MAX_COMPONENT_COUNT
)
1636 /* Invalid components. */
1640 hash_idx
= hash_string (str
, len
) % CMPCHAR_HASH_TABLE_SIZE
;
1641 hashp
= cmpchar_hash_table
[hash_idx
];
1643 /* Then, look into the hash table. */
1645 /* Find the correct one among composite characters of the same
1647 for (i
= 2; i
< CMPCHAR_HASH_USED (hashp
); i
++)
1649 cmpcharp
= cmpchar_table
[CMPCHAR_HASH_CMPCHAR_ID (hashp
, i
)];
1650 if (len
== cmpcharp
->len
1651 && ! bcmp (str
, cmpcharp
->data
, len
))
1652 return CMPCHAR_HASH_CMPCHAR_ID (hashp
, i
);
1655 /* We have to register the composite character in cmpchar_table. */
1656 if (n_cmpchars
>= (CHAR_FIELD2_MASK
| CHAR_FIELD3_MASK
))
1657 /* No, we have no more room for a new composite character. */
1660 /* Make the entry in hash table. */
1663 /* Make a table for 8 composite characters initially. */
1664 hashp
= (cmpchar_hash_table
[hash_idx
]
1665 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1666 CMPCHAR_HASH_SIZE (hashp
) = 10;
1667 CMPCHAR_HASH_USED (hashp
) = 2;
1669 else if (CMPCHAR_HASH_USED (hashp
) >= CMPCHAR_HASH_SIZE (hashp
))
1671 CMPCHAR_HASH_SIZE (hashp
) += 8;
1672 hashp
= (cmpchar_hash_table
[hash_idx
]
1673 = (int *) xrealloc (hashp
,
1674 sizeof (int) * CMPCHAR_HASH_SIZE (hashp
)));
1676 CMPCHAR_HASH_CMPCHAR_ID (hashp
, CMPCHAR_HASH_USED (hashp
)) = n_cmpchars
;
1677 CMPCHAR_HASH_USED (hashp
)++;
1679 /* Set information of the composite character in cmpchar_table. */
1680 if (cmpchar_table_size
== 0)
1682 /* This is the first composite character to be registered. */
1683 cmpchar_table_size
= 256;
1685 = (struct cmpchar_info
**) xmalloc (sizeof (cmpchar_table
[0])
1686 * cmpchar_table_size
);
1688 else if (cmpchar_table_size
<= n_cmpchars
)
1690 cmpchar_table_size
+= 256;
1692 = (struct cmpchar_info
**) xrealloc (cmpchar_table
,
1693 sizeof (cmpchar_table
[0])
1694 * cmpchar_table_size
);
1697 cmpcharp
= (struct cmpchar_info
*) xmalloc (sizeof (struct cmpchar_info
));
1699 cmpcharp
->len
= len
;
1700 cmpcharp
->data
= (unsigned char *) xmalloc (len
+ 1);
1701 bcopy (str
, cmpcharp
->data
, len
);
1702 cmpcharp
->data
[len
] = 0;
1703 cmpcharp
->glyph_len
= chars
;
1704 cmpcharp
->glyph
= (GLYPH
*) xmalloc (sizeof (GLYPH
) * chars
);
1707 cmpcharp
->cmp_rule
= (unsigned char *) xmalloc (chars
);
1708 cmpcharp
->col_offset
= (float *) xmalloc (sizeof (float) * chars
);
1712 cmpcharp
->cmp_rule
= NULL
;
1713 cmpcharp
->col_offset
= NULL
;
1716 /* Setup GLYPH data and composition rules (if any) so as not to make
1717 them every time on displaying. */
1719 unsigned char *bufp
;
1721 float leftmost
= 0.0, rightmost
= 1.0;
1724 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1725 cmpcharp
->col_offset
[0] = 0;
1727 for (i
= 0, bufp
= cmpcharp
->data
+ 1; i
< chars
; i
++)
1730 cmpcharp
->cmp_rule
[i
] = *bufp
++;
1732 if (*bufp
== 0xA0) /* This is an ASCII character. */
1734 cmpcharp
->glyph
[i
] = FAST_MAKE_GLYPH ((*++bufp
& 0x7F), 0);
1738 else /* Multibyte character. */
1740 /* Make `bufp' point normal multi-byte form temporally. */
1743 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp
, 4, 0), 0);
1744 width
= WIDTH_BY_CHAR_HEAD (*bufp
);
1746 bufp
+= BYTES_BY_CHAR_HEAD (*bufp
- 0x20);
1749 if (embedded_rule
&& i
> 0)
1751 /* Reference points (global_ref and new_ref) are
1762 Now, we calculate the column offset of the new glyph
1763 from the left edge of the first glyph. This can avoid
1764 the same calculation everytime displaying this
1765 composite character. */
1767 /* Reference points of global glyph and new glyph. */
1768 int global_ref
= (cmpcharp
->cmp_rule
[i
] - 0xA0) / 9;
1769 int new_ref
= (cmpcharp
->cmp_rule
[i
] - 0xA0) % 9;
1770 /* Column offset relative to the first glyph. */
1771 float left
= (leftmost
1772 + (global_ref
% 3) * (rightmost
- leftmost
) / 2.0
1773 - (new_ref
% 3) * width
/ 2.0);
1775 cmpcharp
->col_offset
[i
] = left
;
1776 if (left
< leftmost
)
1778 if (left
+ width
> rightmost
)
1779 rightmost
= left
+ width
;
1783 if (width
> rightmost
)
1789 /* Now col_offset[N] are relative to the left edge of the
1790 first component. Make them relative to the left edge of
1792 for (i
= 0; i
< chars
; i
++)
1793 cmpcharp
->col_offset
[i
] -= leftmost
;
1794 /* Make rightmost holds width of overall glyph. */
1795 rightmost
-= leftmost
;
1798 cmpcharp
->width
= rightmost
;
1799 if (cmpcharp
->width
< rightmost
)
1800 /* To get a ceiling integer value. */
1804 cmpchar_table
[n_cmpchars
] = cmpcharp
;
1806 return n_cmpchars
++;
1809 /* Return the Nth element of the composite character C. If NOERROR is
1810 nonzero, return 0 on error condition (C is an invalid composite
1811 charcter, or N is out of range). */
1813 cmpchar_component (c
, n
, noerror
)
1816 int id
= COMPOSITE_CHAR_ID (c
);
1818 if (id
< 0 || id
>= n_cmpchars
)
1820 /* C is not a valid composite character. */
1821 if (noerror
) return 0;
1822 error ("Invalid composite character: %d", c
) ;
1824 if (n
>= cmpchar_table
[id
]->glyph_len
)
1826 /* No such component. */
1827 if (noerror
) return 0;
1828 args_out_of_range (make_number (c
), make_number (n
));
1830 /* No face data is stored in glyph code. */
1831 return ((int) (cmpchar_table
[id
]->glyph
[n
]));
1834 DEFUN ("cmpcharp", Fcmpcharp
, Scmpcharp
, 1, 1, 0,
1835 "T if CHAR is a composite character.")
1839 CHECK_NUMBER (ch
, 0);
1840 return (COMPOSITE_CHAR_P (XINT (ch
)) ? Qt
: Qnil
);
1843 DEFUN ("composite-char-component", Fcmpchar_component
, Scmpchar_component
,
1845 "Return the Nth component character of composite character CHARACTER.")
1847 Lisp_Object character
, n
;
1851 CHECK_NUMBER (character
, 0);
1852 CHECK_NUMBER (n
, 1);
1854 return (make_number (cmpchar_component (XINT (character
), XINT (n
), 0)));
1857 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule
, Scmpchar_cmp_rule
,
1859 "Return the Nth composition rule of composite character CHARACTER.\n\
1860 The returned rule is for composing the Nth component\n\
1861 on the (N-1)th component.\n\
1862 If CHARACTER should be composed relatively or N is 0, return 255.")
1864 Lisp_Object character
, n
;
1868 CHECK_NUMBER (character
, 0);
1869 CHECK_NUMBER (n
, 1);
1871 id
= COMPOSITE_CHAR_ID (XINT (character
));
1872 if (id
< 0 || id
>= n_cmpchars
)
1873 error ("Invalid composite character: %d", XINT (character
));
1874 if (XINT (n
) < 0 || XINT (n
) >= cmpchar_table
[id
]->glyph_len
)
1875 args_out_of_range (character
, n
);
1877 return make_number (cmpchar_table
[id
]->cmp_rule
1878 ? cmpchar_table
[id
]->cmp_rule
[XINT (n
)]
1882 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p
,
1883 Scmpchar_cmp_rule_p
, 1, 1, 0,
1884 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1886 Lisp_Object character
;
1890 CHECK_NUMBER (character
, 0);
1891 id
= COMPOSITE_CHAR_ID (XINT (character
));
1892 if (id
< 0 || id
>= n_cmpchars
)
1893 error ("Invalid composite character: %d", XINT (character
));
1895 return (cmpchar_table
[id
]->cmp_rule
? Qt
: Qnil
);
1898 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count
,
1899 Scmpchar_cmp_count
, 1, 1, 0,
1900 "Return number of compoents of composite character CHARACTER.")
1902 Lisp_Object character
;
1906 CHECK_NUMBER (character
, 0);
1907 id
= COMPOSITE_CHAR_ID (XINT (character
));
1908 if (id
< 0 || id
>= n_cmpchars
)
1909 error ("Invalid composite character: %d", XINT (character
));
1911 return (make_number (cmpchar_table
[id
]->glyph_len
));
1914 DEFUN ("compose-string", Fcompose_string
, Scompose_string
,
1916 "Return one char string composed from all characters in STRING.")
1920 unsigned char buf
[MAX_LENGTH_OF_MULTI_BYTE_FORM
], *p
, *pend
, *ptemp
;
1923 CHECK_STRING (str
, 0);
1925 buf
[0] = LEADING_CODE_COMPOSITION
;
1926 p
= XSTRING (str
)->data
;
1927 pend
= p
+ STRING_BYTES (XSTRING (str
));
1931 if (*p
< 0x20) /* control code */
1932 error ("Invalid component character: %d", *p
);
1933 else if (*p
< 0x80) /* ASCII */
1935 if (i
+ 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM
)
1936 error ("Too long string to be composed: %s", XSTRING (str
)->data
);
1937 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1940 buf
[i
++] = *p
++ + 0x80;
1942 else if (*p
== LEADING_CODE_COMPOSITION
) /* composite char */
1944 /* Already composed. Eliminate the heading
1945 LEADING_CODE_COMPOSITION, keep the remaining bytes
1949 error ("Can't compose a rule-based composition character");
1951 while (! CHAR_HEAD_P (*p
)) p
++;
1952 if (str_cmpchar_id (ptemp
- 1, p
- ptemp
+ 1) < 0)
1953 error ("Can't compose an invalid composition character");
1954 if (i
+ (p
- ptemp
) >= MAX_LENGTH_OF_MULTI_BYTE_FORM
)
1955 error ("Too long string to be composed: %s", XSTRING (str
)->data
);
1956 bcopy (ptemp
, buf
+ i
, p
- ptemp
);
1959 else /* multibyte char */
1961 /* Add 0x20 to the base leading-code, keep the remaining
1963 int c
= STRING_CHAR_AND_LENGTH (p
, pend
- p
, len
);
1965 if (len
<= 1 || ! CHAR_VALID_P (c
, 0))
1966 error ("Can't compose an invalid character");
1967 if (i
+ len
>= MAX_LENGTH_OF_MULTI_BYTE_FORM
)
1968 error ("Too long string to be composed: %s", XSTRING (str
)->data
);
1969 bcopy (p
, buf
+ i
, len
);
1976 /* STR contains only one character, which can't be composed. */
1977 error ("Too short string to be composed: %s", XSTRING (str
)->data
);
1979 return make_string_from_bytes (buf
, 1, i
);
1984 charset_id_internal (charset_name
)
1989 val
= Fget (intern (charset_name
), Qcharset
);
1991 error ("Charset %s is not defined", charset_name
);
1993 return (XINT (XVECTOR (val
)->contents
[0]));
1996 DEFUN ("setup-special-charsets", Fsetup_special_charsets
,
1997 Ssetup_special_charsets
, 0, 0, 0, "Internal use only.")
2000 charset_latin_iso8859_1
= charset_id_internal ("latin-iso8859-1");
2001 charset_jisx0208_1978
= charset_id_internal ("japanese-jisx0208-1978");
2002 charset_jisx0208
= charset_id_internal ("japanese-jisx0208");
2003 charset_katakana_jisx0201
= charset_id_internal ("katakana-jisx0201");
2004 charset_latin_jisx0201
= charset_id_internal ("latin-jisx0201");
2005 charset_big5_1
= charset_id_internal ("chinese-big5-1");
2006 charset_big5_2
= charset_id_internal ("chinese-big5-2");
2011 init_charset_once ()
2015 staticpro (&Vcharset_table
);
2016 staticpro (&Vcharset_symbol_table
);
2017 staticpro (&Vgeneric_character_list
);
2019 /* This has to be done here, before we call Fmake_char_table. */
2020 Qcharset_table
= intern ("charset-table");
2021 staticpro (&Qcharset_table
);
2023 /* Intern this now in case it isn't already done.
2024 Setting this variable twice is harmless.
2025 But don't staticpro it here--that is done in alloc.c. */
2026 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
2028 /* Now we are ready to set up this property, so we can
2029 create the charset table. */
2030 Fput (Qcharset_table
, Qchar_table_extra_slots
, make_number (0));
2031 Vcharset_table
= Fmake_char_table (Qcharset_table
, Qnil
);
2033 Qunknown
= intern ("unknown");
2034 staticpro (&Qunknown
);
2035 Vcharset_symbol_table
= Fmake_vector (make_number (MAX_CHARSET
+ 1),
2039 for (i
= 0; i
< 2; i
++)
2040 for (j
= 0; j
< 2; j
++)
2041 for (k
= 0; k
< 128; k
++)
2042 iso_charset_table
[i
][j
][k
] = -1;
2044 bzero (cmpchar_hash_table
, sizeof cmpchar_hash_table
);
2045 cmpchar_table_size
= n_cmpchars
= 0;
2047 for (i
= 0; i
< 256; i
++)
2048 BYTES_BY_CHAR_HEAD (i
) = 1;
2049 for (i
= MIN_CHARSET_OFFICIAL_DIMENSION1
;
2050 i
<= MAX_CHARSET_OFFICIAL_DIMENSION1
; i
++)
2051 BYTES_BY_CHAR_HEAD (i
) = 2;
2052 for (i
= MIN_CHARSET_OFFICIAL_DIMENSION2
;
2053 i
<= MAX_CHARSET_OFFICIAL_DIMENSION2
; i
++)
2054 BYTES_BY_CHAR_HEAD (i
) = 3;
2055 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11
) = 3;
2056 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12
) = 3;
2057 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21
) = 4;
2058 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22
) = 4;
2059 /* The followings don't reflect the actual bytes, but just to tell
2060 that it is a start of a multibyte character. */
2061 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION
) = 2;
2062 BYTES_BY_CHAR_HEAD (0x9E) = 2;
2063 BYTES_BY_CHAR_HEAD (0x9F) = 2;
2065 for (i
= 0; i
< 128; i
++)
2066 WIDTH_BY_CHAR_HEAD (i
) = 1;
2067 for (; i
< 256; i
++)
2068 WIDTH_BY_CHAR_HEAD (i
) = 4;
2069 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11
) = 1;
2070 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12
) = 2;
2071 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21
) = 1;
2072 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22
) = 2;
2078 for (i
= 0x81; i
< 0x90; i
++)
2079 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
2080 for (; i
< 0x9A; i
++)
2081 val
= Fcons (make_number ((i
- 0x8F) << 14), val
);
2082 for (i
= 0xA0; i
< 0xF0; i
++)
2083 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
2084 for (; i
< 0xFF; i
++)
2085 val
= Fcons (make_number ((i
- 0xE0) << 14), val
);
2086 val
= Fcons (make_number (GENERIC_COMPOSITION_CHAR
), val
);
2087 Vgeneric_character_list
= Fnreverse (val
);
2090 nonascii_insert_offset
= 0;
2091 Vnonascii_translation_table
= Qnil
;
2099 Qascii
= intern ("ascii");
2100 staticpro (&Qascii
);
2102 Qcharset
= intern ("charset");
2103 staticpro (&Qcharset
);
2105 /* Define ASCII charset now. */
2106 update_charset_table (make_number (CHARSET_ASCII
),
2107 make_number (1), make_number (94),
2112 build_string ("ASCII"),
2113 build_string ("ASCII"),
2114 build_string ("ASCII (ISO646 IRV)"));
2115 CHARSET_SYMBOL (CHARSET_ASCII
) = Qascii
;
2116 Fput (Qascii
, Qcharset
, CHARSET_TABLE_ENTRY (CHARSET_ASCII
));
2118 Qcomposition
= intern ("composition");
2119 staticpro (&Qcomposition
);
2120 CHARSET_SYMBOL (CHARSET_COMPOSITION
) = Qcomposition
;
2122 Qauto_fill_chars
= intern ("auto-fill-chars");
2123 staticpro (&Qauto_fill_chars
);
2124 Fput (Qauto_fill_chars
, Qchar_table_extra_slots
, make_number (0));
2126 defsubr (&Sdefine_charset
);
2127 defsubr (&Sgeneric_character_list
);
2128 defsubr (&Sget_unused_iso_final_char
);
2129 defsubr (&Sdeclare_equiv_charset
);
2130 defsubr (&Sfind_charset_region
);
2131 defsubr (&Sfind_charset_string
);
2132 defsubr (&Smake_char_internal
);
2133 defsubr (&Ssplit_char
);
2134 defsubr (&Schar_charset
);
2135 defsubr (&Scharset_after
);
2136 defsubr (&Siso_charset
);
2137 defsubr (&Schar_valid_p
);
2138 defsubr (&Sunibyte_char_to_multibyte
);
2139 defsubr (&Smultibyte_char_to_unibyte
);
2140 defsubr (&Schar_bytes
);
2141 defsubr (&Schar_width
);
2142 defsubr (&Sstring_width
);
2143 defsubr (&Schar_direction
);
2144 defsubr (&Schars_in_region
);
2146 defsubr (&Scmpcharp
);
2147 defsubr (&Scmpchar_component
);
2148 defsubr (&Scmpchar_cmp_rule
);
2149 defsubr (&Scmpchar_cmp_rule_p
);
2150 defsubr (&Scmpchar_cmp_count
);
2151 defsubr (&Scompose_string
);
2152 defsubr (&Ssetup_special_charsets
);
2154 DEFVAR_LISP ("charset-list", &Vcharset_list
,
2155 "List of charsets ever defined.");
2156 Vcharset_list
= Fcons (Qascii
, Qnil
);
2158 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector
,
2159 "Vector of cons cell of a symbol and translation table ever defined.\n\
2160 An ID of a translation table is an index of this vector.");
2161 Vtranslation_table_vector
= Fmake_vector (make_number (16), Qnil
);
2163 DEFVAR_INT ("leading-code-composition", &leading_code_composition
,
2164 "Leading-code of composite characters.");
2165 leading_code_composition
= LEADING_CODE_COMPOSITION
;
2167 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11
,
2168 "Leading-code of private TYPE9N charset of column-width 1.");
2169 leading_code_private_11
= LEADING_CODE_PRIVATE_11
;
2171 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12
,
2172 "Leading-code of private TYPE9N charset of column-width 2.");
2173 leading_code_private_12
= LEADING_CODE_PRIVATE_12
;
2175 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21
,
2176 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
2177 leading_code_private_21
= LEADING_CODE_PRIVATE_21
;
2179 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22
,
2180 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
2181 leading_code_private_22
= LEADING_CODE_PRIVATE_22
;
2183 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset
,
2184 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
2185 This is used for converting unibyte text to multibyte,\n\
2186 and for inserting character codes specified by number.\n\n\
2187 This serves to convert a Latin-1 or similar 8-bit character code\n\
2188 to the corresponding Emacs multibyte character code.\n\
2189 Typically the value should be (- (make-char CHARSET 0) 128),\n\
2190 for your choice of character set.\n\
2191 If `nonascii-translation-table' is non-nil, it overrides this variable.");
2192 nonascii_insert_offset
= 0;
2194 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table
,
2195 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
2196 This is used for converting unibyte text to multibyte,\n\
2197 and for inserting character codes specified by number.\n\n\
2198 Conversion is performed only when multibyte characters are enabled,\n\
2199 and it serves to convert a Latin-1 or similar 8-bit character code\n\
2200 to the corresponding Emacs character code.\n\n\
2201 If this is nil, `nonascii-insert-offset' is used instead.\n\
2202 See also the docstring of `make-translation-table'.");
2203 Vnonascii_translation_table
= Qnil
;
2205 DEFVAR_INT ("min-composite-char", &min_composite_char
,
2206 "Minimum character code of a composite character.");
2207 min_composite_char
= MIN_CHAR_COMPOSITION
;
2209 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars
,
2210 "A char-table for characters which invoke auto-filling.\n\
2211 Such characters has value t in this table.");
2212 Vauto_fill_chars
= Fmake_char_table (Qauto_fill_chars
, Qnil
);
2213 CHAR_TABLE_SET (Vauto_fill_chars
, make_number (' '), Qt
);
2214 CHAR_TABLE_SET (Vauto_fill_chars
, make_number ('\n'), Qt
);