1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* At first, see the document in `charset.h' to understand the code in
33 #include <sys/types.h>
46 Lisp_Object Qcharset
, Qascii
, Qeight_bit_control
, Qeight_bit_graphic
;
49 /* Declaration of special leading-codes. */
50 int leading_code_private_11
; /* for private DIMENSION1 of 1-column */
51 int leading_code_private_12
; /* for private DIMENSION1 of 2-column */
52 int leading_code_private_21
; /* for private DIMENSION2 of 1-column */
53 int leading_code_private_22
; /* for private DIMENSION2 of 2-column */
55 /* Declaration of special charsets. The values are set by
56 Fsetup_special_charsets. */
57 int charset_latin_iso8859_1
; /* ISO8859-1 (Latin-1) */
58 int charset_jisx0208_1978
; /* JISX0208.1978 (Japanese Kanji old set) */
59 int charset_jisx0208
; /* JISX0208.1983 (Japanese Kanji) */
60 int charset_katakana_jisx0201
; /* JISX0201.Kana (Japanese Katakana) */
61 int charset_latin_jisx0201
; /* JISX0201.Roman (Japanese Roman) */
62 int charset_big5_1
; /* Big5 Level 1 (Chinese Traditional) */
63 int charset_big5_2
; /* Big5 Level 2 (Chinese Traditional) */
65 Lisp_Object Qcharset_table
;
67 /* A char-table containing information of each character set. */
68 Lisp_Object Vcharset_table
;
70 /* A vector of charset symbol indexed by charset-id. This is used
71 only for returning charset symbol from C functions. */
72 Lisp_Object Vcharset_symbol_table
;
74 /* A list of charset symbols ever defined. */
75 Lisp_Object Vcharset_list
;
77 /* Vector of translation table ever defined.
78 ID of a translation table is used to index this vector. */
79 Lisp_Object Vtranslation_table_vector
;
81 /* A char-table for characters which may invoke auto-filling. */
82 Lisp_Object Vauto_fill_chars
;
84 Lisp_Object Qauto_fill_chars
;
86 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
87 int bytes_by_char_head
[256];
88 int width_by_char_head
[256];
90 /* Mapping table from ISO2022's charset (specified by DIMENSION,
91 CHARS, and FINAL-CHAR) to Emacs' charset. */
92 int iso_charset_table
[2][2][128];
94 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
95 unsigned char *_fetch_multibyte_char_p
;
96 int _fetch_multibyte_char_len
;
98 /* Offset to add to a non-ASCII value when inserting it. */
99 int nonascii_insert_offset
;
101 /* Translation table for converting non-ASCII unibyte characters
102 to multibyte codes, or nil. */
103 Lisp_Object Vnonascii_translation_table
;
105 /* List of all possible generic characters. */
106 Lisp_Object Vgeneric_character_list
;
108 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
109 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
112 invalid_character (c
)
115 error ("Invalid character: 0%o, %d, 0x%x", c
, c
, c
);
118 /* Parse string STR of length LENGTH and fetch information of a
119 character at STR. Set BYTES to the byte length the character
120 occupies, CHARSET, C1, C2 to proper values of the character. */
122 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
125 (bytes) = BYTES_BY_CHAR_HEAD (c1); \
127 (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
128 else if ((bytes) == 2) \
130 if ((c1) == LEADING_CODE_8_BIT_CONTROL) \
131 (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \
133 (charset) = (c1), (c1) = (str)[1] & 0x7F; \
135 else if ((bytes) == 3) \
137 if ((c1) < LEADING_CODE_PRIVATE_11) \
138 (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \
140 (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \
143 (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \
146 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0. */
147 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
148 ((charset) == CHARSET_ASCII \
149 ? ((c1) >= 0 && (c1) <= 0x7F) \
150 : ((charset) == CHARSET_8_BIT_CONTROL \
151 ? ((c1) >= 0x80 && (c1) <= 0x9F) \
152 : ((charset) == CHARSET_8_BIT_GRAPHIC \
153 ? ((c1) >= 0x80 && (c1) <= 0xFF) \
154 : (CHARSET_DIMENSION (charset) == 1 \
155 ? ((c1) >= 0x20 && (c1) <= 0x7F) \
156 : ((c1) >= 0x20 && (c1) <= 0x7F \
157 && (c2) >= 0x20 && (c2) <= 0x7F)))))
159 /* Store multi-byte form of the character C in STR. The caller should
160 allocate at least 4-byte area at STR in advance. Returns the
161 length of the multi-byte form. If C is an invalid character code,
164 Use macro `CHAR_STRING (C, STR)' instead of calling this function
165 directly if C can be an ASCII character. */
168 char_to_string (c
, str
)
172 unsigned char *p
= str
;
174 if (c
& CHAR_MODIFIER_MASK
) /* This includes the case C is negative. */
176 /* Multibyte character can't have a modifier bit. */
177 if (! SINGLE_BYTE_CHAR_P ((c
& ~CHAR_MODIFIER_MASK
)))
178 invalid_character (c
);
180 /* For Meta, Shift, and Control modifiers, we need special care. */
183 /* Move the meta bit to the right place for a string. */
184 c
= (c
& ~CHAR_META
) | 0x80;
188 /* Shift modifier is valid only with [A-Za-z]. */
189 if ((c
& 0377) >= 'A' && (c
& 0377) <= 'Z')
191 else if ((c
& 0377) >= 'a' && (c
& 0377) <= 'z')
192 c
= (c
& ~CHAR_SHIFT
) - ('a' - 'A');
196 /* Simulate the code in lread.c. */
197 /* Allow `\C- ' and `\C-?'. */
198 if (c
== (CHAR_CTL
| ' '))
200 else if (c
== (CHAR_CTL
| '?'))
202 /* ASCII control chars are made from letters (both cases),
203 as well as the non-letters within 0100...0137. */
204 else if ((c
& 0137) >= 0101 && (c
& 0137) <= 0132)
205 c
&= (037 | (~0177 & ~CHAR_CTL
));
206 else if ((c
& 0177) >= 0100 && (c
& 0177) <= 0137)
207 c
&= (037 | (~0177 & ~CHAR_CTL
));
210 /* If C still has any modifier bits, it is an invalid character. */
211 if (c
& CHAR_MODIFIER_MASK
)
212 invalid_character (c
);
214 if (SINGLE_BYTE_CHAR_P (c
))
216 if (ASCII_BYTE_P (c
) || c
>= 0xA0)
220 *p
++ = LEADING_CODE_8_BIT_CONTROL
;
224 else if (c
< MAX_CHAR
)
228 SPLIT_CHAR (c
, charset
, c1
, c2
);
230 if (charset
>= LEADING_CODE_EXT_11
)
231 *p
++ = (charset
< LEADING_CODE_EXT_12
232 ? LEADING_CODE_PRIVATE_11
233 : (charset
< LEADING_CODE_EXT_21
234 ? LEADING_CODE_PRIVATE_12
235 : (charset
< LEADING_CODE_EXT_22
236 ? LEADING_CODE_PRIVATE_21
237 : LEADING_CODE_PRIVATE_22
)));
239 if (c1
> 0 && c1
< 32 || c2
> 0 && c2
< 32)
240 invalid_character (c
);
249 invalid_character (c
);
254 /* Return the non-ASCII character corresponding to multi-byte form at
255 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
256 length of the multibyte form in *ACTUAL_LEN.
258 Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
259 this function directly if you want ot handle ASCII characters as
263 string_to_char (str
, len
, actual_len
)
264 const unsigned char *str
;
265 int len
, *actual_len
;
267 int c
, bytes
, charset
, c1
, c2
;
269 SPLIT_MULTIBYTE_SEQ (str
, len
, bytes
, charset
, c1
, c2
);
270 c
= MAKE_CHAR (charset
, c1
, c2
);
276 /* Return the length of the multi-byte form at string STR of length LEN.
277 Use the macro MULTIBYTE_FORM_LENGTH instead. */
279 multibyte_form_length (str
, len
)
280 const unsigned char *str
;
285 PARSE_MULTIBYTE_SEQ (str
, len
, bytes
);
289 /* Check multibyte form at string STR of length LEN and set variables
290 pointed by CHARSET, C1, and C2 to charset and position codes of the
291 character at STR, and return 0. If there's no multibyte character,
292 return -1. This should be used only in the macro SPLIT_STRING
293 which checks range of STR in advance. */
296 split_string (str
, len
, charset
, c1
, c2
)
297 const unsigned char *str
;
298 unsigned char *c1
, *c2
;
301 register int bytes
, cs
, code1
, code2
= -1;
303 SPLIT_MULTIBYTE_SEQ (str
, len
, bytes
, cs
, code1
, code2
);
304 if (cs
== CHARSET_ASCII
)
312 /* Return 1 iff character C has valid printable glyph.
313 Use the macro CHAR_PRINTABLE_P instead. */
318 int charset
, c1
, c2
, chars
;
320 if (ASCII_BYTE_P (c
))
322 else if (SINGLE_BYTE_CHAR_P (c
))
324 else if (c
>= MAX_CHAR
)
327 SPLIT_CHAR (c
, charset
, c1
, c2
);
328 if (! CHARSET_DEFINED_P (charset
))
330 if (CHARSET_CHARS (charset
) == 94
331 ? c1
<= 32 || c1
>= 127
334 if (CHARSET_DIMENSION (charset
) == 2
335 && (CHARSET_CHARS (charset
) == 94
336 ? c2
<= 32 || c2
>= 127
342 /* Translate character C by translation table TABLE. If C
343 is negative, translate a character specified by CHARSET, C1, and C2
344 (C1 and C2 are code points of the character). If no translation is
345 found in TABLE, return C. */
347 translate_char (table
, c
, charset
, c1
, c2
)
349 int c
, charset
, c1
, c2
;
352 int alt_charset
, alt_c1
, alt_c2
, dimension
;
354 if (c
< 0) c
= MAKE_CHAR (charset
, (c1
& 0x7F) , (c2
& 0x7F));
355 if (!CHAR_TABLE_P (table
)
356 || (ch
= Faref (table
, make_number (c
)), !NATNUMP (ch
)))
359 SPLIT_CHAR (XFASTINT (ch
), alt_charset
, alt_c1
, alt_c2
);
360 dimension
= CHARSET_DIMENSION (alt_charset
);
361 if (dimension
== 1 && alt_c1
> 0 || dimension
== 2 && alt_c2
> 0)
362 /* CH is not a generic character, just return it. */
363 return XFASTINT (ch
);
365 /* Since CH is a generic character, we must return a specific
366 charater which has the same position codes as C from CH. */
368 SPLIT_CHAR (c
, charset
, c1
, c2
);
369 if (dimension
!= CHARSET_DIMENSION (charset
))
370 /* We can't make such a character because of dimension mismatch. */
372 return MAKE_CHAR (alt_charset
, c1
, c2
);
375 /* Convert the unibyte character C to multibyte based on
376 Vnonascii_translation_table or nonascii_insert_offset. If they can't
377 convert C to a valid multibyte character, convert it based on
378 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
381 unibyte_char_to_multibyte (c
)
384 if (c
< 0400 && c
>= 0200)
388 if (! NILP (Vnonascii_translation_table
))
390 c
= XINT (Faref (Vnonascii_translation_table
, make_number (c
)));
391 if (c
>= 0400 && ! char_valid_p (c
, 0))
392 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
394 else if (c
>= 0240 && nonascii_insert_offset
> 0)
396 c
+= nonascii_insert_offset
;
397 if (c
< 0400 || ! char_valid_p (c
, 0))
398 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
401 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
407 /* Convert the multibyte character C to unibyte 8-bit character based
408 on Vnonascii_translation_table or nonascii_insert_offset. If
409 REV_TBL is non-nil, it should be a reverse table of
410 Vnonascii_translation_table, i.e. what given by:
411 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
414 multibyte_char_to_unibyte (c
, rev_tbl
)
418 if (!SINGLE_BYTE_CHAR_P (c
))
422 if (! CHAR_TABLE_P (rev_tbl
)
423 && CHAR_TABLE_P (Vnonascii_translation_table
))
424 rev_tbl
= Fchar_table_extra_slot (Vnonascii_translation_table
,
426 if (CHAR_TABLE_P (rev_tbl
))
429 temp
= Faref (rev_tbl
, make_number (c
));
433 c
= (c_save
& 0177) + 0200;
437 if (nonascii_insert_offset
> 0)
438 c
-= nonascii_insert_offset
;
439 if (c
< 128 || c
>= 256)
440 c
= (c_save
& 0177) + 0200;
448 /* Update the table Vcharset_table with the given arguments (see the
449 document of `define-charset' for the meaning of each argument).
450 Several other table contents are also updated. The caller should
451 check the validity of CHARSET-ID and the remaining arguments in
455 update_charset_table (charset_id
, dimension
, chars
, width
, direction
,
456 iso_final_char
, iso_graphic_plane
,
457 short_name
, long_name
, description
)
458 Lisp_Object charset_id
, dimension
, chars
, width
, direction
;
459 Lisp_Object iso_final_char
, iso_graphic_plane
;
460 Lisp_Object short_name
, long_name
, description
;
462 int charset
= XINT (charset_id
);
464 unsigned char leading_code_base
, leading_code_ext
;
466 if (NILP (CHARSET_TABLE_ENTRY (charset
)))
467 CHARSET_TABLE_ENTRY (charset
)
468 = Fmake_vector (make_number (CHARSET_MAX_IDX
), Qnil
);
470 if (NILP (long_name
))
471 long_name
= short_name
;
472 if (NILP (description
))
473 description
= long_name
;
475 /* Get byte length of multibyte form, base leading-code, and
476 extended leading-code of the charset. See the comment under the
477 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
478 bytes
= XINT (dimension
);
479 if (charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
481 /* Official charset, it doesn't have an extended leading-code. */
482 if (charset
!= CHARSET_ASCII
&& charset
!= CHARSET_8_BIT_GRAPHIC
)
483 bytes
+= 1; /* For a base leading-code. */
484 leading_code_base
= charset
;
485 leading_code_ext
= 0;
489 /* Private charset. */
490 bytes
+= 2; /* For base and extended leading-codes. */
492 = (charset
< LEADING_CODE_EXT_12
493 ? LEADING_CODE_PRIVATE_11
494 : (charset
< LEADING_CODE_EXT_21
495 ? LEADING_CODE_PRIVATE_12
496 : (charset
< LEADING_CODE_EXT_22
497 ? LEADING_CODE_PRIVATE_21
498 : LEADING_CODE_PRIVATE_22
)));
499 leading_code_ext
= charset
;
502 if (charset
!= CHARSET_ASCII
&& charset
!= CHARSET_8_BIT_GRAPHIC
503 &&BYTES_BY_CHAR_HEAD (leading_code_base
) != bytes
)
504 error ("Invalid dimension for the charset-ID %d", charset
);
506 CHARSET_TABLE_INFO (charset
, CHARSET_ID_IDX
) = charset_id
;
507 CHARSET_TABLE_INFO (charset
, CHARSET_BYTES_IDX
) = make_number (bytes
);
508 CHARSET_TABLE_INFO (charset
, CHARSET_DIMENSION_IDX
) = dimension
;
509 CHARSET_TABLE_INFO (charset
, CHARSET_CHARS_IDX
) = chars
;
510 CHARSET_TABLE_INFO (charset
, CHARSET_WIDTH_IDX
) = width
;
511 CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
) = direction
;
512 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_BASE_IDX
)
513 = make_number (leading_code_base
);
514 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_EXT_IDX
)
515 = make_number (leading_code_ext
);
516 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_FINAL_CHAR_IDX
) = iso_final_char
;
517 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_GRAPHIC_PLANE_IDX
)
519 CHARSET_TABLE_INFO (charset
, CHARSET_SHORT_NAME_IDX
) = short_name
;
520 CHARSET_TABLE_INFO (charset
, CHARSET_LONG_NAME_IDX
) = long_name
;
521 CHARSET_TABLE_INFO (charset
, CHARSET_DESCRIPTION_IDX
) = description
;
522 CHARSET_TABLE_INFO (charset
, CHARSET_PLIST_IDX
) = Qnil
;
525 /* If we have already defined a charset which has the same
526 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
527 DIRECTION, we must update the entry REVERSE-CHARSET of both
528 charsets. If there's no such charset, the value of the entry
532 for (i
= 0; i
<= MAX_CHARSET
; i
++)
533 if (!NILP (CHARSET_TABLE_ENTRY (i
)))
535 if (CHARSET_DIMENSION (i
) == XINT (dimension
)
536 && CHARSET_CHARS (i
) == XINT (chars
)
537 && CHARSET_ISO_FINAL_CHAR (i
) == XINT (iso_final_char
)
538 && CHARSET_DIRECTION (i
) != XINT (direction
))
540 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
542 CHARSET_TABLE_INFO (i
, CHARSET_REVERSE_CHARSET_IDX
) = charset_id
;
547 /* No such a charset. */
548 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
552 if (charset
!= CHARSET_ASCII
553 && charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
555 width_by_char_head
[leading_code_base
] = XINT (width
);
557 /* Update table emacs_code_class. */
558 emacs_code_class
[charset
] = (bytes
== 2
559 ? EMACS_leading_code_2
561 ? EMACS_leading_code_3
562 : EMACS_leading_code_4
));
565 /* Update table iso_charset_table. */
566 if (iso_final_char
>= 0
567 && ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) < 0)
568 ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) = charset
;
573 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
576 get_charset_id (charset_symbol
)
577 Lisp_Object charset_symbol
;
582 return ((SYMBOLP (charset_symbol
)
583 && (val
= Fget (charset_symbol
, Qcharset
), VECTORP (val
))
584 && (charset
= XINT (XVECTOR (val
)->contents
[CHARSET_ID_IDX
]),
585 CHARSET_VALID_P (charset
)))
589 /* Return an identification number for a new private charset of
590 DIMENSION and WIDTH. If there's no more room for the new charset,
593 get_new_private_charset_id (dimension
, width
)
594 int dimension
, width
;
596 int charset
, from
, to
;
601 from
= LEADING_CODE_EXT_11
, to
= LEADING_CODE_EXT_12
;
603 from
= LEADING_CODE_EXT_12
, to
= LEADING_CODE_EXT_21
;
608 from
= LEADING_CODE_EXT_21
, to
= LEADING_CODE_EXT_22
;
610 from
= LEADING_CODE_EXT_22
, to
= LEADING_CODE_EXT_MAX
+ 1;
613 for (charset
= from
; charset
< to
; charset
++)
614 if (!CHARSET_DEFINED_P (charset
)) break;
616 return make_number (charset
< to
? charset
: 0);
619 DEFUN ("define-charset", Fdefine_charset
, Sdefine_charset
, 3, 3, 0,
620 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
621 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
622 treated as a private charset.\n\
623 INFO-VECTOR is a vector of the format:\n\
624 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
625 SHORT-NAME LONG-NAME DESCRIPTION]\n\
626 The meanings of each elements is as follows:\n\
627 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
628 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
629 WIDTH (integer) is the number of columns a character in the charset\n\
630 occupies on the screen: one of 0, 1, and 2.\n\
632 DIRECTION (integer) is the rendering direction of characters in the\n\
633 charset when rendering. If 0, render from left to right, else\n\
634 render from right to left.\n\
636 ISO-FINAL-CHAR (character) is the final character of the\n\
637 corresponding ISO 2022 charset.\n\
638 It may be -1 if the charset is internal use only.\n\
640 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
641 while encoding to variants of ISO 2022 coding system, one of the\n\
642 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
643 It may be -1 if the charset is internal use only.\n\
645 SHORT-NAME (string) is the short name to refer to the charset.\n\
647 LONG-NAME (string) is the long name to refer to the charset.\n\
649 DESCRIPTION (string) is the description string of the charset.")
650 (charset_id
, charset_symbol
, info_vector
)
651 Lisp_Object charset_id
, charset_symbol
, info_vector
;
655 if (!NILP (charset_id
))
656 CHECK_NUMBER (charset_id
, 0);
657 CHECK_SYMBOL (charset_symbol
, 1);
658 CHECK_VECTOR (info_vector
, 2);
660 if (! NILP (charset_id
))
662 if (! CHARSET_VALID_P (XINT (charset_id
)))
663 error ("Invalid CHARSET: %d", XINT (charset_id
));
664 else if (CHARSET_DEFINED_P (XINT (charset_id
)))
665 error ("Already defined charset: %d", XINT (charset_id
));
668 vec
= XVECTOR (info_vector
)->contents
;
669 if (XVECTOR (info_vector
)->size
!= 9
670 || !INTEGERP (vec
[0]) || !(XINT (vec
[0]) == 1 || XINT (vec
[0]) == 2)
671 || !INTEGERP (vec
[1]) || !(XINT (vec
[1]) == 94 || XINT (vec
[1]) == 96)
672 || !INTEGERP (vec
[2]) || !(XINT (vec
[2]) == 1 || XINT (vec
[2]) == 2)
673 || !INTEGERP (vec
[3]) || !(XINT (vec
[3]) == 0 || XINT (vec
[3]) == 1)
674 || !INTEGERP (vec
[4])
675 || !(XINT (vec
[4]) == -1 || XINT (vec
[4]) >= '0' && XINT (vec
[4]) <= '~')
676 || !INTEGERP (vec
[5])
677 || !(XINT (vec
[5]) == -1 || XINT (vec
[5]) == 0 || XINT (vec
[5]) == 1)
680 || !STRINGP (vec
[8]))
681 error ("Invalid info-vector argument for defining charset %s",
682 XSYMBOL (charset_symbol
)->name
->data
);
684 if (NILP (charset_id
))
686 charset_id
= get_new_private_charset_id (XINT (vec
[0]), XINT (vec
[2]));
687 if (XINT (charset_id
) == 0)
688 error ("There's no room for a new private charset %s",
689 XSYMBOL (charset_symbol
)->name
->data
);
692 update_charset_table (charset_id
, vec
[0], vec
[1], vec
[2], vec
[3],
693 vec
[4], vec
[5], vec
[6], vec
[7], vec
[8]);
694 Fput (charset_symbol
, Qcharset
, CHARSET_TABLE_ENTRY (XINT (charset_id
)));
695 CHARSET_SYMBOL (XINT (charset_id
)) = charset_symbol
;
696 Vcharset_list
= Fcons (charset_symbol
, Vcharset_list
);
700 DEFUN ("generic-character-list", Fgeneric_character_list
,
701 Sgeneric_character_list
, 0, 0, 0,
702 "Return a list of all possible generic characters.\n\
703 It includes a generic character for a charset not yet defined.")
706 return Vgeneric_character_list
;
709 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
710 Sget_unused_iso_final_char
, 2, 2, 0,
711 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
712 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
713 CHARS is the number of characters in a dimension: 94 or 96.\n\
715 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
716 If there's no unused final char for the specified kind of charset,\n\
719 Lisp_Object dimension
, chars
;
723 CHECK_NUMBER (dimension
, 0);
724 CHECK_NUMBER (chars
, 1);
725 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
726 error ("Invalid charset dimension %d, it should be 1 or 2",
728 if (XINT (chars
) != 94 && XINT (chars
) != 96)
729 error ("Invalid charset chars %d, it should be 94 or 96",
731 for (final_char
= '0'; final_char
<= '?'; final_char
++)
733 if (ISO_CHARSET_TABLE (dimension
, chars
, make_number (final_char
)) < 0)
736 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
739 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
741 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
742 CHARSET should be defined by `defined-charset' in advance.")
743 (dimension
, chars
, final_char
, charset_symbol
)
744 Lisp_Object dimension
, chars
, final_char
, charset_symbol
;
748 CHECK_NUMBER (dimension
, 0);
749 CHECK_NUMBER (chars
, 1);
750 CHECK_NUMBER (final_char
, 2);
751 CHECK_SYMBOL (charset_symbol
, 3);
753 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
754 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension
));
755 if (XINT (chars
) != 94 && XINT (chars
) != 96)
756 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
757 if (XINT (final_char
) < '0' || XFASTINT (final_char
) > '~')
758 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
759 if ((charset
= get_charset_id (charset_symbol
)) < 0)
760 error ("Invalid charset %s", XSYMBOL (charset_symbol
)->name
->data
);
762 ISO_CHARSET_TABLE (dimension
, chars
, final_char
) = charset
;
766 /* Return information about charsets in the text at PTR of NBYTES
767 bytes, which are NCHARS characters. The value is:
769 0: Each character is represented by one byte. This is always
770 true for unibyte text.
771 1: No charsets other than ascii eight-bit-control,
772 eight-bit-graphic, and latin-1 are found.
775 In addition, if CHARSETS is nonzero, for each found charset N, set
776 CHARSETS[N] to 1. For that, callers should allocate CHARSETS
777 (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
778 table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
779 1 (note that there's no charset whose ID is 1). */
782 find_charset_in_text (ptr
, nchars
, nbytes
, charsets
, table
)
784 int nchars
, nbytes
, *charsets
;
787 if (nchars
== nbytes
)
789 if (charsets
&& nbytes
> 0)
791 unsigned char *endp
= ptr
+ nbytes
;
794 while (ptr
< endp
&& maskbits
!= 7)
796 maskbits
|= (*ptr
< 0x80 ? 1 : *ptr
< 0xA0 ? 2 : 4);
801 charsets
[CHARSET_ASCII
] = 1;
803 charsets
[CHARSET_8_BIT_CONTROL
] = 1;
805 charsets
[CHARSET_8_BIT_GRAPHIC
] = 1;
812 int bytes
, charset
, c1
, c2
;
814 if (! CHAR_TABLE_P (table
))
819 SPLIT_MULTIBYTE_SEQ (ptr
, len
, bytes
, charset
, c1
, c2
);
822 if (!CHARSET_DEFINED_P (charset
))
824 else if (! NILP (table
))
826 int c
= translate_char (table
, -1, charset
, c1
, c2
);
828 charset
= CHAR_CHARSET (c
);
832 && charset
!= CHARSET_ASCII
833 && charset
!= CHARSET_8_BIT_CONTROL
834 && charset
!= CHARSET_8_BIT_GRAPHIC
835 && charset
!= charset_latin_iso8859_1
)
839 charsets
[charset
] = 1;
840 else if (return_val
== 2)
847 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
849 "Return a list of charsets in the region between BEG and END.\n\
850 BEG and END are buffer positions.\n\
851 Optional arg TABLE if non-nil is a translation table to look up.\n\
853 If the region contains invalid multiybte characters,\n\
854 `unknown' is included in the returned list.\n\
856 If the current buffer is unibyte, the returned list may contain\n\
857 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
859 Lisp_Object beg
, end
, table
;
861 int charsets
[MAX_CHARSET
+ 1];
862 int from
, from_byte
, to
, stop
, stop_byte
, i
;
865 validate_region (&beg
, &end
);
866 from
= XFASTINT (beg
);
867 stop
= to
= XFASTINT (end
);
869 if (from
< GPT
&& GPT
< to
)
872 stop_byte
= GPT_BYTE
;
875 stop_byte
= CHAR_TO_BYTE (stop
);
877 from_byte
= CHAR_TO_BYTE (from
);
879 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
882 find_charset_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
883 stop_byte
- from_byte
, charsets
, table
);
886 from
= stop
, from_byte
= stop_byte
;
887 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
895 val
= Fcons (Qunknown
, val
);
896 for (i
= MAX_CHARSET
; i
>= MIN_CHARSET_OFFICIAL_DIMENSION1
; i
--)
898 val
= Fcons (CHARSET_SYMBOL (i
), val
);
900 val
= Fcons (Qascii
, val
);
904 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
906 "Return a list of charsets in STR.\n\
907 Optional arg TABLE if non-nil is a translation table to look up.\n\
909 If the region contains invalid multiybte characters,\n\
910 `unknown' is included in the returned list.\n\
912 If STR is unibyte, the returned list may contain\n\
913 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
915 Lisp_Object str
, table
;
917 int charsets
[MAX_CHARSET
+ 1];
921 CHECK_STRING (str
, 0);
923 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
924 find_charset_in_text (XSTRING (str
)->data
, XSTRING (str
)->size
,
925 STRING_BYTES (XSTRING (str
)), charsets
, table
);
929 val
= Fcons (Qunknown
, val
);
930 for (i
= MAX_CHARSET
; i
>= MIN_CHARSET_OFFICIAL_DIMENSION1
; i
--)
932 val
= Fcons (CHARSET_SYMBOL (i
), val
);
934 val
= Fcons (Qascii
, val
);
939 DEFUN ("make-char-internal", Fmake_char_internal
, Smake_char_internal
, 1, 3, 0,
941 (charset
, code1
, code2
)
942 Lisp_Object charset
, code1
, code2
;
944 int charset_id
, c1
, c2
;
946 CHECK_NUMBER (charset
, 0);
947 charset_id
= XINT (charset
);
948 if (!CHARSET_DEFINED_P (charset_id
))
949 error ("Invalid charset ID: %d", XINT (charset
));
955 CHECK_NUMBER (code1
, 1);
962 CHECK_NUMBER (code2
, 2);
966 if (charset_id
== CHARSET_ASCII
)
968 if (c1
< 0 || c1
> 0x7F)
969 goto invalid_code_posints
;
970 return make_number (c1
);
972 else if (charset_id
== CHARSET_8_BIT_CONTROL
)
974 if (c1
< 0x80 || c1
> 0x9F)
975 goto invalid_code_posints
;
976 return make_number (c1
);
978 else if (charset_id
== CHARSET_8_BIT_GRAPHIC
)
980 if (c1
< 0xA0 || c1
> 0xFF)
981 goto invalid_code_posints
;
982 return make_number (c1
);
984 else if (c1
< 0 || c1
> 0xFF || c2
< 0 || c2
> 0xFF)
985 goto invalid_code_posints
;
991 ? !CHAR_COMPONENTS_VALID_P (charset_id
, c1
, 0x20)
992 : !CHAR_COMPONENTS_VALID_P (charset_id
, c1
, c2
)))
993 goto invalid_code_posints
;
994 return make_number (MAKE_CHAR (charset_id
, c1
, c2
));
996 invalid_code_posints
:
997 error ("Invalid code points for charset ID %d: %d %d", charset_id
, c1
, c2
);
1000 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
1001 "Return list of charset and one or two position-codes of CHAR.\n\
1002 If CHAR is invalid as a character code,\n\
1003 return a list of symbol `unknown' and CHAR.")
1008 int c
, charset
, c1
, c2
;
1010 CHECK_NUMBER (ch
, 0);
1012 if (!CHAR_VALID_P (c
, 1))
1013 return Fcons (Qunknown
, Fcons (ch
, Qnil
));
1014 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
1016 ? Fcons (CHARSET_SYMBOL (charset
),
1017 Fcons (make_number (c1
), Fcons (make_number (c2
), Qnil
)))
1018 : Fcons (CHARSET_SYMBOL (charset
), Fcons (make_number (c1
), Qnil
)));
1021 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
1022 "Return charset of CHAR.")
1026 CHECK_NUMBER (ch
, 0);
1028 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch
)));
1031 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
1032 "Return charset of a character in the current buffer at position POS.\n\
1033 If POS is nil, it defauls to the current point.\n\
1034 If POS is out of range, the value is nil.")
1041 ch
= Fchar_after (pos
);
1042 if (! INTEGERP (ch
))
1044 charset
= CHAR_CHARSET (XINT (ch
));
1045 return CHARSET_SYMBOL (charset
);
1048 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
1049 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1051 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1052 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1053 where as Emacs distinguishes them by charset symbol.\n\
1054 See the documentation of the function `charset-info' for the meanings of\n\
1055 DIMENSION, CHARS, and FINAL-CHAR.")
1056 (dimension
, chars
, final_char
)
1057 Lisp_Object dimension
, chars
, final_char
;
1061 CHECK_NUMBER (dimension
, 0);
1062 CHECK_NUMBER (chars
, 1);
1063 CHECK_NUMBER (final_char
, 2);
1065 if ((charset
= ISO_CHARSET_TABLE (dimension
, chars
, final_char
)) < 0)
1067 return CHARSET_SYMBOL (charset
);
1070 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1071 generic character. If GENERICP is zero, return nonzero iff C is a
1072 valid normal character. Do not call this function directly,
1073 instead use macro CHAR_VALID_P. */
1075 char_valid_p (c
, genericp
)
1078 int charset
, c1
, c2
;
1082 if (SINGLE_BYTE_CHAR_P (c
))
1084 SPLIT_CHAR (c
, charset
, c1
, c2
);
1089 if (c2
<= 0) c2
= 0x20;
1093 if (c2
<= 0) c1
= c2
= 0x20;
1096 return (CHARSET_DEFINED_P (charset
)
1097 && CHAR_COMPONENTS_VALID_P (charset
, c1
, c2
));
1100 DEFUN ("char-valid-p", Fchar_valid_p
, Schar_valid_p
, 1, 2, 0,
1101 "Return t if OBJECT is a valid normal character.\n\
1102 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1103 a valid generic character.")
1105 Lisp_Object object
, genericp
;
1107 if (! NATNUMP (object
))
1109 return (CHAR_VALID_P (XFASTINT (object
), !NILP (genericp
)) ? Qt
: Qnil
);
1112 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte
,
1113 Sunibyte_char_to_multibyte
, 1, 1, 0,
1114 "Convert the unibyte character CH to multibyte character.\n\
1115 The conversion is done based on `nonascii-translation-table' (which see)\n\
1116 or `nonascii-insert-offset' (which see).")
1122 CHECK_NUMBER (ch
, 0);
1124 if (c
< 0 || c
>= 0400)
1125 error ("Invalid unibyte character: %d", c
);
1126 c
= unibyte_char_to_multibyte (c
);
1128 error ("Can't convert to multibyte character: %d", XINT (ch
));
1129 return make_number (c
);
1132 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte
,
1133 Smultibyte_char_to_unibyte
, 1, 1, 0,
1134 "Convert the multibyte character CH to unibyte character.\n\
1135 The conversion is done based on `nonascii-translation-table' (which see)\n\
1136 or `nonascii-insert-offset' (which see).")
1142 CHECK_NUMBER (ch
, 0);
1144 if (! CHAR_VALID_P (c
, 0))
1145 error ("Invalid multibyte character: %d", c
);
1146 c
= multibyte_char_to_unibyte (c
, Qnil
);
1148 error ("Can't convert to unibyte character: %d", XINT (ch
));
1149 return make_number (c
);
1152 DEFUN ("char-bytes", Fchar_bytes
, Schar_bytes
, 1, 1, 0,
1153 "Return 1 regardless of the argument CHAR.\n\
1154 This is now an obsolete function. We keep it just for backward compatibility.")
1160 CHECK_NUMBER (ch
, 0);
1161 return make_number (1);
1164 /* Return how many bytes C will occupy in a multibyte buffer.
1165 Don't call this function directly, instead use macro CHAR_BYTES. */
1172 if (ASCII_BYTE_P (c
) || (c
& ~((1 << CHARACTERBITS
) -1)))
1174 if (SINGLE_BYTE_CHAR_P (c
) && c
>= 0xA0)
1177 charset
= CHAR_CHARSET (c
);
1178 return (CHARSET_DEFINED_P (charset
) ? CHARSET_BYTES (charset
) : 1);
1181 /* Return the width of character of which multi-byte form starts with
1182 C. The width is measured by how many columns occupied on the
1183 screen when displayed in the current buffer. */
1185 #define ONE_BYTE_CHAR_WIDTH(c) \
1188 ? XFASTINT (current_buffer->tab_width) \
1189 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1193 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1194 : ((! NILP (current_buffer->enable_multibyte_characters) \
1195 && BASE_LEADING_CODE_P (c)) \
1196 ? WIDTH_BY_CHAR_HEAD (c) \
1199 DEFUN ("char-width", Fchar_width
, Schar_width
, 1, 1, 0,
1200 "Return width of CHAR when displayed in the current buffer.\n\
1201 The width is measured by how many columns it occupies on the screen.")
1205 Lisp_Object val
, disp
;
1207 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1209 CHECK_NUMBER (ch
, 0);
1213 /* Get the way the display table would display it. */
1214 disp
= dp
? DISP_CHAR_VECTOR (dp
, c
) : Qnil
;
1217 XSETINT (val
, XVECTOR (disp
)->size
);
1218 else if (SINGLE_BYTE_CHAR_P (c
))
1219 XSETINT (val
, ONE_BYTE_CHAR_WIDTH (c
));
1222 int charset
= CHAR_CHARSET (c
);
1224 XSETFASTINT (val
, CHARSET_WIDTH (charset
));
1229 /* Return width of string STR of length LEN when displayed in the
1230 current buffer. The width is measured by how many columns it
1231 occupies on the screen. */
1238 unsigned char *endp
= str
+ len
;
1240 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1246 int c
= STRING_CHAR_AND_LENGTH (str
, endp
- str
, thislen
);
1248 /* Get the way the display table would display it. */
1250 disp
= DISP_CHAR_VECTOR (dp
, c
);
1255 width
+= XVECTOR (disp
)->size
;
1257 width
+= ONE_BYTE_CHAR_WIDTH (*str
);
1264 DEFUN ("string-width", Fstring_width
, Sstring_width
, 1, 1, 0,
1265 "Return width of STRING when displayed in the current buffer.\n\
1266 Width is measured by how many columns it occupies on the screen.\n\
1267 When calculating width of a multibyte character in STRING,\n\
1268 only the base leading-code is considered; the validity of\n\
1269 the following bytes is not checked.")
1275 CHECK_STRING (str
, 0);
1276 XSETFASTINT (val
, strwidth (XSTRING (str
)->data
,
1277 STRING_BYTES (XSTRING (str
))));
1281 DEFUN ("char-direction", Fchar_direction
, Schar_direction
, 1, 1, 0,
1282 "Return the direction of CHAR.\n\
1283 The returned value is 0 for left-to-right and 1 for right-to-left.")
1289 CHECK_NUMBER (ch
, 0);
1290 charset
= CHAR_CHARSET (XFASTINT (ch
));
1291 if (!CHARSET_DEFINED_P (charset
))
1292 invalid_character (XINT (ch
));
1293 return CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
);
1296 DEFUN ("chars-in-region", Fchars_in_region
, Schars_in_region
, 2, 2, 0,
1297 "Return number of characters between BEG and END.")
1299 Lisp_Object beg
, end
;
1303 CHECK_NUMBER_COERCE_MARKER (beg
, 0);
1304 CHECK_NUMBER_COERCE_MARKER (end
, 1);
1306 from
= min (XFASTINT (beg
), XFASTINT (end
));
1307 to
= max (XFASTINT (beg
), XFASTINT (end
));
1309 return make_number (to
- from
);
1312 /* Return the number of characters in the NBYTES bytes at PTR.
1313 This works by looking at the contents and checking for multibyte sequences.
1314 However, if the current buffer has enable-multibyte-characters = nil,
1315 we treat each byte as a character. */
1318 chars_in_text (ptr
, nbytes
)
1322 /* current_buffer is null at early stages of Emacs initialization. */
1323 if (current_buffer
== 0
1324 || NILP (current_buffer
->enable_multibyte_characters
))
1327 return multibyte_chars_in_text (ptr
, nbytes
);
1330 /* Return the number of characters in the NBYTES bytes at PTR.
1331 This works by looking at the contents and checking for multibyte sequences.
1332 It ignores enable-multibyte-characters. */
1335 multibyte_chars_in_text (ptr
, nbytes
)
1339 unsigned char *endp
;
1342 endp
= ptr
+ nbytes
;
1347 PARSE_MULTIBYTE_SEQ (ptr
, endp
- ptr
, bytes
);
1355 /* Parse unibyte text at STR of LEN bytes as a multibyte text, and
1356 count the numbers of characters and bytes in it. On counting
1357 bytes, pay attention to that 8-bit characters in the range
1358 0x80..0x9F are represented by 2-byte in a multibyte text. */
1360 parse_str_as_multibyte (str
, len
, nchars
, nbytes
)
1362 int len
, *nchars
, *nbytes
;
1364 unsigned char *endp
= str
+ len
;
1365 int n
, chars
= 0, bytes
= 0;
1369 if (UNIBYTE_STR_AS_MULTIBYTE_P (str
, endp
- str
, n
))
1370 str
+= n
, bytes
+= n
;
1380 /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
1381 It actually converts only 8-bit characters in the range 0x80..0x9F
1382 that don't contruct multibyte characters to multibyte forms. If
1383 NCHARS is nonzero, set *NCHARS to the number of characters in the
1384 text. It is assured that we can use LEN bytes at STR as a work
1385 area and that is enough. Return the number of bytes of the
1389 str_as_multibyte (str
, len
, nbytes
, nchars
)
1391 int len
, nbytes
, *nchars
;
1393 unsigned char *p
= str
, *endp
= str
+ nbytes
;
1398 while (p
< endp
&& UNIBYTE_STR_AS_MULTIBYTE_P (p
, endp
- p
, n
))
1408 safe_bcopy (p
, endp
- nbytes
, nbytes
);
1412 if (UNIBYTE_STR_AS_MULTIBYTE_P (p
, endp
- p
, n
))
1419 *to
++ = LEADING_CODE_8_BIT_CONTROL
;
1420 *to
++ = *p
++ + 0x20;
1429 /* Convert unibyte text at STR of NBYTES bytes to a multibyte text
1430 that contains the same single-byte characters. It actually
1431 converts all 8-bit characters to multibyte forms. It is assured
1432 that we can use LEN bytes at STR as a work area and that is
1436 str_to_multibyte (str
, len
, bytes
)
1440 unsigned char *p
= str
, *endp
= str
+ bytes
;
1444 while (p
< endp
&& (*p
< 0x80 || *p
>= 0xA0)) p
++;
1450 safe_bcopy (p
, endp
- bytes
, bytes
);
1454 if (*p
< 0x80 || *p
>= 0xA0)
1457 *to
++ = LEADING_CODE_8_BIT_CONTROL
, *to
++ = *p
++ + 0x20;
1462 /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
1463 actually converts only 8-bit characters in the range 0x80..0x9F to
1467 str_as_unibyte (str
, bytes
)
1471 unsigned char *p
= str
, *endp
= str
+ bytes
;
1472 unsigned char *to
= str
;
1474 while (p
< endp
&& *p
!= LEADING_CODE_8_BIT_CONTROL
) p
++;
1478 if (*p
== LEADING_CODE_8_BIT_CONTROL
)
1479 *to
++ = *(p
+ 1) - 0x20, p
+= 2;
1487 DEFUN ("string", Fstring
, Sstring
, 1, MANY
, 0,
1488 "Concatenate all the argument characters and make the result a string.")
1494 unsigned char *buf
= (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH
* n
);
1495 unsigned char *p
= buf
;
1498 for (i
= 0; i
< n
; i
++)
1500 CHECK_NUMBER (args
[i
], 0);
1502 p
+= CHAR_STRING (c
, p
);
1505 return make_string_from_bytes (buf
, n
, p
- buf
);
1511 charset_id_internal (charset_name
)
1516 val
= Fget (intern (charset_name
), Qcharset
);
1518 error ("Charset %s is not defined", charset_name
);
1520 return (XINT (XVECTOR (val
)->contents
[0]));
1523 DEFUN ("setup-special-charsets", Fsetup_special_charsets
,
1524 Ssetup_special_charsets
, 0, 0, 0, "Internal use only.")
1527 charset_latin_iso8859_1
= charset_id_internal ("latin-iso8859-1");
1528 charset_jisx0208_1978
= charset_id_internal ("japanese-jisx0208-1978");
1529 charset_jisx0208
= charset_id_internal ("japanese-jisx0208");
1530 charset_katakana_jisx0201
= charset_id_internal ("katakana-jisx0201");
1531 charset_latin_jisx0201
= charset_id_internal ("latin-jisx0201");
1532 charset_big5_1
= charset_id_internal ("chinese-big5-1");
1533 charset_big5_2
= charset_id_internal ("chinese-big5-2");
1538 init_charset_once ()
1542 staticpro (&Vcharset_table
);
1543 staticpro (&Vcharset_symbol_table
);
1544 staticpro (&Vgeneric_character_list
);
1546 /* This has to be done here, before we call Fmake_char_table. */
1547 Qcharset_table
= intern ("charset-table");
1548 staticpro (&Qcharset_table
);
1550 /* Intern this now in case it isn't already done.
1551 Setting this variable twice is harmless.
1552 But don't staticpro it here--that is done in alloc.c. */
1553 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
1555 /* Now we are ready to set up this property, so we can
1556 create the charset table. */
1557 Fput (Qcharset_table
, Qchar_table_extra_slots
, make_number (0));
1558 Vcharset_table
= Fmake_char_table (Qcharset_table
, Qnil
);
1560 Qunknown
= intern ("unknown");
1561 staticpro (&Qunknown
);
1562 Vcharset_symbol_table
= Fmake_vector (make_number (MAX_CHARSET
+ 1),
1566 for (i
= 0; i
< 2; i
++)
1567 for (j
= 0; j
< 2; j
++)
1568 for (k
= 0; k
< 128; k
++)
1569 iso_charset_table
[i
][j
][k
] = -1;
1571 for (i
= 0; i
< 256; i
++)
1572 bytes_by_char_head
[i
] = 1;
1573 for (i
= 128; i
< MIN_CHARSET_OFFICIAL_DIMENSION2
; i
++)
1574 bytes_by_char_head
[i
] = 2;
1575 for (; i
<= MAX_CHARSET_OFFICIAL_DIMENSION2
; i
++)
1576 bytes_by_char_head
[i
] = 3;
1577 for (; i
< 160; i
++)
1578 bytes_by_char_head
[i
] = 2;
1579 bytes_by_char_head
[LEADING_CODE_PRIVATE_11
] = 3;
1580 bytes_by_char_head
[LEADING_CODE_PRIVATE_12
] = 3;
1581 bytes_by_char_head
[LEADING_CODE_PRIVATE_21
] = 4;
1582 bytes_by_char_head
[LEADING_CODE_PRIVATE_22
] = 4;
1584 for (i
= 0; i
< 128; i
++)
1585 width_by_char_head
[i
] = 1;
1586 for (; i
< 256; i
++)
1587 width_by_char_head
[i
] = 4;
1588 width_by_char_head
[LEADING_CODE_PRIVATE_11
] = 1;
1589 width_by_char_head
[LEADING_CODE_PRIVATE_12
] = 2;
1590 width_by_char_head
[LEADING_CODE_PRIVATE_21
] = 1;
1591 width_by_char_head
[LEADING_CODE_PRIVATE_22
] = 2;
1597 for (i
= 0x81; i
< 0x90; i
++)
1598 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
1599 for (; i
< 0x9A; i
++)
1600 val
= Fcons (make_number ((i
- 0x8F) << 14), val
);
1601 for (i
= 0xA0; i
< 0xF0; i
++)
1602 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
1603 for (; i
< 0xFF; i
++)
1604 val
= Fcons (make_number ((i
- 0xE0) << 14), val
);
1605 Vgeneric_character_list
= Fnreverse (val
);
1608 nonascii_insert_offset
= 0;
1609 Vnonascii_translation_table
= Qnil
;
1617 Qcharset
= intern ("charset");
1618 staticpro (&Qcharset
);
1620 Qascii
= intern ("ascii");
1621 staticpro (&Qascii
);
1623 Qeight_bit_control
= intern ("eight-bit-control");
1624 staticpro (&Qeight_bit_control
);
1626 Qeight_bit_graphic
= intern ("eight-bit-graphic");
1627 staticpro (&Qeight_bit_graphic
);
1629 /* Define special charsets ascii, eight-bit-control, and
1630 eight-bit-graphic. */
1631 update_charset_table (make_number (CHARSET_ASCII
),
1632 make_number (1), make_number (94),
1637 build_string ("ASCII"),
1638 Qnil
, /* same as above */
1639 build_string ("ASCII (ISO646 IRV)"));
1640 CHARSET_SYMBOL (CHARSET_ASCII
) = Qascii
;
1641 Fput (Qascii
, Qcharset
, CHARSET_TABLE_ENTRY (CHARSET_ASCII
));
1643 update_charset_table (make_number (CHARSET_8_BIT_CONTROL
),
1644 make_number (1), make_number (96),
1649 build_string ("8-bit control code (0x80..0x9F)"),
1650 Qnil
, /* same as above */
1651 Qnil
); /* same as above */
1652 CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL
) = Qeight_bit_control
;
1653 Fput (Qeight_bit_control
, Qcharset
,
1654 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL
));
1656 update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC
),
1657 make_number (1), make_number (96),
1662 build_string ("8-bit graphic char (0xA0..0xFF)"),
1663 Qnil
, /* same as above */
1664 Qnil
); /* same as above */
1665 CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC
) = Qeight_bit_graphic
;
1666 Fput (Qeight_bit_graphic
, Qcharset
,
1667 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC
));
1669 Qauto_fill_chars
= intern ("auto-fill-chars");
1670 staticpro (&Qauto_fill_chars
);
1671 Fput (Qauto_fill_chars
, Qchar_table_extra_slots
, make_number (0));
1673 defsubr (&Sdefine_charset
);
1674 defsubr (&Sgeneric_character_list
);
1675 defsubr (&Sget_unused_iso_final_char
);
1676 defsubr (&Sdeclare_equiv_charset
);
1677 defsubr (&Sfind_charset_region
);
1678 defsubr (&Sfind_charset_string
);
1679 defsubr (&Smake_char_internal
);
1680 defsubr (&Ssplit_char
);
1681 defsubr (&Schar_charset
);
1682 defsubr (&Scharset_after
);
1683 defsubr (&Siso_charset
);
1684 defsubr (&Schar_valid_p
);
1685 defsubr (&Sunibyte_char_to_multibyte
);
1686 defsubr (&Smultibyte_char_to_unibyte
);
1687 defsubr (&Schar_bytes
);
1688 defsubr (&Schar_width
);
1689 defsubr (&Sstring_width
);
1690 defsubr (&Schar_direction
);
1691 defsubr (&Schars_in_region
);
1693 defsubr (&Ssetup_special_charsets
);
1695 DEFVAR_LISP ("charset-list", &Vcharset_list
,
1696 "List of charsets ever defined.");
1697 Vcharset_list
= Fcons (Qascii
, Fcons (Qeight_bit_control
,
1698 Fcons (Qeight_bit_graphic
, Qnil
)));
1700 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector
,
1701 "Vector of cons cell of a symbol and translation table ever defined.\n\
1702 An ID of a translation table is an index of this vector.");
1703 Vtranslation_table_vector
= Fmake_vector (make_number (16), Qnil
);
1705 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11
,
1706 "Leading-code of private TYPE9N charset of column-width 1.");
1707 leading_code_private_11
= LEADING_CODE_PRIVATE_11
;
1709 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12
,
1710 "Leading-code of private TYPE9N charset of column-width 2.");
1711 leading_code_private_12
= LEADING_CODE_PRIVATE_12
;
1713 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21
,
1714 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1715 leading_code_private_21
= LEADING_CODE_PRIVATE_21
;
1717 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22
,
1718 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1719 leading_code_private_22
= LEADING_CODE_PRIVATE_22
;
1721 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset
,
1722 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1723 This is used for converting unibyte text to multibyte,\n\
1724 and for inserting character codes specified by number.\n\n\
1725 This serves to convert a Latin-1 or similar 8-bit character code\n\
1726 to the corresponding Emacs multibyte character code.\n\
1727 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1728 for your choice of character set.\n\
1729 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1730 nonascii_insert_offset
= 0;
1732 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table
,
1733 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1734 This is used for converting unibyte text to multibyte,\n\
1735 and for inserting character codes specified by number.\n\n\
1736 Conversion is performed only when multibyte characters are enabled,\n\
1737 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1738 to the corresponding Emacs character code.\n\n\
1739 If this is nil, `nonascii-insert-offset' is used instead.\n\
1740 See also the docstring of `make-translation-table'.");
1741 Vnonascii_translation_table
= Qnil
;
1743 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars
,
1744 "A char-table for characters which invoke auto-filling.\n\
1745 Such characters have value t in this table.");
1746 Vauto_fill_chars
= Fmake_char_table (Qauto_fill_chars
, Qnil
);
1747 CHAR_TABLE_SET (Vauto_fill_chars
, make_number (' '), Qt
);
1748 CHAR_TABLE_SET (Vauto_fill_chars
, make_number ('\n'), Qt
);