1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* At first, see the document in `charset.h' to understand the code in
33 #include <sys/types.h>
46 Lisp_Object Qcharset
, Qascii
, Qeight_bit_control
, Qeight_bit_graphic
;
49 /* Declaration of special leading-codes. */
50 int leading_code_private_11
; /* for private DIMENSION1 of 1-column */
51 int leading_code_private_12
; /* for private DIMENSION1 of 2-column */
52 int leading_code_private_21
; /* for private DIMENSION2 of 1-column */
53 int leading_code_private_22
; /* for private DIMENSION2 of 2-column */
55 /* Declaration of special charsets. The values are set by
56 Fsetup_special_charsets. */
57 int charset_latin_iso8859_1
; /* ISO8859-1 (Latin-1) */
58 int charset_jisx0208_1978
; /* JISX0208.1978 (Japanese Kanji old set) */
59 int charset_jisx0208
; /* JISX0208.1983 (Japanese Kanji) */
60 int charset_katakana_jisx0201
; /* JISX0201.Kana (Japanese Katakana) */
61 int charset_latin_jisx0201
; /* JISX0201.Roman (Japanese Roman) */
62 int charset_big5_1
; /* Big5 Level 1 (Chinese Traditional) */
63 int charset_big5_2
; /* Big5 Level 2 (Chinese Traditional) */
65 Lisp_Object Qcharset_table
;
67 /* A char-table containing information of each character set. */
68 Lisp_Object Vcharset_table
;
70 /* A vector of charset symbol indexed by charset-id. This is used
71 only for returning charset symbol from C functions. */
72 Lisp_Object Vcharset_symbol_table
;
74 /* A list of charset symbols ever defined. */
75 Lisp_Object Vcharset_list
;
77 /* Vector of translation table ever defined.
78 ID of a translation table is used to index this vector. */
79 Lisp_Object Vtranslation_table_vector
;
81 /* A char-table for characters which may invoke auto-filling. */
82 Lisp_Object Vauto_fill_chars
;
84 Lisp_Object Qauto_fill_chars
;
86 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
87 int bytes_by_char_head
[256];
88 int width_by_char_head
[256];
90 /* Mapping table from ISO2022's charset (specified by DIMENSION,
91 CHARS, and FINAL-CHAR) to Emacs' charset. */
92 int iso_charset_table
[2][2][128];
94 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
95 unsigned char *_fetch_multibyte_char_p
;
96 int _fetch_multibyte_char_len
;
98 /* Offset to add to a non-ASCII value when inserting it. */
99 int nonascii_insert_offset
;
101 /* Translation table for converting non-ASCII unibyte characters
102 to multibyte codes, or nil. */
103 Lisp_Object Vnonascii_translation_table
;
105 /* List of all possible generic characters. */
106 Lisp_Object Vgeneric_character_list
;
108 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
109 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
112 invalid_character (c
)
115 error ("Invalid character: 0%o, %d, 0x%x", c
, c
, c
);
118 /* Parse string STR of length LENGTH and fetch information of a
119 character at STR. Set BYTES to the byte length the character
120 occupies, CHARSET, C1, C2 to proper values of the character. */
122 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2) \
125 (bytes) = BYTES_BY_CHAR_HEAD (c1); \
127 (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
128 else if ((bytes) == 2) \
130 if ((c1) == LEADING_CODE_8_BIT_CONTROL) \
131 (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20; \
133 (charset) = (c1), (c1) = (str)[1] & 0x7F; \
135 else if ((bytes) == 3) \
137 if ((c1) < LEADING_CODE_PRIVATE_11) \
138 (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F; \
140 (charset) = (str)[1], (c1) = (str)[2] & 0x7F; \
143 (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \
146 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0. */
147 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
148 ((charset) == CHARSET_ASCII \
149 ? ((c1) >= 0 && (c1) <= 0x7F) \
150 : ((charset) == CHARSET_8_BIT_CONTROL \
151 ? ((c1) >= 0x80 && (c1) <= 0x9F) \
152 : ((charset) == CHARSET_8_BIT_GRAPHIC \
153 ? ((c1) >= 0x80 && (c1) <= 0xFF) \
154 : (CHARSET_DIMENSION (charset) == 1 \
155 ? ((c1) >= 0x20 && (c1) <= 0x7F) \
156 : ((c1) >= 0x20 && (c1) <= 0x7F \
157 && (c2) >= 0x20 && (c2) <= 0x7F)))))
159 /* Store multi-byte form of the character C in STR. The caller should
160 allocate at least 4-byte area at STR in advance. Returns the
161 length of the multi-byte form. If C is an invalid character code,
164 Use macro `CHAR_STRING (C, STR)' instead of calling this function
165 directly if C can be an ASCII character. */
168 char_to_string (c
, str
)
172 unsigned char *p
= str
;
174 if (c
& CHAR_MODIFIER_MASK
) /* This includes the case C is negative. */
176 /* Multibyte character can't have a modifier bit. */
177 if (! SINGLE_BYTE_CHAR_P ((c
& ~CHAR_MODIFIER_MASK
)))
178 invalid_character (c
);
180 /* For Meta, Shift, and Control modifiers, we need special care. */
183 /* Move the meta bit to the right place for a string. */
184 c
= (c
& ~CHAR_META
) | 0x80;
188 /* Shift modifier is valid only with [A-Za-z]. */
189 if ((c
& 0377) >= 'A' && (c
& 0377) <= 'Z')
191 else if ((c
& 0377) >= 'a' && (c
& 0377) <= 'z')
192 c
= (c
& ~CHAR_SHIFT
) - ('a' - 'A');
196 /* Simulate the code in lread.c. */
197 /* Allow `\C- ' and `\C-?'. */
198 if (c
== (CHAR_CTL
| ' '))
200 else if (c
== (CHAR_CTL
| '?'))
202 /* ASCII control chars are made from letters (both cases),
203 as well as the non-letters within 0100...0137. */
204 else if ((c
& 0137) >= 0101 && (c
& 0137) <= 0132)
205 c
&= (037 | (~0177 & ~CHAR_CTL
));
206 else if ((c
& 0177) >= 0100 && (c
& 0177) <= 0137)
207 c
&= (037 | (~0177 & ~CHAR_CTL
));
210 /* If C still has any modifier bits, it is an invalid character. */
211 if (c
& CHAR_MODIFIER_MASK
)
212 invalid_character (c
);
214 if (SINGLE_BYTE_CHAR_P (c
))
216 if (ASCII_BYTE_P (c
) || c
>= 0xA0)
220 *p
++ = LEADING_CODE_8_BIT_CONTROL
;
224 else if (c
< MAX_CHAR
)
228 SPLIT_CHAR (c
, charset
, c1
, c2
);
230 if (charset
>= LEADING_CODE_EXT_11
)
231 *p
++ = (charset
< LEADING_CODE_EXT_12
232 ? LEADING_CODE_PRIVATE_11
233 : (charset
< LEADING_CODE_EXT_21
234 ? LEADING_CODE_PRIVATE_12
235 : (charset
< LEADING_CODE_EXT_22
236 ? LEADING_CODE_PRIVATE_21
237 : LEADING_CODE_PRIVATE_22
)));
239 if (c1
> 0 && c1
< 32 || c2
> 0 && c2
< 32)
240 invalid_character (c
);
249 invalid_character (c
);
254 /* Return the non-ASCII character corresponding to multi-byte form at
255 STR of length LEN. If ACTUAL_LEN is not NULL, store the byte
256 length of the multibyte form in *ACTUAL_LEN.
258 Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
259 this function directly if you want ot handle ASCII characters as
263 string_to_char (str
, len
, actual_len
)
264 const unsigned char *str
;
265 int len
, *actual_len
;
267 int c
, bytes
, charset
, c1
, c2
;
269 SPLIT_MULTIBYTE_SEQ (str
, len
, bytes
, charset
, c1
, c2
);
270 c
= MAKE_CHAR (charset
, c1
, c2
);
276 /* Return the length of the multi-byte form at string STR of length LEN.
277 Use the macro MULTIBYTE_FORM_LENGTH instead. */
279 multibyte_form_length (str
, len
)
280 const unsigned char *str
;
285 PARSE_MULTIBYTE_SEQ (str
, len
, bytes
);
289 /* Check multibyte form at string STR of length LEN and set variables
290 pointed by CHARSET, C1, and C2 to charset and position codes of the
291 character at STR, and return 0. If there's no multibyte character,
292 return -1. This should be used only in the macro SPLIT_STRING
293 which checks range of STR in advance. */
296 split_string (str
, len
, charset
, c1
, c2
)
297 const unsigned char *str
;
298 unsigned char *c1
, *c2
;
301 register int bytes
, cs
, code1
, code2
= -1;
303 SPLIT_MULTIBYTE_SEQ (str
, len
, bytes
, cs
, code1
, code2
);
304 if (cs
== CHARSET_ASCII
)
312 /* Return 1 iff character C has valid printable glyph.
313 Use the macro CHAR_PRINTABLE_P instead. */
318 int charset
, c1
, c2
, chars
;
320 if (ASCII_BYTE_P (c
))
322 else if (SINGLE_BYTE_CHAR_P (c
))
324 else if (c
>= MAX_CHAR
)
327 SPLIT_CHAR (c
, charset
, c1
, c2
);
328 if (! CHARSET_DEFINED_P (charset
))
330 if (CHARSET_CHARS (charset
) == 94
331 ? c1
<= 32 || c1
>= 127
334 if (CHARSET_DIMENSION (charset
) == 2
335 && (CHARSET_CHARS (charset
) == 94
336 ? c2
<= 32 || c2
>= 127
342 /* Translate character C by translation table TABLE. If C
343 is negative, translate a character specified by CHARSET, C1, and C2
344 (C1 and C2 are code points of the character). If no translation is
345 found in TABLE, return C. */
347 translate_char (table
, c
, charset
, c1
, c2
)
349 int c
, charset
, c1
, c2
;
352 int alt_charset
, alt_c1
, alt_c2
, dimension
;
354 if (c
< 0) c
= MAKE_CHAR (charset
, (c1
& 0x7F) , (c2
& 0x7F));
355 if (!CHAR_TABLE_P (table
)
356 || (ch
= Faref (table
, make_number (c
)), !NATNUMP (ch
)))
359 SPLIT_CHAR (XFASTINT (ch
), alt_charset
, alt_c1
, alt_c2
);
360 dimension
= CHARSET_DIMENSION (alt_charset
);
361 if (dimension
== 1 && alt_c1
> 0 || dimension
== 2 && alt_c2
> 0)
362 /* CH is not a generic character, just return it. */
363 return XFASTINT (ch
);
365 /* Since CH is a generic character, we must return a specific
366 charater which has the same position codes as C from CH. */
368 SPLIT_CHAR (c
, charset
, c1
, c2
);
369 if (dimension
!= CHARSET_DIMENSION (charset
))
370 /* We can't make such a character because of dimension mismatch. */
372 return MAKE_CHAR (alt_charset
, c1
, c2
);
375 /* Convert the unibyte character C to multibyte based on
376 Vnonascii_translation_table or nonascii_insert_offset. If they can't
377 convert C to a valid multibyte character, convert it based on
378 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
381 unibyte_char_to_multibyte (c
)
384 if (c
< 0400 && c
>= 0200)
388 if (! NILP (Vnonascii_translation_table
))
390 c
= XINT (Faref (Vnonascii_translation_table
, make_number (c
)));
391 if (c
>= 0400 && ! char_valid_p (c
, 0))
392 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
394 else if (c
>= 0240 && nonascii_insert_offset
> 0)
396 c
+= nonascii_insert_offset
;
397 if (c
< 0400 || ! char_valid_p (c
, 0))
398 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
401 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
407 /* Convert the multibyte character C to unibyte 8-bit character based
408 on Vnonascii_translation_table or nonascii_insert_offset. If
409 REV_TBL is non-nil, it should be a reverse table of
410 Vnonascii_translation_table, i.e. what given by:
411 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
414 multibyte_char_to_unibyte (c
, rev_tbl
)
418 if (!SINGLE_BYTE_CHAR_P (c
))
422 if (! CHAR_TABLE_P (rev_tbl
)
423 && CHAR_TABLE_P (Vnonascii_translation_table
))
424 rev_tbl
= Fchar_table_extra_slot (Vnonascii_translation_table
,
426 if (CHAR_TABLE_P (rev_tbl
))
429 temp
= Faref (rev_tbl
, make_number (c
));
433 c
= (c_save
& 0177) + 0200;
437 if (nonascii_insert_offset
> 0)
438 c
-= nonascii_insert_offset
;
439 if (c
< 128 || c
>= 256)
440 c
= (c_save
& 0177) + 0200;
448 /* Update the table Vcharset_table with the given arguments (see the
449 document of `define-charset' for the meaning of each argument).
450 Several other table contents are also updated. The caller should
451 check the validity of CHARSET-ID and the remaining arguments in
455 update_charset_table (charset_id
, dimension
, chars
, width
, direction
,
456 iso_final_char
, iso_graphic_plane
,
457 short_name
, long_name
, description
)
458 Lisp_Object charset_id
, dimension
, chars
, width
, direction
;
459 Lisp_Object iso_final_char
, iso_graphic_plane
;
460 Lisp_Object short_name
, long_name
, description
;
462 int charset
= XINT (charset_id
);
464 unsigned char leading_code_base
, leading_code_ext
;
466 if (NILP (CHARSET_TABLE_ENTRY (charset
)))
467 CHARSET_TABLE_ENTRY (charset
)
468 = Fmake_vector (make_number (CHARSET_MAX_IDX
), Qnil
);
470 /* Get byte length of multibyte form, base leading-code, and
471 extended leading-code of the charset. See the comment under the
472 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
473 bytes
= XINT (dimension
);
474 if (charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
476 /* Official charset, it doesn't have an extended leading-code. */
477 if (charset
!= CHARSET_ASCII
&& charset
!= CHARSET_8_BIT_GRAPHIC
)
478 bytes
+= 1; /* For a base leading-code. */
479 leading_code_base
= charset
;
480 leading_code_ext
= 0;
484 /* Private charset. */
485 bytes
+= 2; /* For base and extended leading-codes. */
487 = (charset
< LEADING_CODE_EXT_12
488 ? LEADING_CODE_PRIVATE_11
489 : (charset
< LEADING_CODE_EXT_21
490 ? LEADING_CODE_PRIVATE_12
491 : (charset
< LEADING_CODE_EXT_22
492 ? LEADING_CODE_PRIVATE_21
493 : LEADING_CODE_PRIVATE_22
)));
494 leading_code_ext
= charset
;
497 if (charset
!= CHARSET_ASCII
&& charset
!= CHARSET_8_BIT_GRAPHIC
498 &&BYTES_BY_CHAR_HEAD (leading_code_base
) != bytes
)
499 error ("Invalid dimension for the charset-ID %d", charset
);
501 CHARSET_TABLE_INFO (charset
, CHARSET_ID_IDX
) = charset_id
;
502 CHARSET_TABLE_INFO (charset
, CHARSET_BYTES_IDX
) = make_number (bytes
);
503 CHARSET_TABLE_INFO (charset
, CHARSET_DIMENSION_IDX
) = dimension
;
504 CHARSET_TABLE_INFO (charset
, CHARSET_CHARS_IDX
) = chars
;
505 CHARSET_TABLE_INFO (charset
, CHARSET_WIDTH_IDX
) = width
;
506 CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
) = direction
;
507 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_BASE_IDX
)
508 = make_number (leading_code_base
);
509 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_EXT_IDX
)
510 = make_number (leading_code_ext
);
511 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_FINAL_CHAR_IDX
) = iso_final_char
;
512 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_GRAPHIC_PLANE_IDX
)
514 CHARSET_TABLE_INFO (charset
, CHARSET_SHORT_NAME_IDX
) = short_name
;
515 CHARSET_TABLE_INFO (charset
, CHARSET_LONG_NAME_IDX
) = long_name
;
516 CHARSET_TABLE_INFO (charset
, CHARSET_DESCRIPTION_IDX
) = description
;
517 CHARSET_TABLE_INFO (charset
, CHARSET_PLIST_IDX
) = Qnil
;
520 /* If we have already defined a charset which has the same
521 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
522 DIRECTION, we must update the entry REVERSE-CHARSET of both
523 charsets. If there's no such charset, the value of the entry
527 for (i
= 0; i
<= MAX_CHARSET
; i
++)
528 if (!NILP (CHARSET_TABLE_ENTRY (i
)))
530 if (CHARSET_DIMENSION (i
) == XINT (dimension
)
531 && CHARSET_CHARS (i
) == XINT (chars
)
532 && CHARSET_ISO_FINAL_CHAR (i
) == XINT (iso_final_char
)
533 && CHARSET_DIRECTION (i
) != XINT (direction
))
535 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
537 CHARSET_TABLE_INFO (i
, CHARSET_REVERSE_CHARSET_IDX
) = charset_id
;
542 /* No such a charset. */
543 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
547 if (charset
!= CHARSET_ASCII
548 && charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
550 width_by_char_head
[leading_code_base
] = XINT (width
);
552 /* Update table emacs_code_class. */
553 emacs_code_class
[charset
] = (bytes
== 2
554 ? EMACS_leading_code_2
556 ? EMACS_leading_code_3
557 : EMACS_leading_code_4
));
560 /* Update table iso_charset_table. */
561 if (iso_final_char
>= 0
562 && ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) < 0)
563 ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) = charset
;
568 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
571 get_charset_id (charset_symbol
)
572 Lisp_Object charset_symbol
;
577 return ((SYMBOLP (charset_symbol
)
578 && (val
= Fget (charset_symbol
, Qcharset
), VECTORP (val
))
579 && (charset
= XINT (XVECTOR (val
)->contents
[CHARSET_ID_IDX
]),
580 CHARSET_VALID_P (charset
)))
584 /* Return an identification number for a new private charset of
585 DIMENSION and WIDTH. If there's no more room for the new charset,
588 get_new_private_charset_id (dimension
, width
)
589 int dimension
, width
;
591 int charset
, from
, to
;
596 from
= LEADING_CODE_EXT_11
, to
= LEADING_CODE_EXT_12
;
598 from
= LEADING_CODE_EXT_12
, to
= LEADING_CODE_EXT_21
;
603 from
= LEADING_CODE_EXT_21
, to
= LEADING_CODE_EXT_22
;
605 from
= LEADING_CODE_EXT_22
, to
= LEADING_CODE_EXT_MAX
+ 1;
608 for (charset
= from
; charset
< to
; charset
++)
609 if (!CHARSET_DEFINED_P (charset
)) break;
611 return make_number (charset
< to
? charset
: 0);
614 DEFUN ("define-charset", Fdefine_charset
, Sdefine_charset
, 3, 3, 0,
615 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
616 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
617 treated as a private charset.\n\
618 INFO-VECTOR is a vector of the format:\n\
619 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
620 SHORT-NAME LONG-NAME DESCRIPTION]\n\
621 The meanings of each elements is as follows:\n\
622 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
623 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
624 WIDTH (integer) is the number of columns a character in the charset\n\
625 occupies on the screen: one of 0, 1, and 2.\n\
627 DIRECTION (integer) is the rendering direction of characters in the\n\
628 charset when rendering. If 0, render from left to right, else\n\
629 render from right to left.\n\
631 ISO-FINAL-CHAR (character) is the final character of the\n\
632 corresponding ISO 2022 charset.\n\
633 It may be -1 if the charset is internal use only.\n\
635 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
636 while encoding to variants of ISO 2022 coding system, one of the\n\
637 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
638 It may be -1 if the charset is internal use only.\n\
640 SHORT-NAME (string) is the short name to refer to the charset.\n\
642 LONG-NAME (string) is the long name to refer to the charset.\n\
644 DESCRIPTION (string) is the description string of the charset.")
645 (charset_id
, charset_symbol
, info_vector
)
646 Lisp_Object charset_id
, charset_symbol
, info_vector
;
650 if (!NILP (charset_id
))
651 CHECK_NUMBER (charset_id
, 0);
652 CHECK_SYMBOL (charset_symbol
, 1);
653 CHECK_VECTOR (info_vector
, 2);
655 if (! NILP (charset_id
))
657 if (! CHARSET_VALID_P (XINT (charset_id
)))
658 error ("Invalid CHARSET: %d", XINT (charset_id
));
659 else if (CHARSET_DEFINED_P (XINT (charset_id
)))
660 error ("Already defined charset: %d", XINT (charset_id
));
663 vec
= XVECTOR (info_vector
)->contents
;
664 if (XVECTOR (info_vector
)->size
!= 9
665 || !INTEGERP (vec
[0]) || !(XINT (vec
[0]) == 1 || XINT (vec
[0]) == 2)
666 || !INTEGERP (vec
[1]) || !(XINT (vec
[1]) == 94 || XINT (vec
[1]) == 96)
667 || !INTEGERP (vec
[2]) || !(XINT (vec
[2]) == 1 || XINT (vec
[2]) == 2)
668 || !INTEGERP (vec
[3]) || !(XINT (vec
[3]) == 0 || XINT (vec
[3]) == 1)
669 || !INTEGERP (vec
[4])
670 || !(XINT (vec
[4]) == -1 || XINT (vec
[4]) >= '0' && XINT (vec
[4]) <= '~')
671 || !INTEGERP (vec
[5])
672 || !(XINT (vec
[5]) == -1 || XINT (vec
[5]) == 0 || XINT (vec
[5]) == 1)
675 || !STRINGP (vec
[8]))
676 error ("Invalid info-vector argument for defining charset %s",
677 XSYMBOL (charset_symbol
)->name
->data
);
679 if (NILP (charset_id
))
681 charset_id
= get_new_private_charset_id (XINT (vec
[0]), XINT (vec
[2]));
682 if (XINT (charset_id
) == 0)
683 error ("There's no room for a new private charset %s",
684 XSYMBOL (charset_symbol
)->name
->data
);
687 update_charset_table (charset_id
, vec
[0], vec
[1], vec
[2], vec
[3],
688 vec
[4], vec
[5], vec
[6], vec
[7], vec
[8]);
689 Fput (charset_symbol
, Qcharset
, CHARSET_TABLE_ENTRY (XINT (charset_id
)));
690 CHARSET_SYMBOL (XINT (charset_id
)) = charset_symbol
;
691 Vcharset_list
= Fcons (charset_symbol
, Vcharset_list
);
695 DEFUN ("generic-character-list", Fgeneric_character_list
,
696 Sgeneric_character_list
, 0, 0, 0,
697 "Return a list of all possible generic characters.\n\
698 It includes a generic character for a charset not yet defined.")
701 return Vgeneric_character_list
;
704 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
705 Sget_unused_iso_final_char
, 2, 2, 0,
706 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
707 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
708 CHARS is the number of characters in a dimension: 94 or 96.\n\
710 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
711 If there's no unused final char for the specified kind of charset,\n\
714 Lisp_Object dimension
, chars
;
718 CHECK_NUMBER (dimension
, 0);
719 CHECK_NUMBER (chars
, 1);
720 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
721 error ("Invalid charset dimension %d, it should be 1 or 2",
723 if (XINT (chars
) != 94 && XINT (chars
) != 96)
724 error ("Invalid charset chars %d, it should be 94 or 96",
726 for (final_char
= '0'; final_char
<= '?'; final_char
++)
728 if (ISO_CHARSET_TABLE (dimension
, chars
, make_number (final_char
)) < 0)
731 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
734 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
736 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
737 CHARSET should be defined by `defined-charset' in advance.")
738 (dimension
, chars
, final_char
, charset_symbol
)
739 Lisp_Object dimension
, chars
, final_char
, charset_symbol
;
743 CHECK_NUMBER (dimension
, 0);
744 CHECK_NUMBER (chars
, 1);
745 CHECK_NUMBER (final_char
, 2);
746 CHECK_SYMBOL (charset_symbol
, 3);
748 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
749 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension
));
750 if (XINT (chars
) != 94 && XINT (chars
) != 96)
751 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
752 if (XINT (final_char
) < '0' || XFASTINT (final_char
) > '~')
753 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
754 if ((charset
= get_charset_id (charset_symbol
)) < 0)
755 error ("Invalid charset %s", XSYMBOL (charset_symbol
)->name
->data
);
757 ISO_CHARSET_TABLE (dimension
, chars
, final_char
) = charset
;
761 /* Return information about charsets in the text at PTR of NBYTES
762 bytes, which are NCHARS characters. The value is:
764 0: Each character is represented by one byte. This is alwasy
765 true for unibyte text.
766 1: No charsets other than ascii eight-bit-control,
767 eight-bit-graphic, and latin-1 are found.
770 In addition, if CHARSETS is nonzero, for each found charset N, set
771 CHARSETS[N] to 1. For that, callers should allocate CHARSETS
772 (MAX_CHARSET + 1 elements) in advance. It may lookup a translation
773 table TABLE if supplied. For invalid charsets, set CHARSETS[1] to
774 1 (note that there's no charset whose ID is 1). */
777 find_charset_in_text (ptr
, nchars
, nbytes
, charsets
, table
)
779 int nchars
, nbytes
, *charsets
;
782 if (nchars
== nbytes
)
784 if (charsets
&& nbytes
> 0)
786 unsigned char *endp
= ptr
+ nbytes
;
789 while (ptr
< endp
&& maskbits
!= 7)
791 maskbits
|= (*ptr
< 0x80 ? 1 : *ptr
< 0xA0 ? 2 : 4);
796 charsets
[CHARSET_ASCII
] = 1;
798 charsets
[CHARSET_8_BIT_CONTROL
] = 1;
800 charsets
[CHARSET_8_BIT_GRAPHIC
] = 1;
807 int bytes
, charset
, c1
, c2
;
809 if (! CHAR_TABLE_P (table
))
814 SPLIT_MULTIBYTE_SEQ (ptr
, len
, bytes
, charset
, c1
, c2
);
817 if (!CHARSET_DEFINED_P (charset
))
819 else if (! NILP (table
))
821 int c
= translate_char (table
, -1, charset
, c1
, c2
);
823 charset
= CHAR_CHARSET (c
);
827 && charset
!= CHARSET_ASCII
828 && charset
!= CHARSET_8_BIT_CONTROL
829 && charset
!= CHARSET_8_BIT_GRAPHIC
830 && charset
!= charset_latin_iso8859_1
)
834 charsets
[charset
] = 1;
835 else if (return_val
== 2)
842 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
844 "Return a list of charsets in the region between BEG and END.\n\
845 BEG and END are buffer positions.\n\
846 Optional arg TABLE if non-nil is a translation table to look up.\n\
848 If the region contains invalid multiybte characters,\n\
849 `unknown' is included in the returned list.\n\
851 If the current buffer is unibyte, the returned list may contain\n\
852 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
854 Lisp_Object beg
, end
, table
;
856 int charsets
[MAX_CHARSET
+ 1];
857 int from
, from_byte
, to
, stop
, stop_byte
, i
;
860 validate_region (&beg
, &end
);
861 from
= XFASTINT (beg
);
862 stop
= to
= XFASTINT (end
);
864 if (from
< GPT
&& GPT
< to
)
867 stop_byte
= GPT_BYTE
;
870 stop_byte
= CHAR_TO_BYTE (stop
);
872 from_byte
= CHAR_TO_BYTE (from
);
874 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
877 find_charset_in_text (BYTE_POS_ADDR (from_byte
), stop
- from
,
878 stop_byte
- from_byte
, charsets
, table
);
881 from
= stop
, from_byte
= stop_byte
;
882 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
890 val
= Fcons (Qunknown
, val
);
891 for (i
= MAX_CHARSET
; i
>= MIN_CHARSET_OFFICIAL_DIMENSION1
; i
--)
893 val
= Fcons (CHARSET_SYMBOL (i
), val
);
895 val
= Fcons (Qascii
, val
);
899 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
901 "Return a list of charsets in STR.\n\
902 Optional arg TABLE if non-nil is a translation table to look up.\n\
904 If the region contains invalid multiybte characters,\n\
905 `unknown' is included in the returned list.\n\
907 If STR is unibyte, the returned list may contain\n\
908 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
910 Lisp_Object str
, table
;
912 int charsets
[MAX_CHARSET
+ 1];
916 CHECK_STRING (str
, 0);
918 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
919 find_charset_in_text (XSTRING (str
)->data
, XSTRING (str
)->size
,
920 STRING_BYTES (XSTRING (str
)), charsets
, table
);
924 val
= Fcons (Qunknown
, val
);
925 for (i
= MAX_CHARSET
; i
>= MIN_CHARSET_OFFICIAL_DIMENSION1
; i
--)
927 val
= Fcons (CHARSET_SYMBOL (i
), val
);
929 val
= Fcons (Qascii
, val
);
934 DEFUN ("make-char-internal", Fmake_char_internal
, Smake_char_internal
, 1, 3, 0,
936 (charset
, code1
, code2
)
937 Lisp_Object charset
, code1
, code2
;
939 int charset_id
, c1
, c2
;
941 CHECK_NUMBER (charset
, 0);
942 charset_id
= XINT (charset
);
943 if (!CHARSET_DEFINED_P (charset_id
))
944 error ("Invalid charset ID: %d", XINT (charset
));
950 CHECK_NUMBER (code1
, 1);
957 CHECK_NUMBER (code2
, 2);
961 if (charset_id
== CHARSET_ASCII
)
963 if (c1
< 0 || c1
> 0x7F)
964 goto invalid_code_posints
;
965 return make_number (c1
);
967 else if (charset_id
== CHARSET_8_BIT_CONTROL
)
969 if (c1
< 0x80 || c1
> 0x9F)
970 goto invalid_code_posints
;
971 return make_number (c1
);
973 else if (charset_id
== CHARSET_8_BIT_GRAPHIC
)
975 if (c1
< 0xA0 || c1
> 0xFF)
976 goto invalid_code_posints
;
977 return make_number (c1
);
979 else if (c1
< 0 || c1
> 0xFF || c2
< 0 || c2
> 0xFF)
980 goto invalid_code_posints
;
986 ? !CHAR_COMPONENTS_VALID_P (charset_id
, c1
, 0x20)
987 : !CHAR_COMPONENTS_VALID_P (charset_id
, c1
, c2
)))
988 goto invalid_code_posints
;
989 return make_number (MAKE_CHAR (charset_id
, c1
, c2
));
991 invalid_code_posints
:
992 error ("Invalid code points for charset ID %d: %d %d", charset_id
, c1
, c2
);
995 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
996 "Return list of charset and one or two position-codes of CHAR.\n\
997 If CHAR is invalid as a character code,\n\
998 return a list of symbol `unknown' and CHAR.")
1003 int c
, charset
, c1
, c2
;
1005 CHECK_NUMBER (ch
, 0);
1007 if (!CHAR_VALID_P (c
, 1))
1008 return Fcons (Qunknown
, Fcons (ch
, Qnil
));
1009 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
1011 ? Fcons (CHARSET_SYMBOL (charset
),
1012 Fcons (make_number (c1
), Fcons (make_number (c2
), Qnil
)))
1013 : Fcons (CHARSET_SYMBOL (charset
), Fcons (make_number (c1
), Qnil
)));
1016 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
1017 "Return charset of CHAR.")
1021 CHECK_NUMBER (ch
, 0);
1023 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch
)));
1026 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
1027 "Return charset of a character in the current buffer at position POS.\n\
1028 If POS is nil, it defauls to the current point.\n\
1029 If POS is out of range, the value is nil.")
1036 ch
= Fchar_after (pos
);
1037 if (! INTEGERP (ch
))
1039 charset
= CHAR_CHARSET (XINT (ch
));
1040 return CHARSET_SYMBOL (charset
);
1043 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
1044 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1046 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1047 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1048 where as Emacs distinguishes them by charset symbol.\n\
1049 See the documentation of the function `charset-info' for the meanings of\n\
1050 DIMENSION, CHARS, and FINAL-CHAR.")
1051 (dimension
, chars
, final_char
)
1052 Lisp_Object dimension
, chars
, final_char
;
1056 CHECK_NUMBER (dimension
, 0);
1057 CHECK_NUMBER (chars
, 1);
1058 CHECK_NUMBER (final_char
, 2);
1060 if ((charset
= ISO_CHARSET_TABLE (dimension
, chars
, final_char
)) < 0)
1062 return CHARSET_SYMBOL (charset
);
1065 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1066 generic character. If GENERICP is zero, return nonzero iff C is a
1067 valid normal character. Do not call this function directly,
1068 instead use macro CHAR_VALID_P. */
1070 char_valid_p (c
, genericp
)
1073 int charset
, c1
, c2
;
1077 if (SINGLE_BYTE_CHAR_P (c
))
1079 SPLIT_CHAR (c
, charset
, c1
, c2
);
1084 if (c2
<= 0) c2
= 0x20;
1088 if (c2
<= 0) c1
= c2
= 0x20;
1091 return (CHARSET_DEFINED_P (charset
)
1092 && CHAR_COMPONENTS_VALID_P (charset
, c1
, c2
));
1095 DEFUN ("char-valid-p", Fchar_valid_p
, Schar_valid_p
, 1, 2, 0,
1096 "Return t if OBJECT is a valid normal character.\n\
1097 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1098 a valid generic character.")
1100 Lisp_Object object
, genericp
;
1102 if (! NATNUMP (object
))
1104 return (CHAR_VALID_P (XFASTINT (object
), !NILP (genericp
)) ? Qt
: Qnil
);
1107 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte
,
1108 Sunibyte_char_to_multibyte
, 1, 1, 0,
1109 "Convert the unibyte character CH to multibyte character.\n\
1110 The conversion is done based on `nonascii-translation-table' (which see)\n\
1111 or `nonascii-insert-offset' (which see).")
1117 CHECK_NUMBER (ch
, 0);
1119 if (c
< 0 || c
>= 0400)
1120 error ("Invalid unibyte character: %d", c
);
1121 c
= unibyte_char_to_multibyte (c
);
1123 error ("Can't convert to multibyte character: %d", XINT (ch
));
1124 return make_number (c
);
1127 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte
,
1128 Smultibyte_char_to_unibyte
, 1, 1, 0,
1129 "Convert the multibyte character CH to unibyte character.\n\
1130 The conversion is done based on `nonascii-translation-table' (which see)\n\
1131 or `nonascii-insert-offset' (which see).")
1137 CHECK_NUMBER (ch
, 0);
1139 if (! CHAR_VALID_P (c
, 0))
1140 error ("Invalid multibyte character: %d", c
);
1141 c
= multibyte_char_to_unibyte (c
, Qnil
);
1143 error ("Can't convert to unibyte character: %d", XINT (ch
));
1144 return make_number (c
);
1147 DEFUN ("char-bytes", Fchar_bytes
, Schar_bytes
, 1, 1, 0,
1148 "Return 1 regardless of the argument CHAR.\n\
1149 This is now an obsolete function. We keep it just for backward compatibility.")
1155 CHECK_NUMBER (ch
, 0);
1156 return make_number (1);
1159 /* Return how many bytes C will occupy in a multibyte buffer.
1160 Don't call this function directly, instead use macro CHAR_BYTES. */
1167 if (ASCII_BYTE_P (c
) || (c
& ~((1 << CHARACTERBITS
) -1)))
1169 if (SINGLE_BYTE_CHAR_P (c
) && c
>= 0xA0)
1172 charset
= CHAR_CHARSET (c
);
1173 return (CHARSET_DEFINED_P (charset
) ? CHARSET_BYTES (charset
) : 1);
1176 /* Return the width of character of which multi-byte form starts with
1177 C. The width is measured by how many columns occupied on the
1178 screen when displayed in the current buffer. */
1180 #define ONE_BYTE_CHAR_WIDTH(c) \
1183 ? XFASTINT (current_buffer->tab_width) \
1184 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1188 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1189 : ((! NILP (current_buffer->enable_multibyte_characters) \
1190 && BASE_LEADING_CODE_P (c)) \
1191 ? WIDTH_BY_CHAR_HEAD (c) \
1194 DEFUN ("char-width", Fchar_width
, Schar_width
, 1, 1, 0,
1195 "Return width of CHAR when displayed in the current buffer.\n\
1196 The width is measured by how many columns it occupies on the screen.")
1200 Lisp_Object val
, disp
;
1202 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1204 CHECK_NUMBER (ch
, 0);
1208 /* Get the way the display table would display it. */
1209 disp
= dp
? DISP_CHAR_VECTOR (dp
, c
) : Qnil
;
1212 XSETINT (val
, XVECTOR (disp
)->size
);
1213 else if (SINGLE_BYTE_CHAR_P (c
))
1214 XSETINT (val
, ONE_BYTE_CHAR_WIDTH (c
));
1217 int charset
= CHAR_CHARSET (c
);
1219 XSETFASTINT (val
, CHARSET_WIDTH (charset
));
1224 /* Return width of string STR of length LEN when displayed in the
1225 current buffer. The width is measured by how many columns it
1226 occupies on the screen. */
1233 unsigned char *endp
= str
+ len
;
1235 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1241 int c
= STRING_CHAR_AND_LENGTH (str
, endp
- str
, thislen
);
1243 /* Get the way the display table would display it. */
1245 disp
= DISP_CHAR_VECTOR (dp
, c
);
1250 width
+= XVECTOR (disp
)->size
;
1252 width
+= ONE_BYTE_CHAR_WIDTH (*str
);
1259 DEFUN ("string-width", Fstring_width
, Sstring_width
, 1, 1, 0,
1260 "Return width of STRING when displayed in the current buffer.\n\
1261 Width is measured by how many columns it occupies on the screen.\n\
1262 When calculating width of a multibyte character in STRING,\n\
1263 only the base leading-code is considered; the validity of\n\
1264 the following bytes is not checked.")
1270 CHECK_STRING (str
, 0);
1271 XSETFASTINT (val
, strwidth (XSTRING (str
)->data
,
1272 STRING_BYTES (XSTRING (str
))));
1276 DEFUN ("char-direction", Fchar_direction
, Schar_direction
, 1, 1, 0,
1277 "Return the direction of CHAR.\n\
1278 The returned value is 0 for left-to-right and 1 for right-to-left.")
1284 CHECK_NUMBER (ch
, 0);
1285 charset
= CHAR_CHARSET (XFASTINT (ch
));
1286 if (!CHARSET_DEFINED_P (charset
))
1287 invalid_character (XINT (ch
));
1288 return CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
);
1291 DEFUN ("chars-in-region", Fchars_in_region
, Schars_in_region
, 2, 2, 0,
1292 "Return number of characters between BEG and END.")
1294 Lisp_Object beg
, end
;
1298 CHECK_NUMBER_COERCE_MARKER (beg
, 0);
1299 CHECK_NUMBER_COERCE_MARKER (end
, 1);
1301 from
= min (XFASTINT (beg
), XFASTINT (end
));
1302 to
= max (XFASTINT (beg
), XFASTINT (end
));
1304 return make_number (to
- from
);
1307 /* Return the number of characters in the NBYTES bytes at PTR.
1308 This works by looking at the contents and checking for multibyte sequences.
1309 However, if the current buffer has enable-multibyte-characters = nil,
1310 we treat each byte as a character. */
1313 chars_in_text (ptr
, nbytes
)
1317 /* current_buffer is null at early stages of Emacs initialization. */
1318 if (current_buffer
== 0
1319 || NILP (current_buffer
->enable_multibyte_characters
))
1322 return multibyte_chars_in_text (ptr
, nbytes
);
1325 /* Return the number of characters in the NBYTES bytes at PTR.
1326 This works by looking at the contents and checking for multibyte sequences.
1327 It ignores enable-multibyte-characters. */
1330 multibyte_chars_in_text (ptr
, nbytes
)
1334 unsigned char *endp
;
1337 endp
= ptr
+ nbytes
;
1342 PARSE_MULTIBYTE_SEQ (ptr
, endp
- ptr
, bytes
);
1350 /* Parse unibyte text at STR of LEN bytes as a multibyte text, and
1351 count the numbers of characters and bytes in it. On counting
1352 bytes, pay attention to that 8-bit characters in the range
1353 0x80..0x9F are represented by 2-byte in a multibyte text. */
1355 parse_str_as_multibyte (str
, len
, nchars
, nbytes
)
1357 int len
, *nchars
, *nbytes
;
1359 unsigned char *endp
= str
+ len
;
1360 int n
, chars
= 0, bytes
= 0;
1364 if (UNIBYTE_STR_AS_MULTIBYTE_P (str
, endp
- str
, n
))
1365 str
+= n
, bytes
+= n
;
1375 /* Arrange unibyte text at STR of NBYTES bytes as a multibyte text.
1376 It actually converts only 8-bit characters in the range 0x80..0x9F
1377 that don't contruct multibyte characters to multibyte forms. If
1378 NCHARS is nonzero, set *NCHARS to the number of characters in the
1379 text. It is assured that we can use LEN bytes at STR as a work
1380 area and that is enough. Return the number of bytes of the
1384 str_as_multibyte (str
, len
, nbytes
, nchars
)
1386 int len
, nbytes
, *nchars
;
1388 unsigned char *p
= str
, *endp
= str
+ nbytes
;
1393 while (p
< endp
&& UNIBYTE_STR_AS_MULTIBYTE_P (p
, endp
- p
, n
))
1403 safe_bcopy (p
, endp
- nbytes
, nbytes
);
1407 if (UNIBYTE_STR_AS_MULTIBYTE_P (p
, endp
- p
, n
))
1414 *to
++ = LEADING_CODE_8_BIT_CONTROL
;
1415 *to
++ = *p
++ + 0x20;
1424 /* Convert unibyte text at STR of NBYTES bytes to a multibyte text
1425 that contains the same single-byte characters. It actually
1426 converts all 8-bit characters to multibyte forms. It is assured
1427 that we can use LEN bytes at STR as a work area and that is
1431 str_to_multibyte (str
, len
, bytes
)
1435 unsigned char *p
= str
, *endp
= str
+ bytes
;
1439 while (p
< endp
&& (*p
< 0x80 || *p
>= 0xA0)) p
++;
1445 safe_bcopy (p
, endp
- bytes
, bytes
);
1449 if (*p
< 0x80 || *p
>= 0xA0)
1452 *to
++ = LEADING_CODE_8_BIT_CONTROL
, *to
++ = *p
++ + 0x20;
1457 /* Arrange multibyte text at STR of LEN bytes as a unibyte text. It
1458 actually converts only 8-bit characters in the range 0x80..0x9F to
1462 str_as_unibyte (str
, bytes
)
1466 unsigned char *p
= str
, *endp
= str
+ bytes
;
1467 unsigned char *to
= str
;
1469 while (p
< endp
&& *p
!= LEADING_CODE_8_BIT_CONTROL
) p
++;
1473 if (*p
== LEADING_CODE_8_BIT_CONTROL
)
1474 *to
++ = *(p
+ 1) - 0x20, p
+= 2;
1482 DEFUN ("string", Fstring
, Sstring
, 1, MANY
, 0,
1483 "Concatenate all the argument characters and make the result a string.")
1489 unsigned char *buf
= (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH
* n
);
1490 unsigned char *p
= buf
;
1493 for (i
= 0; i
< n
; i
++)
1495 CHECK_NUMBER (args
[i
], 0);
1497 p
+= CHAR_STRING (c
, p
);
1500 return make_string_from_bytes (buf
, n
, p
- buf
);
1506 charset_id_internal (charset_name
)
1511 val
= Fget (intern (charset_name
), Qcharset
);
1513 error ("Charset %s is not defined", charset_name
);
1515 return (XINT (XVECTOR (val
)->contents
[0]));
1518 DEFUN ("setup-special-charsets", Fsetup_special_charsets
,
1519 Ssetup_special_charsets
, 0, 0, 0, "Internal use only.")
1522 charset_latin_iso8859_1
= charset_id_internal ("latin-iso8859-1");
1523 charset_jisx0208_1978
= charset_id_internal ("japanese-jisx0208-1978");
1524 charset_jisx0208
= charset_id_internal ("japanese-jisx0208");
1525 charset_katakana_jisx0201
= charset_id_internal ("katakana-jisx0201");
1526 charset_latin_jisx0201
= charset_id_internal ("latin-jisx0201");
1527 charset_big5_1
= charset_id_internal ("chinese-big5-1");
1528 charset_big5_2
= charset_id_internal ("chinese-big5-2");
1533 init_charset_once ()
1537 staticpro (&Vcharset_table
);
1538 staticpro (&Vcharset_symbol_table
);
1539 staticpro (&Vgeneric_character_list
);
1541 /* This has to be done here, before we call Fmake_char_table. */
1542 Qcharset_table
= intern ("charset-table");
1543 staticpro (&Qcharset_table
);
1545 /* Intern this now in case it isn't already done.
1546 Setting this variable twice is harmless.
1547 But don't staticpro it here--that is done in alloc.c. */
1548 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
1550 /* Now we are ready to set up this property, so we can
1551 create the charset table. */
1552 Fput (Qcharset_table
, Qchar_table_extra_slots
, make_number (0));
1553 Vcharset_table
= Fmake_char_table (Qcharset_table
, Qnil
);
1555 Qunknown
= intern ("unknown");
1556 staticpro (&Qunknown
);
1557 Vcharset_symbol_table
= Fmake_vector (make_number (MAX_CHARSET
+ 1),
1561 for (i
= 0; i
< 2; i
++)
1562 for (j
= 0; j
< 2; j
++)
1563 for (k
= 0; k
< 128; k
++)
1564 iso_charset_table
[i
][j
][k
] = -1;
1566 for (i
= 0; i
< 256; i
++)
1567 bytes_by_char_head
[i
] = 1;
1568 for (i
= MIN_CHARSET_OFFICIAL_DIMENSION1
;
1569 i
<= MAX_CHARSET_OFFICIAL_DIMENSION1
; i
++)
1570 bytes_by_char_head
[i
] = 2;
1571 for (i
= MIN_CHARSET_OFFICIAL_DIMENSION2
;
1572 i
<= MAX_CHARSET_OFFICIAL_DIMENSION2
; i
++)
1573 bytes_by_char_head
[i
] = 3;
1574 bytes_by_char_head
[LEADING_CODE_PRIVATE_11
] = 3;
1575 bytes_by_char_head
[LEADING_CODE_PRIVATE_12
] = 3;
1576 bytes_by_char_head
[LEADING_CODE_PRIVATE_21
] = 4;
1577 bytes_by_char_head
[LEADING_CODE_PRIVATE_22
] = 4;
1578 bytes_by_char_head
[LEADING_CODE_8_BIT_CONTROL
] = 2;
1580 for (i
= 0; i
< 128; i
++)
1581 width_by_char_head
[i
] = 1;
1582 for (; i
< 256; i
++)
1583 width_by_char_head
[i
] = 4;
1584 width_by_char_head
[LEADING_CODE_PRIVATE_11
] = 1;
1585 width_by_char_head
[LEADING_CODE_PRIVATE_12
] = 2;
1586 width_by_char_head
[LEADING_CODE_PRIVATE_21
] = 1;
1587 width_by_char_head
[LEADING_CODE_PRIVATE_22
] = 2;
1593 for (i
= 0x81; i
< 0x90; i
++)
1594 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
1595 for (; i
< 0x9A; i
++)
1596 val
= Fcons (make_number ((i
- 0x8F) << 14), val
);
1597 for (i
= 0xA0; i
< 0xF0; i
++)
1598 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
1599 for (; i
< 0xFF; i
++)
1600 val
= Fcons (make_number ((i
- 0xE0) << 14), val
);
1601 Vgeneric_character_list
= Fnreverse (val
);
1604 nonascii_insert_offset
= 0;
1605 Vnonascii_translation_table
= Qnil
;
1613 Qcharset
= intern ("charset");
1614 staticpro (&Qcharset
);
1616 Qascii
= intern ("ascii");
1617 staticpro (&Qascii
);
1619 Qeight_bit_control
= intern ("eight-bit-control");
1620 staticpro (&Qeight_bit_control
);
1622 Qeight_bit_graphic
= intern ("eight-bit-graphic");
1623 staticpro (&Qeight_bit_graphic
);
1625 /* Define special charsets ascii, eight-bit-control, and
1626 eight-bit-graphic. */
1627 update_charset_table (make_number (CHARSET_ASCII
),
1628 make_number (1), make_number (94),
1633 build_string ("ASCII"),
1634 build_string ("ASCII"),
1635 build_string ("ASCII (ISO646 IRV)"));
1636 CHARSET_SYMBOL (CHARSET_ASCII
) = Qascii
;
1637 Fput (Qascii
, Qcharset
, CHARSET_TABLE_ENTRY (CHARSET_ASCII
));
1639 update_charset_table (make_number (CHARSET_8_BIT_CONTROL
),
1640 make_number (1), make_number (96),
1645 build_string ("8-bit control code (0x80..0x9F)"),
1646 build_string ("8-bit control code (0x80..0x9F)"),
1647 build_string ("8-bit control code (0x80..0x9F)"));
1648 CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL
) = Qeight_bit_control
;
1649 Fput (Qeight_bit_control
, Qcharset
,
1650 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL
));
1652 update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC
),
1653 make_number (1), make_number (96),
1658 build_string ("8-bit graphic char"),
1659 build_string ("8-bit graphic char (0xA0..0xFF)"),
1660 build_string ("8-bit graphic char (0xA0..0xFF)"));
1661 CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC
) = Qeight_bit_graphic
;
1662 Fput (Qeight_bit_graphic
, Qcharset
,
1663 CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC
));
1665 Qauto_fill_chars
= intern ("auto-fill-chars");
1666 staticpro (&Qauto_fill_chars
);
1667 Fput (Qauto_fill_chars
, Qchar_table_extra_slots
, make_number (0));
1669 defsubr (&Sdefine_charset
);
1670 defsubr (&Sgeneric_character_list
);
1671 defsubr (&Sget_unused_iso_final_char
);
1672 defsubr (&Sdeclare_equiv_charset
);
1673 defsubr (&Sfind_charset_region
);
1674 defsubr (&Sfind_charset_string
);
1675 defsubr (&Smake_char_internal
);
1676 defsubr (&Ssplit_char
);
1677 defsubr (&Schar_charset
);
1678 defsubr (&Scharset_after
);
1679 defsubr (&Siso_charset
);
1680 defsubr (&Schar_valid_p
);
1681 defsubr (&Sunibyte_char_to_multibyte
);
1682 defsubr (&Smultibyte_char_to_unibyte
);
1683 defsubr (&Schar_bytes
);
1684 defsubr (&Schar_width
);
1685 defsubr (&Sstring_width
);
1686 defsubr (&Schar_direction
);
1687 defsubr (&Schars_in_region
);
1689 defsubr (&Ssetup_special_charsets
);
1691 DEFVAR_LISP ("charset-list", &Vcharset_list
,
1692 "List of charsets ever defined.");
1693 Vcharset_list
= Fcons (Qascii
, Fcons (Qeight_bit_control
,
1694 Fcons (Qeight_bit_graphic
, Qnil
)));
1696 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector
,
1697 "Vector of cons cell of a symbol and translation table ever defined.\n\
1698 An ID of a translation table is an index of this vector.");
1699 Vtranslation_table_vector
= Fmake_vector (make_number (16), Qnil
);
1701 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11
,
1702 "Leading-code of private TYPE9N charset of column-width 1.");
1703 leading_code_private_11
= LEADING_CODE_PRIVATE_11
;
1705 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12
,
1706 "Leading-code of private TYPE9N charset of column-width 2.");
1707 leading_code_private_12
= LEADING_CODE_PRIVATE_12
;
1709 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21
,
1710 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1711 leading_code_private_21
= LEADING_CODE_PRIVATE_21
;
1713 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22
,
1714 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1715 leading_code_private_22
= LEADING_CODE_PRIVATE_22
;
1717 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset
,
1718 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1719 This is used for converting unibyte text to multibyte,\n\
1720 and for inserting character codes specified by number.\n\n\
1721 This serves to convert a Latin-1 or similar 8-bit character code\n\
1722 to the corresponding Emacs multibyte character code.\n\
1723 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1724 for your choice of character set.\n\
1725 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1726 nonascii_insert_offset
= 0;
1728 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table
,
1729 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1730 This is used for converting unibyte text to multibyte,\n\
1731 and for inserting character codes specified by number.\n\n\
1732 Conversion is performed only when multibyte characters are enabled,\n\
1733 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1734 to the corresponding Emacs character code.\n\n\
1735 If this is nil, `nonascii-insert-offset' is used instead.\n\
1736 See also the docstring of `make-translation-table'.");
1737 Vnonascii_translation_table
= Qnil
;
1739 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars
,
1740 "A char-table for characters which invoke auto-filling.\n\
1741 Such characters have value t in this table.");
1742 Vauto_fill_chars
= Fmake_char_table (Qauto_fill_chars
, Qnil
);
1743 CHAR_TABLE_SET (Vauto_fill_chars
, make_number (' '), Qt
);
1744 CHAR_TABLE_SET (Vauto_fill_chars
, make_number ('\n'), Qt
);