1 /* Basic multilingual character support.
2 Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
3 Licensed to the Free Software Foundation.
5 This file is part of GNU Emacs.
7 GNU Emacs is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
12 GNU Emacs is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GNU Emacs; see the file COPYING. If not, write to
19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
20 Boston, MA 02111-1307, USA. */
22 /* At first, see the document in `charset.h' to understand the code in
29 #include <sys/types.h>
43 Lisp_Object Qcharset
, Qascii
, Qcomposition
;
45 /* Declaration of special leading-codes. */
46 int leading_code_composition
; /* for composite characters */
47 int leading_code_private_11
; /* for private DIMENSION1 of 1-column */
48 int leading_code_private_12
; /* for private DIMENSION1 of 2-column */
49 int leading_code_private_21
; /* for private DIMENSION2 of 1-column */
50 int leading_code_private_22
; /* for private DIMENSION2 of 2-column */
52 /* Declaration of special charsets. */
53 int charset_ascii
; /* ASCII */
54 int charset_composition
; /* for a composite character */
55 int charset_latin_iso8859_1
; /* ISO8859-1 (Latin-1) */
56 int charset_jisx0208_1978
; /* JISX0208.1978 (Japanese Kanji old set) */
57 int charset_jisx0208
; /* JISX0208.1983 (Japanese Kanji) */
58 int charset_katakana_jisx0201
; /* JISX0201.Kana (Japanese Katakana) */
59 int charset_latin_jisx0201
; /* JISX0201.Roman (Japanese Roman) */
60 int charset_big5_1
; /* Big5 Level 1 (Chinese Traditional) */
61 int charset_big5_2
; /* Big5 Level 2 (Chinese Traditional) */
63 int min_composite_char
;
65 Lisp_Object Qcharset_table
;
67 /* A char-table containing information of each character set. */
68 Lisp_Object Vcharset_table
;
70 /* A vector of charset symbol indexed by charset-id. This is used
71 only for returning charset symbol from C functions. */
72 Lisp_Object Vcharset_symbol_table
;
74 /* A list of charset symbols ever defined. */
75 Lisp_Object Vcharset_list
;
77 /* Vector of translation table ever defined.
78 ID of a translation table is used to index this vector. */
79 Lisp_Object Vtranslation_table_vector
;
81 /* A char-table for characters which may invoke auto-filling. */
82 Lisp_Object Vauto_fill_chars
;
84 Lisp_Object Qauto_fill_chars
;
86 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD. */
87 int bytes_by_char_head
[256];
88 int width_by_char_head
[256];
90 /* Mapping table from ISO2022's charset (specified by DIMENSION,
91 CHARS, and FINAL-CHAR) to Emacs' charset. */
92 int iso_charset_table
[2][2][128];
94 /* Table of pointers to the structure `cmpchar_info' indexed by
96 struct cmpchar_info
**cmpchar_table
;
97 /* The current size of `cmpchar_table'. */
98 static int cmpchar_table_size
;
99 /* Number of the current composite characters. */
102 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
103 unsigned char *_fetch_multibyte_char_p
;
104 int _fetch_multibyte_char_len
;
106 /* Offset to add to a non-ASCII value when inserting it. */
107 int nonascii_insert_offset
;
109 /* Translation table for converting non-ASCII unibyte characters
110 to multibyte codes, or nil. */
111 Lisp_Object Vnonascii_translation_table
;
113 /* List of all possible generic characters. */
114 Lisp_Object Vgeneric_character_list
;
116 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
117 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
120 invalid_character (c
)
123 error ("Invalid character: 0%o, %d, 0x%x", c
, c
, c
);
127 /* Set STR a pointer to the multi-byte form of the character C. If C
128 is not a composite character, the multi-byte form is set in WORKBUF
129 and STR points WORKBUF. The caller should allocate at least 4-byte
130 area at WORKBUF in advance. Returns the length of the multi-byte
131 form. If C is an invalid character to have a multi-byte form,
134 Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
135 function directly if C can be an ASCII character. */
138 non_ascii_char_to_string (c
, workbuf
, str
)
140 unsigned char *workbuf
, **str
;
145 invalid_character (c
);
147 if (COMPOSITE_CHAR_P (c
))
149 int cmpchar_id
= COMPOSITE_CHAR_ID (c
);
151 if (cmpchar_id
< n_cmpchars
)
153 *str
= cmpchar_table
[cmpchar_id
]->data
;
154 return cmpchar_table
[cmpchar_id
]->len
;
158 invalid_character (c
);
162 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
164 || ! CHARSET_DEFINED_P (charset
)
165 || c1
>= 0 && c1
< 32
166 || c2
>= 0 && c2
< 32)
167 invalid_character (c
);
170 *workbuf
++ = CHARSET_LEADING_CODE_BASE (charset
);
171 if (*workbuf
= CHARSET_LEADING_CODE_EXT (charset
))
173 *workbuf
++ = c1
| 0x80;
175 *workbuf
++ = c2
| 0x80;
177 return (workbuf
- *str
);
180 /* Return a non-ASCII character of which multi-byte form is at STR of
181 length LEN. If ACTUAL_LEN is not NULL, the actual length of the
182 multibyte form is set to the address ACTUAL_LEN.
184 If exclude_tail_garbage is nonzero, ACTUAL_LEN excludes gabage
185 bytes following the non-ASCII character.
187 Use macro `STRING_CHAR (STR, LEN)' instead of calling this function
188 directly if STR can hold an ASCII character. */
191 string_to_non_ascii_char (str
, len
, actual_len
, exclude_tail_garbage
)
192 const unsigned char *str
;
193 int len
, *actual_len
, exclude_tail_garbage
;
196 unsigned char c1
, c2
;
198 const unsigned char *begp
= str
;
203 if (BASE_LEADING_CODE_P (c
))
205 while (bytes
< len
&& ! CHAR_HEAD_P (begp
[bytes
])) bytes
++;
207 if (c
== LEADING_CODE_COMPOSITION
)
209 int cmpchar_id
= str_cmpchar_id (begp
, bytes
);
213 c
= MAKE_COMPOSITE_CHAR (cmpchar_id
);
214 str
+= cmpchar_table
[cmpchar_id
]->len
- 1;
221 const unsigned char *endp
= begp
+ bytes
;
222 int charset
= c
, c1
, c2
= 0;
224 if (str
>= endp
) break;
225 if (c
>= LEADING_CODE_PRIVATE_11
&& c
<= LEADING_CODE_PRIVATE_22
)
231 c1
= charset
, charset
= c
;
235 if (CHARSET_DEFINED_P (charset
)
236 && CHARSET_DIMENSION (charset
) == 2
239 c
= MAKE_NON_ASCII_CHAR (charset
, c1
, c2
);
244 *actual_len
= exclude_tail_garbage
? str
- begp
: bytes
;
248 /* Return the length of the multi-byte form at string STR of length LEN. */
250 multibyte_form_length (str
, len
)
251 const unsigned char *str
;
256 if (BASE_LEADING_CODE_P (*str
))
257 while (bytes
< len
&& ! CHAR_HEAD_P (str
[bytes
])) bytes
++;
262 /* Check if string STR of length LEN contains valid multi-byte form of
263 a character. If valid, charset and position codes of the character
264 is set at *CHARSET, *C1, and *C2, and return 0. If not valid,
265 return -1. This should be used only in the macro SPLIT_STRING
266 which checks range of STR in advance. */
269 split_non_ascii_string (str
, len
, charset
, c1
, c2
)
270 register const unsigned char *str
;
271 register unsigned char *c1
, *c2
;
272 register int len
, *charset
;
274 register unsigned int cs
= *str
++;
276 if (cs
== LEADING_CODE_COMPOSITION
)
278 int cmpchar_id
= str_cmpchar_id (str
- 1, len
);
282 *charset
= cs
, *c1
= cmpchar_id
>> 7, *c2
= cmpchar_id
& 0x7F;
284 else if ((cs
< LEADING_CODE_PRIVATE_11
|| (cs
= *str
++) >= 0xA0)
285 && CHARSET_DEFINED_P (cs
))
290 *c1
= (*str
++) & 0x7F;
291 if (CHARSET_DIMENSION (cs
) == 2)
295 *c2
= (*str
++) & 0x7F;
303 /* Translate character C by translation table TABLE. If C
304 is negative, translate a character specified by CHARSET, C1, and C2
305 (C1 and C2 are code points of the character). If no translation is
306 found in TABLE, return C. */
308 translate_char (table
, c
, charset
, c1
, c2
)
310 int c
, charset
, c1
, c2
;
313 int alt_charset
, alt_c1
, alt_c2
, dimension
;
315 if (c
< 0) c
= MAKE_CHAR (charset
, c1
, c2
);
316 if (!CHAR_TABLE_P (table
)
317 || (ch
= Faref (table
, make_number (c
)), !INTEGERP (ch
))
321 SPLIT_CHAR (XFASTINT (ch
), alt_charset
, alt_c1
, alt_c2
);
322 dimension
= CHARSET_DIMENSION (alt_charset
);
323 if (dimension
== 1 && alt_c1
> 0 || dimension
== 2 && alt_c2
> 0)
324 /* CH is not a generic character, just return it. */
325 return XFASTINT (ch
);
327 /* Since CH is a generic character, we must return a specific
328 charater which has the same position codes as C from CH. */
330 SPLIT_CHAR (c
, charset
, c1
, c2
);
331 if (dimension
!= CHARSET_DIMENSION (charset
))
332 /* We can't make such a character because of dimension mismatch. */
334 return MAKE_CHAR (alt_charset
, c1
, c2
);
337 /* Convert the unibyte character C to multibyte based on
338 Vnonascii_translation_table or nonascii_insert_offset. If they can't
339 convert C to a valid multibyte character, convert it based on
340 DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character. */
343 unibyte_char_to_multibyte (c
)
350 if (! NILP (Vnonascii_translation_table
))
352 c
= XINT (Faref (Vnonascii_translation_table
, make_number (c
)));
353 if (c
>= 0400 && ! VALID_MULTIBYTE_CHAR_P (c
))
354 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
356 else if (c
>= 0240 && nonascii_insert_offset
> 0)
358 c
+= nonascii_insert_offset
;
359 if (c
< 0400 || ! VALID_MULTIBYTE_CHAR_P (c
))
360 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
363 c
= c_save
+ DEFAULT_NONASCII_INSERT_OFFSET
;
369 /* Convert the multibyte character C to unibyte 8-bit character based
370 on Vnonascii_translation_table or nonascii_insert_offset. If
371 REV_TBL is non-nil, it should be a reverse table of
372 Vnonascii_translation_table, i.e. what given by:
373 Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0)) */
376 multibyte_char_to_unibyte (c
, rev_tbl
)
380 if (!SINGLE_BYTE_CHAR_P (c
))
384 if (! CHAR_TABLE_P (rev_tbl
)
385 && CHAR_TABLE_P (Vnonascii_translation_table
))
386 rev_tbl
= Fchar_table_extra_slot (Vnonascii_translation_table
,
388 if (CHAR_TABLE_P (rev_tbl
))
391 temp
= Faref (rev_tbl
, make_number (c
));
395 c
= (c_save
& 0177) + 0200;
399 if (nonascii_insert_offset
> 0)
400 c
-= nonascii_insert_offset
;
401 if (c
< 128 || c
>= 256)
402 c
= (c_save
& 0177) + 0200;
410 /* Update the table Vcharset_table with the given arguments (see the
411 document of `define-charset' for the meaning of each argument).
412 Several other table contents are also updated. The caller should
413 check the validity of CHARSET-ID and the remaining arguments in
417 update_charset_table (charset_id
, dimension
, chars
, width
, direction
,
418 iso_final_char
, iso_graphic_plane
,
419 short_name
, long_name
, description
)
420 Lisp_Object charset_id
, dimension
, chars
, width
, direction
;
421 Lisp_Object iso_final_char
, iso_graphic_plane
;
422 Lisp_Object short_name
, long_name
, description
;
424 int charset
= XINT (charset_id
);
426 unsigned char leading_code_base
, leading_code_ext
;
428 if (NILP (CHARSET_TABLE_ENTRY (charset
)))
429 CHARSET_TABLE_ENTRY (charset
)
430 = Fmake_vector (make_number (CHARSET_MAX_IDX
), Qnil
);
432 /* Get byte length of multibyte form, base leading-code, and
433 extended leading-code of the charset. See the comment under the
434 title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h. */
435 bytes
= XINT (dimension
);
436 if (charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
438 /* Official charset, it doesn't have an extended leading-code. */
439 if (charset
!= CHARSET_ASCII
)
440 bytes
+= 1; /* For a base leading-code. */
441 leading_code_base
= charset
;
442 leading_code_ext
= 0;
446 /* Private charset. */
447 bytes
+= 2; /* For base and extended leading-codes. */
449 = (charset
< LEADING_CODE_EXT_12
450 ? LEADING_CODE_PRIVATE_11
451 : (charset
< LEADING_CODE_EXT_21
452 ? LEADING_CODE_PRIVATE_12
453 : (charset
< LEADING_CODE_EXT_22
454 ? LEADING_CODE_PRIVATE_21
455 : LEADING_CODE_PRIVATE_22
)));
456 leading_code_ext
= charset
;
459 if (BYTES_BY_CHAR_HEAD (leading_code_base
) != bytes
)
460 error ("Invalid dimension for the charset-ID %d", charset
);
462 CHARSET_TABLE_INFO (charset
, CHARSET_ID_IDX
) = charset_id
;
463 CHARSET_TABLE_INFO (charset
, CHARSET_BYTES_IDX
) = make_number (bytes
);
464 CHARSET_TABLE_INFO (charset
, CHARSET_DIMENSION_IDX
) = dimension
;
465 CHARSET_TABLE_INFO (charset
, CHARSET_CHARS_IDX
) = chars
;
466 CHARSET_TABLE_INFO (charset
, CHARSET_WIDTH_IDX
) = width
;
467 CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
) = direction
;
468 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_BASE_IDX
)
469 = make_number (leading_code_base
);
470 CHARSET_TABLE_INFO (charset
, CHARSET_LEADING_CODE_EXT_IDX
)
471 = make_number (leading_code_ext
);
472 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_FINAL_CHAR_IDX
) = iso_final_char
;
473 CHARSET_TABLE_INFO (charset
, CHARSET_ISO_GRAPHIC_PLANE_IDX
)
475 CHARSET_TABLE_INFO (charset
, CHARSET_SHORT_NAME_IDX
) = short_name
;
476 CHARSET_TABLE_INFO (charset
, CHARSET_LONG_NAME_IDX
) = long_name
;
477 CHARSET_TABLE_INFO (charset
, CHARSET_DESCRIPTION_IDX
) = description
;
478 CHARSET_TABLE_INFO (charset
, CHARSET_PLIST_IDX
) = Qnil
;
481 /* If we have already defined a charset which has the same
482 DIMENSION, CHARS and ISO-FINAL-CHAR but the different
483 DIRECTION, we must update the entry REVERSE-CHARSET of both
484 charsets. If there's no such charset, the value of the entry
488 for (i
= 0; i
<= MAX_CHARSET
; i
++)
489 if (!NILP (CHARSET_TABLE_ENTRY (i
)))
491 if (CHARSET_DIMENSION (i
) == XINT (dimension
)
492 && CHARSET_CHARS (i
) == XINT (chars
)
493 && CHARSET_ISO_FINAL_CHAR (i
) == XINT (iso_final_char
)
494 && CHARSET_DIRECTION (i
) != XINT (direction
))
496 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
498 CHARSET_TABLE_INFO (i
, CHARSET_REVERSE_CHARSET_IDX
) = charset_id
;
503 /* No such a charset. */
504 CHARSET_TABLE_INFO (charset
, CHARSET_REVERSE_CHARSET_IDX
)
508 if (charset
!= CHARSET_ASCII
509 && charset
< MIN_CHARSET_PRIVATE_DIMENSION1
)
511 width_by_char_head
[leading_code_base
] = XINT (width
);
513 /* Update table emacs_code_class. */
514 emacs_code_class
[charset
] = (bytes
== 2
515 ? EMACS_leading_code_2
517 ? EMACS_leading_code_3
518 : EMACS_leading_code_4
));
521 /* Update table iso_charset_table. */
522 if (ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) < 0)
523 ISO_CHARSET_TABLE (dimension
, chars
, iso_final_char
) = charset
;
528 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
531 get_charset_id (charset_symbol
)
532 Lisp_Object charset_symbol
;
537 return ((SYMBOLP (charset_symbol
)
538 && (val
= Fget (charset_symbol
, Qcharset
), VECTORP (val
))
539 && (charset
= XINT (XVECTOR (val
)->contents
[CHARSET_ID_IDX
]),
540 CHARSET_VALID_P (charset
)))
544 /* Return an identification number for a new private charset of
545 DIMENSION and WIDTH. If there's no more room for the new charset,
548 get_new_private_charset_id (dimension
, width
)
549 int dimension
, width
;
551 int charset
, from
, to
;
556 from
= LEADING_CODE_EXT_11
, to
= LEADING_CODE_EXT_12
;
558 from
= LEADING_CODE_EXT_12
, to
= LEADING_CODE_EXT_21
;
563 from
= LEADING_CODE_EXT_21
, to
= LEADING_CODE_EXT_22
;
565 from
= LEADING_CODE_EXT_22
, to
= LEADING_CODE_EXT_MAX
+ 1;
568 for (charset
= from
; charset
< to
; charset
++)
569 if (!CHARSET_DEFINED_P (charset
)) break;
571 return make_number (charset
< to
? charset
: 0);
574 DEFUN ("define-charset", Fdefine_charset
, Sdefine_charset
, 3, 3, 0,
575 "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
576 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
577 treated as a private charset.\n\
578 INFO-VECTOR is a vector of the format:\n\
579 [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
580 SHORT-NAME LONG-NAME DESCRIPTION]\n\
581 The meanings of each elements is as follows:\n\
582 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
583 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
584 WIDTH (integer) is the number of columns a character in the charset\n\
585 occupies on the screen: one of 0, 1, and 2.\n\
587 DIRECTION (integer) is the rendering direction of characters in the\n\
588 charset when rendering. If 0, render from left to right, else\n\
589 render from right to left.\n\
591 ISO-FINAL-CHAR (character) is the final character of the\n\
592 corresponding ISO 2022 charset.\n\
594 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
595 while encoding to variants of ISO 2022 coding system, one of the\n\
596 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
598 SHORT-NAME (string) is the short name to refer to the charset.\n\
600 LONG-NAME (string) is the long name to refer to the charset.\n\
602 DESCRIPTION (string) is the description string of the charset.")
603 (charset_id
, charset_symbol
, info_vector
)
604 Lisp_Object charset_id
, charset_symbol
, info_vector
;
608 if (!NILP (charset_id
))
609 CHECK_NUMBER (charset_id
, 0);
610 CHECK_SYMBOL (charset_symbol
, 1);
611 CHECK_VECTOR (info_vector
, 2);
613 if (! NILP (charset_id
))
615 if (! CHARSET_VALID_P (XINT (charset_id
)))
616 error ("Invalid CHARSET: %d", XINT (charset_id
));
617 else if (CHARSET_DEFINED_P (XINT (charset_id
)))
618 error ("Already defined charset: %d", XINT (charset_id
));
621 vec
= XVECTOR (info_vector
)->contents
;
622 if (XVECTOR (info_vector
)->size
!= 9
623 || !INTEGERP (vec
[0]) || !(XINT (vec
[0]) == 1 || XINT (vec
[0]) == 2)
624 || !INTEGERP (vec
[1]) || !(XINT (vec
[1]) == 94 || XINT (vec
[1]) == 96)
625 || !INTEGERP (vec
[2]) || !(XINT (vec
[2]) == 1 || XINT (vec
[2]) == 2)
626 || !INTEGERP (vec
[3]) || !(XINT (vec
[3]) == 0 || XINT (vec
[3]) == 1)
627 || !INTEGERP (vec
[4]) || !(XINT (vec
[4]) >= '0' && XINT (vec
[4]) <= '~')
628 || !INTEGERP (vec
[5]) || !(XINT (vec
[5]) == 0 || XINT (vec
[5]) == 1)
631 || !STRINGP (vec
[8]))
632 error ("Invalid info-vector argument for defining charset %s",
633 XSYMBOL (charset_symbol
)->name
->data
);
635 if (NILP (charset_id
))
637 charset_id
= get_new_private_charset_id (XINT (vec
[0]), XINT (vec
[2]));
638 if (XINT (charset_id
) == 0)
639 error ("There's no room for a new private charset %s",
640 XSYMBOL (charset_symbol
)->name
->data
);
643 update_charset_table (charset_id
, vec
[0], vec
[1], vec
[2], vec
[3],
644 vec
[4], vec
[5], vec
[6], vec
[7], vec
[8]);
645 Fput (charset_symbol
, Qcharset
, CHARSET_TABLE_ENTRY (XINT (charset_id
)));
646 CHARSET_SYMBOL (XINT (charset_id
)) = charset_symbol
;
647 Vcharset_list
= Fcons (charset_symbol
, Vcharset_list
);
651 DEFUN ("generic-character-list", Fgeneric_character_list
,
652 Sgeneric_character_list
, 0, 0, 0,
653 "Return a list of all possible generic characters.\n\
654 It includes a generic character for a charset not yet defined.")
657 return Vgeneric_character_list
;
660 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char
,
661 Sget_unused_iso_final_char
, 2, 2, 0,
662 "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
663 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
664 CHARS is the number of characters in a dimension: 94 or 96.\n\
666 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
667 If there's no unused final char for the specified kind of charset,\n\
670 Lisp_Object dimension
, chars
;
674 CHECK_NUMBER (dimension
, 0);
675 CHECK_NUMBER (chars
, 1);
676 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
677 error ("Invalid charset dimension %d, it should be 1 or 2",
679 if (XINT (chars
) != 94 && XINT (chars
) != 96)
680 error ("Invalid charset chars %d, it should be 94 or 96",
682 for (final_char
= '0'; final_char
<= '?'; final_char
++)
684 if (ISO_CHARSET_TABLE (dimension
, chars
, make_number (final_char
)) < 0)
687 return (final_char
<= '?' ? make_number (final_char
) : Qnil
);
690 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset
, Sdeclare_equiv_charset
,
692 "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
693 CHARSET should be defined by `defined-charset' in advance.")
694 (dimension
, chars
, final_char
, charset_symbol
)
695 Lisp_Object dimension
, chars
, final_char
, charset_symbol
;
699 CHECK_NUMBER (dimension
, 0);
700 CHECK_NUMBER (chars
, 1);
701 CHECK_NUMBER (final_char
, 2);
702 CHECK_SYMBOL (charset_symbol
, 3);
704 if (XINT (dimension
) != 1 && XINT (dimension
) != 2)
705 error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension
));
706 if (XINT (chars
) != 94 && XINT (chars
) != 96)
707 error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars
));
708 if (XINT (final_char
) < '0' || XFASTINT (final_char
) > '~')
709 error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars
));
710 if ((charset
= get_charset_id (charset_symbol
)) < 0)
711 error ("Invalid charset %s", XSYMBOL (charset_symbol
)->name
->data
);
713 ISO_CHARSET_TABLE (dimension
, chars
, final_char
) = charset
;
717 /* Return number of different charsets in STR of length LEN. In
718 addition, for each found charset N, CHARSETS[N] is set 1. The
719 caller should allocate CHARSETS (MAX_CHARSET + 1 elements) in advance.
720 It may lookup a translation table TABLE if supplied.
722 If CMPCHARP is nonzero and some composite character is found,
723 CHARSETS[128] is also set 1 and the returned number is incremented
727 find_charset_in_str (str
, len
, charsets
, table
, cmpcharp
)
733 register int num
= 0, c
;
735 if (! CHAR_TABLE_P (table
))
743 if (c
== LEADING_CODE_COMPOSITION
)
745 int cmpchar_id
= str_cmpchar_id (str
, len
);
750 struct cmpchar_info
*cmp_p
= cmpchar_table
[cmpchar_id
];
753 for (i
= 0; i
< cmp_p
->glyph_len
; i
++)
758 if ((c
= translate_char (table
, c
, 0, 0, 0)) < 0)
761 if ((charset
= CHAR_CHARSET (c
)) < 0)
762 charset
= CHARSET_ASCII
;
763 if (!charsets
[charset
])
765 charsets
[charset
] = 1;
771 if (cmpcharp
&& !charsets
[CHARSET_COMPOSITION
])
773 charsets
[CHARSET_COMPOSITION
] = 1;
779 charset
= CHARSET_ASCII
;
784 c
= STRING_CHAR_AND_LENGTH (str
, len
, bytes
);
787 int c1
= translate_char (table
, c
, 0, 0, 0);
791 charset
= CHAR_CHARSET (c
);
794 if (!charsets
[charset
])
796 charsets
[charset
] = 1;
805 DEFUN ("find-charset-region", Ffind_charset_region
, Sfind_charset_region
,
807 "Return a list of charsets in the region between BEG and END.\n\
808 BEG and END are buffer positions.\n\
809 If the region contains any composite character,\n\
810 `composition' is included in the returned list.\n\
811 Optional arg TABLE if non-nil is a translation table to look up.")
813 Lisp_Object beg
, end
, table
;
815 int charsets
[MAX_CHARSET
+ 1];
816 int from
, from_byte
, to
, stop
, stop_byte
, i
;
819 validate_region (&beg
, &end
);
820 from
= XFASTINT (beg
);
821 stop
= to
= XFASTINT (end
);
823 if (NILP (current_buffer
->enable_multibyte_characters
))
826 : Fcons (Qascii
, Qnil
));
828 if (from
< GPT
&& GPT
< to
)
831 stop_byte
= GPT_BYTE
;
834 stop_byte
= CHAR_TO_BYTE (stop
);
836 from_byte
= CHAR_TO_BYTE (from
);
838 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
841 find_charset_in_str (BYTE_POS_ADDR (from_byte
), stop_byte
- from_byte
,
845 from
= stop
, from_byte
= stop_byte
;
846 stop
= to
, stop_byte
= CHAR_TO_BYTE (stop
);
853 for (i
= MAX_CHARSET
; i
>= 0; i
--)
855 val
= Fcons (CHARSET_SYMBOL (i
), val
);
859 DEFUN ("find-charset-string", Ffind_charset_string
, Sfind_charset_string
,
861 "Return a list of charsets in STR.\n\
862 If the string contains any composite characters,\n\
863 `composition' is included in the returned list.\n\
864 Optional arg TABLE if non-nil is a translation table to look up.")
866 Lisp_Object str
, table
;
868 int charsets
[MAX_CHARSET
+ 1];
872 CHECK_STRING (str
, 0);
874 if (! STRING_MULTIBYTE (str
))
875 return (XSTRING (str
)->size
== 0
877 : Fcons (Qascii
, Qnil
));
879 bzero (charsets
, (MAX_CHARSET
+ 1) * sizeof (int));
880 find_charset_in_str (XSTRING (str
)->data
, STRING_BYTES (XSTRING (str
)),
883 for (i
= MAX_CHARSET
; i
>= 0; i
--)
885 val
= Fcons (CHARSET_SYMBOL (i
), val
);
889 DEFUN ("make-char-internal", Fmake_char_internal
, Smake_char_internal
, 1, 3, 0,
891 (charset
, code1
, code2
)
892 Lisp_Object charset
, code1
, code2
;
894 CHECK_NUMBER (charset
, 0);
897 XSETFASTINT (code1
, 0);
899 CHECK_NUMBER (code1
, 1);
901 XSETFASTINT (code2
, 0);
903 CHECK_NUMBER (code2
, 2);
905 if (!CHARSET_DEFINED_P (XINT (charset
)))
906 error ("Invalid charset: %d", XINT (charset
));
908 return make_number (MAKE_CHAR (XINT (charset
), XINT (code1
), XINT (code2
)));
911 DEFUN ("split-char", Fsplit_char
, Ssplit_char
, 1, 1, 0,
912 "Return list of charset and one or two position-codes of CHAR.")
919 CHECK_NUMBER (ch
, 0);
920 SPLIT_CHAR (XFASTINT (ch
), charset
, c1
, c2
);
922 ? Fcons (CHARSET_SYMBOL (charset
),
923 Fcons (make_number (c1
), Fcons (make_number (c2
), Qnil
)))
924 : Fcons (CHARSET_SYMBOL (charset
), Fcons (make_number (c1
), Qnil
)));
927 DEFUN ("char-charset", Fchar_charset
, Schar_charset
, 1, 1, 0,
928 "Return charset of CHAR.")
932 CHECK_NUMBER (ch
, 0);
934 return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch
)));
937 DEFUN ("charset-after", Fcharset_after
, Scharset_after
, 0, 1, 0,
938 "Return charset of a character in current buffer at position POS.\n\
939 If POS is nil, it defauls to the current point.")
943 register int pos_byte
, c
, charset
;
944 register unsigned char *p
;
948 else if (MARKERP (pos
))
949 pos_byte
= marker_byte_position (pos
);
952 CHECK_NUMBER (pos
, 0);
953 pos_byte
= CHAR_TO_BYTE (XINT (pos
));
955 p
= BYTE_POS_ADDR (pos_byte
);
956 c
= STRING_CHAR (p
, Z_BYTE
- pos_byte
);
957 charset
= CHAR_CHARSET (c
);
958 return CHARSET_SYMBOL (charset
);
961 DEFUN ("iso-charset", Fiso_charset
, Siso_charset
, 3, 3, 0,
962 "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
964 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
965 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
966 where as Emacs distinguishes them by charset symbol.\n\
967 See the documentation of the function `charset-info' for the meanings of\n\
968 DIMENSION, CHARS, and FINAL-CHAR.")
969 (dimension
, chars
, final_char
)
970 Lisp_Object dimension
, chars
, final_char
;
974 CHECK_NUMBER (dimension
, 0);
975 CHECK_NUMBER (chars
, 1);
976 CHECK_NUMBER (final_char
, 2);
978 if ((charset
= ISO_CHARSET_TABLE (dimension
, chars
, final_char
)) < 0)
980 return CHARSET_SYMBOL (charset
);
983 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
984 generic character. If GENERICP is zero, return nonzero iff C is a
985 valid normal character. Do not call this function directly,
986 instead use macro CHAR_VALID_P. */
988 char_valid_p (c
, genericp
)
995 if (SINGLE_BYTE_CHAR_P (c
))
997 SPLIT_NON_ASCII_CHAR (c
, charset
, c1
, c2
);
998 if (charset
!= CHARSET_COMPOSITION
&& !CHARSET_DEFINED_P (charset
))
1000 return (c
< MIN_CHAR_COMPOSITION
1001 ? ((c
& CHAR_FIELD1_MASK
) /* i.e. dimension of C is two. */
1002 ? (genericp
&& c1
== 0 && c2
== 0
1003 || c1
>= 32 && c2
>= 32)
1004 : (genericp
&& c1
== 0
1006 : c
< MIN_CHAR_COMPOSITION
+ n_cmpchars
);
1009 DEFUN ("char-valid-p", Fchar_valid_p
, Schar_valid_p
, 1, 2, 0,
1010 "Return t if OBJECT is a valid normal character.\n\
1011 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1012 a valid generic character.")
1014 Lisp_Object object
, genericp
;
1016 if (! NATNUMP (object
))
1018 return (CHAR_VALID_P (XFASTINT (object
), !NILP (genericp
)) ? Qt
: Qnil
);
1021 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte
,
1022 Sunibyte_char_to_multibyte
, 1, 1, 0,
1023 "Convert the unibyte character CH to multibyte character.\n\
1024 The conversion is done based on `nonascii-translation-table' (which see)\n\
1025 or `nonascii-insert-offset' (which see).")
1031 CHECK_NUMBER (ch
, 0);
1033 if (c
< 0 || c
>= 0400)
1034 error ("Invalid unibyte character: %d", c
);
1035 c
= unibyte_char_to_multibyte (c
);
1037 error ("Can't convert to multibyte character: %d", XINT (ch
));
1038 return make_number (c
);
1041 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte
,
1042 Smultibyte_char_to_unibyte
, 1, 1, 0,
1043 "Convert the multibyte character CH to unibyte character.\n\
1044 The conversion is done based on `nonascii-translation-table' (which see)\n\
1045 or `nonascii-insert-offset' (which see).")
1051 CHECK_NUMBER (ch
, 0);
1054 error ("Invalid multibyte character: %d", c
);
1055 c
= multibyte_char_to_unibyte (c
, Qnil
);
1057 error ("Can't convert to unibyte character: %d", XINT (ch
));
1058 return make_number (c
);
1061 DEFUN ("char-bytes", Fchar_bytes
, Schar_bytes
, 1, 1, 0,
1062 "Return 1 regardless of the argument CHAR.\n\
1063 This is now an obsolete function. We keep it just for backward compatibility.")
1069 CHECK_NUMBER (ch
, 0);
1070 return make_number (1);
1073 /* Return how many bytes C will occupy in a multibyte buffer.
1074 Don't call this function directly, instead use macro CHAR_BYTES. */
1081 if (COMPOSITE_CHAR_P (c
))
1083 unsigned int id
= COMPOSITE_CHAR_ID (c
);
1085 bytes
= (id
< n_cmpchars
? cmpchar_table
[id
]->len
: 1);
1089 int charset
= CHAR_CHARSET (c
);
1091 bytes
= CHARSET_DEFINED_P (charset
) ? CHARSET_BYTES (charset
) : 1;
1097 /* Return the width of character of which multi-byte form starts with
1098 C. The width is measured by how many columns occupied on the
1099 screen when displayed in the current buffer. */
1101 #define ONE_BYTE_CHAR_WIDTH(c) \
1104 ? XFASTINT (current_buffer->tab_width) \
1105 : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
1109 ? (NILP (current_buffer->ctl_arrow) ? 4 : 2) \
1110 : ((! NILP (current_buffer->enable_multibyte_characters) \
1111 && BASE_LEADING_CODE_P (c)) \
1112 ? WIDTH_BY_CHAR_HEAD (c) \
1115 DEFUN ("char-width", Fchar_width
, Schar_width
, 1, 1, 0,
1116 "Return width of CHAR when displayed in the current buffer.\n\
1117 The width is measured by how many columns it occupies on the screen.")
1121 Lisp_Object val
, disp
;
1123 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1125 CHECK_NUMBER (ch
, 0);
1129 /* Get the way the display table would display it. */
1130 disp
= dp
? DISP_CHAR_VECTOR (dp
, c
) : Qnil
;
1133 XSETINT (val
, XVECTOR (disp
)->size
);
1134 else if (SINGLE_BYTE_CHAR_P (c
))
1135 XSETINT (val
, ONE_BYTE_CHAR_WIDTH (c
));
1136 else if (COMPOSITE_CHAR_P (c
))
1138 int id
= COMPOSITE_CHAR_ID (XFASTINT (ch
));
1139 XSETFASTINT (val
, (id
< n_cmpchars
? cmpchar_table
[id
]->width
: 0));
1143 int charset
= CHAR_CHARSET (c
);
1145 XSETFASTINT (val
, CHARSET_WIDTH (charset
));
1150 /* Return width of string STR of length LEN when displayed in the
1151 current buffer. The width is measured by how many columns it
1152 occupies on the screen. */
1159 unsigned char *endp
= str
+ len
;
1161 struct Lisp_Char_Table
*dp
= buffer_display_table ();
1165 if (*str
== LEADING_CODE_COMPOSITION
)
1167 int id
= str_cmpchar_id (str
, endp
- str
);
1176 width
+= cmpchar_table
[id
]->width
;
1177 str
+= cmpchar_table
[id
]->len
;
1184 int c
= STRING_CHAR_AND_LENGTH (str
, endp
- str
, thislen
);
1186 /* Get the way the display table would display it. */
1188 disp
= DISP_CHAR_VECTOR (dp
, c
);
1193 width
+= XVECTOR (disp
)->size
;
1195 width
+= ONE_BYTE_CHAR_WIDTH (*str
);
1203 DEFUN ("string-width", Fstring_width
, Sstring_width
, 1, 1, 0,
1204 "Return width of STRING when displayed in the current buffer.\n\
1205 Width is measured by how many columns it occupies on the screen.\n\
1206 When calculating width of a multibyte character in STRING,\n\
1207 only the base leading-code is considered; the validity of\n\
1208 the following bytes is not checked.")
1214 CHECK_STRING (str
, 0);
1215 XSETFASTINT (val
, strwidth (XSTRING (str
)->data
,
1216 STRING_BYTES (XSTRING (str
))));
1220 DEFUN ("char-direction", Fchar_direction
, Schar_direction
, 1, 1, 0,
1221 "Return the direction of CHAR.\n\
1222 The returned value is 0 for left-to-right and 1 for right-to-left.")
1228 CHECK_NUMBER (ch
, 0);
1229 charset
= CHAR_CHARSET (XFASTINT (ch
));
1230 if (!CHARSET_DEFINED_P (charset
))
1231 invalid_character (XINT (ch
));
1232 return CHARSET_TABLE_INFO (charset
, CHARSET_DIRECTION_IDX
);
1235 DEFUN ("chars-in-region", Fchars_in_region
, Schars_in_region
, 2, 2, 0,
1236 "Return number of characters between BEG and END.")
1238 Lisp_Object beg
, end
;
1242 CHECK_NUMBER_COERCE_MARKER (beg
, 0);
1243 CHECK_NUMBER_COERCE_MARKER (end
, 1);
1245 from
= min (XFASTINT (beg
), XFASTINT (end
));
1246 to
= max (XFASTINT (beg
), XFASTINT (end
));
1248 return make_number (to
- from
);
1251 /* Return the number of characters in the NBYTES bytes at PTR.
1252 This works by looking at the contents and checking for multibyte sequences.
1253 However, if the current buffer has enable-multibyte-characters = nil,
1254 we treat each byte as a character. */
1257 chars_in_text (ptr
, nbytes
)
1261 unsigned char *endp
, c
;
1264 /* current_buffer is null at early stages of Emacs initialization. */
1265 if (current_buffer
== 0
1266 || NILP (current_buffer
->enable_multibyte_characters
))
1269 endp
= ptr
+ nbytes
;
1276 if (BASE_LEADING_CODE_P (c
))
1277 while (ptr
< endp
&& ! CHAR_HEAD_P (*ptr
)) ptr
++;
1284 /* Return the number of characters in the NBYTES bytes at PTR.
1285 This works by looking at the contents and checking for multibyte sequences.
1286 It ignores enable-multibyte-characters. */
1289 multibyte_chars_in_text (ptr
, nbytes
)
1293 unsigned char *endp
, c
;
1296 endp
= ptr
+ nbytes
;
1303 if (BASE_LEADING_CODE_P (c
))
1304 while (ptr
< endp
&& ! CHAR_HEAD_P (*ptr
)) ptr
++;
1311 DEFUN ("string", Fstring
, Sstring
, 1, MANY
, 0,
1312 "Concatenate all the argument characters and make the result a string.")
1319 = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM
* n
);
1320 unsigned char *p
= buf
;
1323 for (i
= 0; i
< n
; i
++)
1328 if (!INTEGERP (args
[i
]))
1329 CHECK_NUMBER (args
[i
], 0);
1331 len
= CHAR_STRING (c
, p
, str
);
1333 /* C is a composite character. */
1334 bcopy (str
, p
, len
);
1338 /* Here, we can't use make_string_from_bytes because of byte
1339 combining problem. */
1340 val
= make_string (buf
, p
- buf
);
1346 /*** Composite characters staffs ***/
1348 /* Each composite character is identified by CMPCHAR-ID which is
1349 assigned when Emacs needs the character code of the composite
1350 character (e.g. when displaying it on the screen). See the
1351 document "GENERAL NOTE on COMPOSITE CHARACTER" in `charset.h' how a
1352 composite character is represented in Emacs. */
1354 /* If `static' is defined, it means that it is defined to null string. */
1356 /* The following function is copied from lread.c. */
1358 hash_string (ptr
, len
)
1362 register unsigned char *p
= ptr
;
1363 register unsigned char *end
= p
+ len
;
1364 register unsigned char c
;
1365 register int hash
= 0;
1370 if (c
>= 0140) c
-= 40;
1371 hash
= ((hash
<<3) + (hash
>>28) + c
);
1373 return hash
& 07777777777;
1377 #define CMPCHAR_HASH_TABLE_SIZE 0xFFF
1379 static int *cmpchar_hash_table
[CMPCHAR_HASH_TABLE_SIZE
];
1381 /* Each element of `cmpchar_hash_table' is a pointer to an array of
1382 integer, where the 1st element is the size of the array, the 2nd
1383 element is how many elements are actually used in the array, and
1384 the remaining elements are CMPCHAR-IDs of composite characters of
1385 the same hash value. */
1386 #define CMPCHAR_HASH_SIZE(table) table[0]
1387 #define CMPCHAR_HASH_USED(table) table[1]
1388 #define CMPCHAR_HASH_CMPCHAR_ID(table, i) table[i]
1390 /* Return CMPCHAR-ID of the composite character in STR of the length
1391 LEN. If the composite character has not yet been registered,
1392 register it in `cmpchar_table' and assign new CMPCHAR-ID. This
1393 is the sole function for assigning CMPCHAR-ID. */
1395 str_cmpchar_id (str
, len
)
1396 const unsigned char *str
;
1399 int hash_idx
, *hashp
;
1401 int embedded_rule
; /* 1 if composition rule is embedded. */
1402 int chars
; /* number of components. */
1404 struct cmpchar_info
*cmpcharp
;
1406 /* The second byte 0xFF means compostion rule is embedded. */
1407 embedded_rule
= (str
[1] == 0xFF);
1409 /* At first, get the actual length of the composite character. */
1411 const unsigned char *p
, *endp
= str
+ 1, *lastp
= str
+ len
;
1414 while (endp
< lastp
&& ! CHAR_HEAD_P (*endp
)) endp
++;
1416 /* Any composite char have at least 5-byte length. */
1429 /* No need of checking if *P is 0xA0 because
1430 BYTES_BY_CHAR_HEAD (0x80) surely returns 2. */
1431 p
+= BYTES_BY_CHAR_HEAD (*p
- 0x20);
1434 if (p
> endp
|| chars
< 2 || chars
> MAX_COMPONENT_COUNT
)
1435 /* Invalid components. */
1439 hash_idx
= hash_string (str
, len
) % CMPCHAR_HASH_TABLE_SIZE
;
1440 hashp
= cmpchar_hash_table
[hash_idx
];
1442 /* Then, look into the hash table. */
1444 /* Find the correct one among composite characters of the same
1446 for (i
= 2; i
< CMPCHAR_HASH_USED (hashp
); i
++)
1448 cmpcharp
= cmpchar_table
[CMPCHAR_HASH_CMPCHAR_ID (hashp
, i
)];
1449 if (len
== cmpcharp
->len
1450 && ! bcmp (str
, cmpcharp
->data
, len
))
1451 return CMPCHAR_HASH_CMPCHAR_ID (hashp
, i
);
1454 /* We have to register the composite character in cmpchar_table. */
1455 if (n_cmpchars
> (CHAR_FIELD2_MASK
| CHAR_FIELD3_MASK
))
1456 /* No, we have no more room for a new composite character. */
1459 /* Make the entry in hash table. */
1462 /* Make a table for 8 composite characters initially. */
1463 hashp
= (cmpchar_hash_table
[hash_idx
]
1464 = (int *) xmalloc (sizeof (int) * (2 + 8)));
1465 CMPCHAR_HASH_SIZE (hashp
) = 10;
1466 CMPCHAR_HASH_USED (hashp
) = 2;
1468 else if (CMPCHAR_HASH_USED (hashp
) >= CMPCHAR_HASH_SIZE (hashp
))
1470 CMPCHAR_HASH_SIZE (hashp
) += 8;
1471 hashp
= (cmpchar_hash_table
[hash_idx
]
1472 = (int *) xrealloc (hashp
,
1473 sizeof (int) * CMPCHAR_HASH_SIZE (hashp
)));
1475 CMPCHAR_HASH_CMPCHAR_ID (hashp
, CMPCHAR_HASH_USED (hashp
)) = n_cmpchars
;
1476 CMPCHAR_HASH_USED (hashp
)++;
1478 /* Set information of the composite character in cmpchar_table. */
1479 if (cmpchar_table_size
== 0)
1481 /* This is the first composite character to be registered. */
1482 cmpchar_table_size
= 256;
1484 = (struct cmpchar_info
**) xmalloc (sizeof (cmpchar_table
[0])
1485 * cmpchar_table_size
);
1487 else if (cmpchar_table_size
<= n_cmpchars
)
1489 cmpchar_table_size
+= 256;
1491 = (struct cmpchar_info
**) xrealloc (cmpchar_table
,
1492 sizeof (cmpchar_table
[0])
1493 * cmpchar_table_size
);
1496 cmpcharp
= (struct cmpchar_info
*) xmalloc (sizeof (struct cmpchar_info
));
1498 cmpcharp
->len
= len
;
1499 cmpcharp
->data
= (unsigned char *) xmalloc (len
+ 1);
1500 bcopy (str
, cmpcharp
->data
, len
);
1501 cmpcharp
->data
[len
] = 0;
1502 cmpcharp
->glyph_len
= chars
;
1503 cmpcharp
->glyph
= (GLYPH
*) xmalloc (sizeof (GLYPH
) * chars
);
1506 cmpcharp
->cmp_rule
= (unsigned char *) xmalloc (chars
);
1507 cmpcharp
->col_offset
= (float *) xmalloc (sizeof (float) * chars
);
1511 cmpcharp
->cmp_rule
= NULL
;
1512 cmpcharp
->col_offset
= NULL
;
1515 /* Setup GLYPH data and composition rules (if any) so as not to make
1516 them every time on displaying. */
1518 unsigned char *bufp
;
1520 float leftmost
= 0.0, rightmost
= 1.0;
1523 /* At first, col_offset[N] is set to relative to col_offset[0]. */
1524 cmpcharp
->col_offset
[0] = 0;
1526 for (i
= 0, bufp
= cmpcharp
->data
+ 1; i
< chars
; i
++)
1529 cmpcharp
->cmp_rule
[i
] = *bufp
++;
1531 if (*bufp
== 0xA0) /* This is an ASCII character. */
1533 cmpcharp
->glyph
[i
] = FAST_MAKE_GLYPH ((*++bufp
& 0x7F), 0);
1537 else /* Multibyte character. */
1539 /* Make `bufp' point normal multi-byte form temporally. */
1542 = FAST_MAKE_GLYPH (string_to_non_ascii_char (bufp
, 4, 0, 0), 0);
1543 width
= WIDTH_BY_CHAR_HEAD (*bufp
);
1545 bufp
+= BYTES_BY_CHAR_HEAD (*bufp
- 0x20);
1548 if (embedded_rule
&& i
> 0)
1550 /* Reference points (global_ref and new_ref) are
1561 Now, we calculate the column offset of the new glyph
1562 from the left edge of the first glyph. This can avoid
1563 the same calculation everytime displaying this
1564 composite character. */
1566 /* Reference points of global glyph and new glyph. */
1567 int global_ref
= (cmpcharp
->cmp_rule
[i
] - 0xA0) / 9;
1568 int new_ref
= (cmpcharp
->cmp_rule
[i
] - 0xA0) % 9;
1569 /* Column offset relative to the first glyph. */
1570 float left
= (leftmost
1571 + (global_ref
% 3) * (rightmost
- leftmost
) / 2.0
1572 - (new_ref
% 3) * width
/ 2.0);
1574 cmpcharp
->col_offset
[i
] = left
;
1575 if (left
< leftmost
)
1577 if (left
+ width
> rightmost
)
1578 rightmost
= left
+ width
;
1582 if (width
> rightmost
)
1588 /* Now col_offset[N] are relative to the left edge of the
1589 first component. Make them relative to the left edge of
1591 for (i
= 0; i
< chars
; i
++)
1592 cmpcharp
->col_offset
[i
] -= leftmost
;
1593 /* Make rightmost holds width of overall glyph. */
1594 rightmost
-= leftmost
;
1597 cmpcharp
->width
= rightmost
;
1598 if (cmpcharp
->width
< rightmost
)
1599 /* To get a ceiling integer value. */
1603 cmpchar_table
[n_cmpchars
] = cmpcharp
;
1605 return n_cmpchars
++;
1608 /* Return the Nth element of the composite character C. If NOERROR is
1609 nonzero, return 0 on error condition (C is an invalid composite
1610 charcter, or N is out of range). */
1612 cmpchar_component (c
, n
, noerror
)
1615 int id
= COMPOSITE_CHAR_ID (c
);
1617 if (id
< 0 || id
>= n_cmpchars
)
1619 /* C is not a valid composite character. */
1620 if (noerror
) return 0;
1621 error ("Invalid composite character: %d", c
) ;
1623 if (n
>= cmpchar_table
[id
]->glyph_len
)
1625 /* No such component. */
1626 if (noerror
) return 0;
1627 args_out_of_range (make_number (c
), make_number (n
));
1629 /* No face data is stored in glyph code. */
1630 return ((int) (cmpchar_table
[id
]->glyph
[n
]));
1633 DEFUN ("cmpcharp", Fcmpcharp
, Scmpcharp
, 1, 1, 0,
1634 "T if CHAR is a composite character.")
1638 CHECK_NUMBER (ch
, 0);
1639 return (COMPOSITE_CHAR_P (XINT (ch
)) ? Qt
: Qnil
);
1642 DEFUN ("composite-char-component", Fcmpchar_component
, Scmpchar_component
,
1644 "Return the Nth component character of composite character CHARACTER.")
1646 Lisp_Object character
, n
;
1650 CHECK_NUMBER (character
, 0);
1651 CHECK_NUMBER (n
, 1);
1653 return (make_number (cmpchar_component (XINT (character
), XINT (n
), 0)));
1656 DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule
, Scmpchar_cmp_rule
,
1658 "Return the Nth composition rule of composite character CHARACTER.\n\
1659 The returned rule is for composing the Nth component\n\
1660 on the (N-1)th component.\n\
1661 If CHARACTER should be composed relatively or N is 0, return 255.")
1663 Lisp_Object character
, n
;
1667 CHECK_NUMBER (character
, 0);
1668 CHECK_NUMBER (n
, 1);
1670 id
= COMPOSITE_CHAR_ID (XINT (character
));
1671 if (id
< 0 || id
>= n_cmpchars
)
1672 error ("Invalid composite character: %d", XINT (character
));
1673 if (XINT (n
) < 0 || XINT (n
) >= cmpchar_table
[id
]->glyph_len
)
1674 args_out_of_range (character
, n
);
1676 return make_number (cmpchar_table
[id
]->cmp_rule
1677 ? cmpchar_table
[id
]->cmp_rule
[XINT (n
)]
1681 DEFUN ("composite-char-composition-rule-p", Fcmpchar_cmp_rule_p
,
1682 Scmpchar_cmp_rule_p
, 1, 1, 0,
1683 "Return non-nil if composite character CHARACTER contains a embedded rule.")
1685 Lisp_Object character
;
1689 CHECK_NUMBER (character
, 0);
1690 id
= COMPOSITE_CHAR_ID (XINT (character
));
1691 if (id
< 0 || id
>= n_cmpchars
)
1692 error ("Invalid composite character: %d", XINT (character
));
1694 return (cmpchar_table
[id
]->cmp_rule
? Qt
: Qnil
);
1697 DEFUN ("composite-char-component-count", Fcmpchar_cmp_count
,
1698 Scmpchar_cmp_count
, 1, 1, 0,
1699 "Return number of compoents of composite character CHARACTER.")
1701 Lisp_Object character
;
1705 CHECK_NUMBER (character
, 0);
1706 id
= COMPOSITE_CHAR_ID (XINT (character
));
1707 if (id
< 0 || id
>= n_cmpchars
)
1708 error ("Invalid composite character: %d", XINT (character
));
1710 return (make_number (cmpchar_table
[id
]->glyph_len
));
1713 DEFUN ("compose-string", Fcompose_string
, Scompose_string
,
1715 "Return one char string composed from all characters in STRING.")
1719 unsigned char buf
[MAX_LENGTH_OF_MULTI_BYTE_FORM
], *p
, *pend
, *ptemp
;
1722 CHECK_STRING (str
, 0);
1724 buf
[0] = LEADING_CODE_COMPOSITION
;
1725 p
= XSTRING (str
)->data
;
1726 pend
= p
+ STRING_BYTES (XSTRING (str
));
1730 if (*p
< 0x20) /* control code */
1731 error ("Invalid component character: %d", *p
);
1732 else if (*p
< 0x80) /* ASCII */
1734 if (i
+ 2 >= MAX_LENGTH_OF_MULTI_BYTE_FORM
)
1735 error ("Too long string to be composed: %s", XSTRING (str
)->data
);
1736 /* Prepend an ASCII charset indicator 0xA0, set MSB of the
1739 buf
[i
++] = *p
++ + 0x80;
1741 else if (*p
== LEADING_CODE_COMPOSITION
) /* composite char */
1743 /* Already composed. Eliminate the heading
1744 LEADING_CODE_COMPOSITION, keep the remaining bytes
1748 error ("Can't compose a rule-based composition character");
1750 while (! CHAR_HEAD_P (*p
)) p
++;
1751 if (str_cmpchar_id (ptemp
- 1, p
- ptemp
+ 1) < 0)
1752 error ("Can't compose an invalid composition character");
1753 if (i
+ (p
- ptemp
) >= MAX_LENGTH_OF_MULTI_BYTE_FORM
)
1754 error ("Too long string to be composed: %s", XSTRING (str
)->data
);
1755 bcopy (ptemp
, buf
+ i
, p
- ptemp
);
1758 else /* multibyte char */
1760 /* Add 0x20 to the base leading-code, keep the remaining
1762 int c
= STRING_CHAR_AND_CHAR_LENGTH (p
, pend
- p
, len
);
1764 if (len
<= 1 || ! CHAR_VALID_P (c
, 0))
1765 error ("Can't compose an invalid character");
1766 if (i
+ len
>= MAX_LENGTH_OF_MULTI_BYTE_FORM
)
1767 error ("Too long string to be composed: %s", XSTRING (str
)->data
);
1768 bcopy (p
, buf
+ i
, len
);
1775 /* STR contains only one character, which can't be composed. */
1776 error ("Too short string to be composed: %s", XSTRING (str
)->data
);
1778 return make_string_from_bytes (buf
, 1, i
);
1783 charset_id_internal (charset_name
)
1788 val
= Fget (intern (charset_name
), Qcharset
);
1790 error ("Charset %s is not defined", charset_name
);
1792 return (XINT (XVECTOR (val
)->contents
[0]));
1795 DEFUN ("setup-special-charsets", Fsetup_special_charsets
,
1796 Ssetup_special_charsets
, 0, 0, 0, "Internal use only.")
1799 charset_latin_iso8859_1
= charset_id_internal ("latin-iso8859-1");
1800 charset_jisx0208_1978
= charset_id_internal ("japanese-jisx0208-1978");
1801 charset_jisx0208
= charset_id_internal ("japanese-jisx0208");
1802 charset_katakana_jisx0201
= charset_id_internal ("katakana-jisx0201");
1803 charset_latin_jisx0201
= charset_id_internal ("latin-jisx0201");
1804 charset_big5_1
= charset_id_internal ("chinese-big5-1");
1805 charset_big5_2
= charset_id_internal ("chinese-big5-2");
1810 init_charset_once ()
1814 staticpro (&Vcharset_table
);
1815 staticpro (&Vcharset_symbol_table
);
1816 staticpro (&Vgeneric_character_list
);
1818 /* This has to be done here, before we call Fmake_char_table. */
1819 Qcharset_table
= intern ("charset-table");
1820 staticpro (&Qcharset_table
);
1822 /* Intern this now in case it isn't already done.
1823 Setting this variable twice is harmless.
1824 But don't staticpro it here--that is done in alloc.c. */
1825 Qchar_table_extra_slots
= intern ("char-table-extra-slots");
1827 /* Now we are ready to set up this property, so we can
1828 create the charset table. */
1829 Fput (Qcharset_table
, Qchar_table_extra_slots
, make_number (0));
1830 Vcharset_table
= Fmake_char_table (Qcharset_table
, Qnil
);
1832 Vcharset_symbol_table
= Fmake_vector (make_number (MAX_CHARSET
+ 1), Qnil
);
1835 for (i
= 0; i
< 2; i
++)
1836 for (j
= 0; j
< 2; j
++)
1837 for (k
= 0; k
< 128; k
++)
1838 iso_charset_table
[i
][j
][k
] = -1;
1840 bzero (cmpchar_hash_table
, sizeof cmpchar_hash_table
);
1841 cmpchar_table_size
= n_cmpchars
= 0;
1843 for (i
= 0; i
< 256; i
++)
1844 BYTES_BY_CHAR_HEAD (i
) = 1;
1845 for (i
= MIN_CHARSET_OFFICIAL_DIMENSION1
;
1846 i
<= MAX_CHARSET_OFFICIAL_DIMENSION1
; i
++)
1847 BYTES_BY_CHAR_HEAD (i
) = 2;
1848 for (i
= MIN_CHARSET_OFFICIAL_DIMENSION2
;
1849 i
<= MAX_CHARSET_OFFICIAL_DIMENSION2
; i
++)
1850 BYTES_BY_CHAR_HEAD (i
) = 3;
1851 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11
) = 3;
1852 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12
) = 3;
1853 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21
) = 4;
1854 BYTES_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22
) = 4;
1855 /* The followings don't reflect the actual bytes, but just to tell
1856 that it is a start of a multibyte character. */
1857 BYTES_BY_CHAR_HEAD (LEADING_CODE_COMPOSITION
) = 2;
1858 BYTES_BY_CHAR_HEAD (0x9E) = 2;
1859 BYTES_BY_CHAR_HEAD (0x9F) = 2;
1861 for (i
= 0; i
< 128; i
++)
1862 WIDTH_BY_CHAR_HEAD (i
) = 1;
1863 for (; i
< 256; i
++)
1864 WIDTH_BY_CHAR_HEAD (i
) = 4;
1865 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_11
) = 1;
1866 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_12
) = 2;
1867 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_21
) = 1;
1868 WIDTH_BY_CHAR_HEAD (LEADING_CODE_PRIVATE_22
) = 2;
1874 for (i
= 0x81; i
< 0x90; i
++)
1875 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
1876 for (; i
< 0x9A; i
++)
1877 val
= Fcons (make_number ((i
- 0x8F) << 14), val
);
1878 for (i
= 0xA0; i
< 0xF0; i
++)
1879 val
= Fcons (make_number ((i
- 0x70) << 7), val
);
1880 for (; i
< 0xFF; i
++)
1881 val
= Fcons (make_number ((i
- 0xE0) << 14), val
);
1882 val
= Fcons (make_number (GENERIC_COMPOSITION_CHAR
), val
);
1883 Vgeneric_character_list
= Fnreverse (val
);
1886 nonascii_insert_offset
= 0;
1887 Vnonascii_translation_table
= Qnil
;
1895 Qascii
= intern ("ascii");
1896 staticpro (&Qascii
);
1898 Qcharset
= intern ("charset");
1899 staticpro (&Qcharset
);
1901 /* Define ASCII charset now. */
1902 update_charset_table (make_number (CHARSET_ASCII
),
1903 make_number (1), make_number (94),
1908 build_string ("ASCII"),
1909 build_string ("ASCII"),
1910 build_string ("ASCII (ISO646 IRV)"));
1911 CHARSET_SYMBOL (CHARSET_ASCII
) = Qascii
;
1912 Fput (Qascii
, Qcharset
, CHARSET_TABLE_ENTRY (CHARSET_ASCII
));
1914 Qcomposition
= intern ("composition");
1915 staticpro (&Qcomposition
);
1916 CHARSET_SYMBOL (CHARSET_COMPOSITION
) = Qcomposition
;
1918 Qauto_fill_chars
= intern ("auto-fill-chars");
1919 staticpro (&Qauto_fill_chars
);
1920 Fput (Qauto_fill_chars
, Qchar_table_extra_slots
, make_number (0));
1922 defsubr (&Sdefine_charset
);
1923 defsubr (&Sgeneric_character_list
);
1924 defsubr (&Sget_unused_iso_final_char
);
1925 defsubr (&Sdeclare_equiv_charset
);
1926 defsubr (&Sfind_charset_region
);
1927 defsubr (&Sfind_charset_string
);
1928 defsubr (&Smake_char_internal
);
1929 defsubr (&Ssplit_char
);
1930 defsubr (&Schar_charset
);
1931 defsubr (&Scharset_after
);
1932 defsubr (&Siso_charset
);
1933 defsubr (&Schar_valid_p
);
1934 defsubr (&Sunibyte_char_to_multibyte
);
1935 defsubr (&Smultibyte_char_to_unibyte
);
1936 defsubr (&Schar_bytes
);
1937 defsubr (&Schar_width
);
1938 defsubr (&Sstring_width
);
1939 defsubr (&Schar_direction
);
1940 defsubr (&Schars_in_region
);
1942 defsubr (&Scmpcharp
);
1943 defsubr (&Scmpchar_component
);
1944 defsubr (&Scmpchar_cmp_rule
);
1945 defsubr (&Scmpchar_cmp_rule_p
);
1946 defsubr (&Scmpchar_cmp_count
);
1947 defsubr (&Scompose_string
);
1948 defsubr (&Ssetup_special_charsets
);
1950 DEFVAR_LISP ("charset-list", &Vcharset_list
,
1951 "List of charsets ever defined.");
1952 Vcharset_list
= Fcons (Qascii
, Qnil
);
1954 DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector
,
1955 "Vector of cons cell of a symbol and translation table ever defined.\n\
1956 An ID of a translation table is an index of this vector.");
1957 Vtranslation_table_vector
= Fmake_vector (make_number (16), Qnil
);
1959 DEFVAR_INT ("leading-code-composition", &leading_code_composition
,
1960 "Leading-code of composite characters.");
1961 leading_code_composition
= LEADING_CODE_COMPOSITION
;
1963 DEFVAR_INT ("leading-code-private-11", &leading_code_private_11
,
1964 "Leading-code of private TYPE9N charset of column-width 1.");
1965 leading_code_private_11
= LEADING_CODE_PRIVATE_11
;
1967 DEFVAR_INT ("leading-code-private-12", &leading_code_private_12
,
1968 "Leading-code of private TYPE9N charset of column-width 2.");
1969 leading_code_private_12
= LEADING_CODE_PRIVATE_12
;
1971 DEFVAR_INT ("leading-code-private-21", &leading_code_private_21
,
1972 "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1973 leading_code_private_21
= LEADING_CODE_PRIVATE_21
;
1975 DEFVAR_INT ("leading-code-private-22", &leading_code_private_22
,
1976 "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1977 leading_code_private_22
= LEADING_CODE_PRIVATE_22
;
1979 DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset
,
1980 "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1981 This is used for converting unibyte text to multibyte,\n\
1982 and for inserting character codes specified by number.\n\n\
1983 This serves to convert a Latin-1 or similar 8-bit character code\n\
1984 to the corresponding Emacs multibyte character code.\n\
1985 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1986 for your choice of character set.\n\
1987 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1988 nonascii_insert_offset
= 0;
1990 DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table
,
1991 "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1992 This is used for converting unibyte text to multibyte,\n\
1993 and for inserting character codes specified by number.\n\n\
1994 Conversion is performed only when multibyte characters are enabled,\n\
1995 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1996 to the corresponding Emacs character code.\n\n\
1997 If this is nil, `nonascii-insert-offset' is used instead.\n\
1998 See also the docstring of `make-translation-table'.");
1999 Vnonascii_translation_table
= Qnil
;
2001 DEFVAR_INT ("min-composite-char", &min_composite_char
,
2002 "Minimum character code of a composite character.");
2003 min_composite_char
= MIN_CHAR_COMPOSITION
;
2005 DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars
,
2006 "A char-table for characters which invoke auto-filling.\n\
2007 Such characters has value t in this table.");
2008 Vauto_fill_chars
= Fmake_char_table (Qauto_fill_chars
, Qnil
);
2009 CHAR_TABLE_SET (Vauto_fill_chars
, make_number (' '), Qt
);
2010 CHAR_TABLE_SET (Vauto_fill_chars
, make_number ('\n'), Qt
);