-/* #define BYTE_COMBINING_DEBUG */
-
-/*** GENERAL NOTE on CHARACTER SET (CHARSET) ***
-
- A character set ("charset" hereafter) is a meaningful collection
- (i.e. language, culture, functionality, etc) of characters. Emacs
- handles multiple charsets at once. Each charset corresponds to one
- of the ISO charsets. Emacs identifies a charset by a unique
- identification number, whereas ISO identifies a charset by a triplet
- of DIMENSION, CHARS and FINAL-CHAR. So, hereafter, just saying
- "charset" means an identification number (integer value).
-
- The value range of charsets is 0x00, 0x80..0xFE. There are four
- kinds of charset depending on DIMENSION (1 or 2) and CHARS (94 or
- 96). For instance, a charset of DIMENSION2_CHARS94 contains 94x94
- characters.
-
- Within Emacs Lisp, a charset is treated as a symbol which has a
- property `charset'. The property value is a vector containing
- various information about the charset. For readability of C code,
- we use the following convention for C variable names:
- charset_symbol: Emacs Lisp symbol of a charset
- charset_id: Emacs Lisp integer of an identification number of a charset
- charset: C integer of an identification number of a charset
-
- Each charset (except for ascii) is assigned a base leading-code
- (range 0x80..0x9E). In addition, a charset of 0xA0 or greater
- (whose base leading-code is 0x9A..0x9D) is assigned an extended
- leading-code (range 0xA0..0xFE). In this case, each base
- leading-code specifies the allowable range of extended leading-code
- as shown in the table below. A leading-code is used to represent a
- character in Emacs' buffer and string.
-
- We call a charset which has extended leading-code a "private
- charset" because those are mainly for a charset which is not yet
- registered by ISO. On the contrary, we call a charset which does
- not have extended leading-code an "official charset".
-
- ---------------------------------------------------------------------------
- charset dimension base leading-code extended leading-code
- ---------------------------------------------------------------------------
- 0x00 official dim1 -- none -- -- none --
- (ASCII)
- 0x01..0x7F --never used--
- 0x80 official dim1 -- none -- -- none --
- (eight-bit-graphic)
- 0x81..0x8F official dim1 same as charset -- none --
- 0x90..0x99 official dim2 same as charset -- none --
- 0x9A..0x9D --never used--
- 0x9E official dim1 same as charset -- none --
- (eight-bit-control)
- 0x9F --never used--
- 0xA0..0xDF private dim1 0x9A same as charset
- of 1-column width
- 0xE0..0xEF private dim1 0x9B same as charset
- of 2-column width
- 0xF0..0xF4 private dim2 0x9C same as charset
- of 1-column width
- 0xF5..0xFE private dim2 0x9D same as charset
- of 2-column width
- 0xFF --never used--
- ---------------------------------------------------------------------------
-
-*/
-
-/* Definition of special leading-codes. */
-/* Base leading-codes that are followed by an extended leading-code.
-   Each one introduces the private charsets of one dimension/width class. */
-#define LEADING_CODE_PRIVATE_11 0x9A /* for private DIMENSION1 of 1-column */
-#define LEADING_CODE_PRIVATE_12 0x9B /* for private DIMENSION1 of 2-column */
-#define LEADING_CODE_PRIVATE_21 0x9C /* for private DIMENSION2 of 1-column */
-#define LEADING_CODE_PRIVATE_22 0x9D /* for private DIMENSION2 of 2-column */
-
-#define LEADING_CODE_8_BIT_CONTROL 0x9E /* for `eight-bit-control' */
-
-/* Extended leading-code. */
-/* First extended leading-code of the range that may follow each base
-   leading-code above. Each range ends just below the start of the next
-   one; the last range ends at LEADING_CODE_EXT_MAX. */
-#define LEADING_CODE_EXT_11 0xA0 /* follows LEADING_CODE_PRIVATE_11 */
-#define LEADING_CODE_EXT_12 0xE0 /* follows LEADING_CODE_PRIVATE_12 */
-#define LEADING_CODE_EXT_21 0xF0 /* follows LEADING_CODE_PRIVATE_21 */
-#define LEADING_CODE_EXT_22 0xF5 /* follows LEADING_CODE_PRIVATE_22 */
-/* Maximum value of extended leading-codes. */
-#define LEADING_CODE_EXT_MAX 0xFE
-
-/* Definition of minimum/maximum charset of each DIMENSION.
-   The official DIMENSION1 range does not include the charset
-   `eight-bit-control' (0x9E). A private charset has the same value as
-   its extended leading-code, so the private minimums are defined as the
-   first extended leading-code of the corresponding dimension. */
-#define MIN_CHARSET_OFFICIAL_DIMENSION1 0x80
-#define MAX_CHARSET_OFFICIAL_DIMENSION1 0x8F
-#define MIN_CHARSET_OFFICIAL_DIMENSION2 0x90
-#define MAX_CHARSET_OFFICIAL_DIMENSION2 0x99
-#define MIN_CHARSET_PRIVATE_DIMENSION1 LEADING_CODE_EXT_11
-#define MIN_CHARSET_PRIVATE_DIMENSION2 LEADING_CODE_EXT_21
-
-/* Maximum value of overall charset identification number. */
-#define MAX_CHARSET 0xFE
-
-/* Definition of special charsets. The comment on each line gives the
-   range of single-byte character codes that belong to the charset. */
-#define CHARSET_ASCII 0 /* 0x00..0x7F */
-#define CHARSET_8_BIT_CONTROL 0x9E /* 0x80..0x9F */
-#define CHARSET_8_BIT_GRAPHIC 0x80 /* 0xA0..0xFF */
-
-/* Charset identification numbers (C integers) of individual charsets.
-   They are only declared here; the definitions are not in this file. */
-extern int charset_latin_iso8859_1; /* ISO8859-1 (Latin-1) */
-extern int charset_jisx0208_1978; /* JISX0208.1978 (Japanese Kanji old set) */
-extern int charset_jisx0208; /* JISX0208.1983 (Japanese Kanji) */
-extern int charset_katakana_jisx0201; /* JISX0201.Kana (Japanese Katakana) */
-extern int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
-extern int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
-extern int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
-extern int charset_mule_unicode_0100_24ff; /* NOTE(review): presumably Unicode 0x0100..0x24FF, judging by the name -- confirm */
-extern int charset_mule_unicode_2500_33ff; /* NOTE(review): presumably Unicode 0x2500..0x33FF, judging by the name -- confirm */
-extern int charset_mule_unicode_e000_ffff; /* NOTE(review): presumably Unicode 0xE000..0xFFFF, judging by the name -- confirm */
-
-/* Check if CH is an ASCII character or a base leading-code, i.e. if
-   CH converted to unsigned char is below 0xA0 (the smallest extended
-   leading-code).
-   Nowadays, any byte can be the first byte of a character in a
-   multibyte buffer/string. So this macro name is not appropriate. */
-#define CHAR_HEAD_P(ch) ((unsigned char) (ch) < 0xA0)
-
-/*** GENERAL NOTE on CHARACTER REPRESENTATION ***
-
- Firstly, the term "character" or "char" is used for a multilingual
- character (of course, including ASCII characters), not for a byte in
- computer memory. We use the term "code" or "byte" for the latter
- case.
-
- A character is identified by charset and one or two POSITION-CODEs.
- POSITION-CODE is the position of the character in the charset. A
- character of DIMENSION1 charset has one POSITION-CODE: POSITION-CODE-1.
- A character of DIMENSION2 charset has two POSITION-CODEs:
- POSITION-CODE-1 and POSITION-CODE-2. The code range of
- POSITION-CODE is 0x20..0x7F.
-
- Emacs has two kinds of representation of a character: multi-byte
- form (for buffers and strings) and single-word form (for character
- objects in Emacs Lisp). The latter is called "character code"
- hereafter. Both representations encode the information of charset
- and POSITION-CODE but in a different way (for instance, the MSB of
- POSITION-CODE is set in multi-byte form).
-
- For details of the multi-byte form, see the section "2. Emacs
- internal format handlers" of `coding.c'.
-
- Emacs uses 19 bits for a character code. The bits are divided into
- 3 fields: FIELD1(5bits):FIELD2(7bits):FIELD3(7bits).
-
- A character code of DIMENSION1 character uses FIELD2 to hold charset
- and FIELD3 to hold POSITION-CODE-1. A character code of DIMENSION2
- character uses FIELD1 to hold charset, FIELD2 and FIELD3 to hold
- POSITION-CODE-1 and POSITION-CODE-2 respectively.
-
- More precisely...
-
- FIELD2 of DIMENSION1 character (except for ascii, eight-bit-control,
- and eight-bit-graphic) is "charset - 0x70". This is to make all
- character codes except for ASCII and 8-bit codes greater than 256.
- So, the range of FIELD2 of DIMENSION1 character is 0, 1, or
- 0x11..0x7F.
-
- FIELD1 of DIMENSION2 character is "charset - 0x8F" for official
- charset and "charset - 0xE0" for private charset. So, the range of
- FIELD1 of DIMENSION2 character is 0x01..0x1E.
-
- -----------------------------------------------------------------------------
- charset FIELD1 (5-bit) FIELD2 (7-bit) FIELD3 (7-bit)
- -----------------------------------------------------------------------------
- ascii 0 0 0x00..0x7F
- eight-bit-control 0 1 0x00..0x1F
- eight-bit-graphic 0 1 0x20..0x7F
- DIMENSION1 0 charset - 0x70 POSITION-CODE-1
- DIMENSION2(o) charset - 0x8F POSITION-CODE-1 POSITION-CODE-2
- DIMENSION2(p) charset - 0xE0 POSITION-CODE-1 POSITION-CODE-2
- -----------------------------------------------------------------------------
- "(o)": official, "(p)": private
- -----------------------------------------------------------------------------
-*/
-
-/* Masks of each field of character code: FIELD1 occupies bits 14..18,
-   FIELD2 bits 7..13, and FIELD3 bits 0..6. */
-#define CHAR_FIELD1_MASK (0x1F << 14)
-#define CHAR_FIELD2_MASK (0x7F << 7)
-#define CHAR_FIELD3_MASK 0x7F
-
-/* Macros to access each field of character C. Each one yields the
-   value of the field shifted down to bit 0. */
-#define CHAR_FIELD1(c) (((c) & CHAR_FIELD1_MASK) >> 14)
-#define CHAR_FIELD2(c) (((c) & CHAR_FIELD2_MASK) >> 7)
-#define CHAR_FIELD3(c) ((c) & CHAR_FIELD3_MASK)
-
-/* Minimum character code of character of each DIMENSION. For
-   DIMENSION1 the value "charset - 0x70" is placed in FIELD2; for
-   DIMENSION2 the value "charset - 0x8F" (official) or "charset - 0xE0"
-   (private) is placed in FIELD1. The official DIMENSION1 minimum
-   starts from charset 0x81 rather than 0x80, because the characters of
-   charset 0x80 (eight-bit-graphic) are single-byte character codes. */
-#define MIN_CHAR_OFFICIAL_DIMENSION1 \
- ((0x81 - 0x70) << 7)
-#define MIN_CHAR_PRIVATE_DIMENSION1 \
- ((MIN_CHARSET_PRIVATE_DIMENSION1 - 0x70) << 7)
-#define MIN_CHAR_OFFICIAL_DIMENSION2 \
- ((MIN_CHARSET_OFFICIAL_DIMENSION2 - 0x8F) << 14)
-#define MIN_CHAR_PRIVATE_DIMENSION2 \
- ((MIN_CHARSET_PRIVATE_DIMENSION2 - 0xE0) << 14)
-/* Maximum character code currently used plus 1. The largest FIELD1
-   value in use is 0x1E, so every character code is below 0x1F << 14. */
-#define MAX_CHAR (0x1F << 14)
-
-/* 1 if C is a single byte character, else 0. The test is whether
-   masking C with 0xFF leaves its value unchanged. C is evaluated
-   twice, so it must not have side effects. */
-#define SINGLE_BYTE_CHAR_P(c) (((unsigned)(c) & 0xFF) == (c))
-
-/* 1 if BYTE is an ASCII character in itself, in multibyte mode.
-   BYTE is compared as is, without a cast, so a negative value also
-   satisfies the test. NOTE(review): callers presumably pass unsigned
-   byte values -- confirm. */
-#define ASCII_BYTE_P(byte) ((byte) < 0x80)
-
-/* A char-table containing information on each character set.
-
- Unlike ordinary char-tables, this doesn't contain any nested tables.
- Only the top level elements are used. Each element is a vector of
- the following information:
- CHARSET-ID, BYTES, DIMENSION, CHARS, WIDTH, DIRECTION,
- LEADING-CODE-BASE, LEADING-CODE-EXT,
- ISO-FINAL-CHAR, ISO-GRAPHIC-PLANE,
- REVERSE-CHARSET, SHORT-NAME, LONG-NAME, DESCRIPTION,
- PLIST.
-
- CHARSET-ID (integer) is the identification number of the charset.
-
- BYTES (integer) is the length of the multi-byte form of a character
- in the charset: one of 1, 2, 3, and 4.
-
- DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
-
- CHARS (integer) is the number of characters in a dimension: 94 or 96.
-
- WIDTH (integer) is the number of columns a character in the charset
- occupies on the screen: one of 0, 1, and 2.
-
- DIRECTION (integer) is the direction in which characters in the
- charset are rendered. If 0, render from left to right, else
- render from right to left.
-
- LEADING-CODE-BASE (integer) is the base leading-code for the
- charset.
-
- LEADING-CODE-EXT (integer) is the extended leading-code for the
- charset. All charsets of less than 0xA0 have the value 0.
-
- ISO-FINAL-CHAR (character) is the final character of the
- corresponding ISO 2022 charset. It is -1 for a charset that is
- used only internally (e.g. `eight-bit-control').
-
- ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
- while encoding to variants of ISO 2022 coding system, one of the
- following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR). It
- is -1 for a charset that is used only internally
- (e.g. `eight-bit-control').
-
- REVERSE-CHARSET (integer) is the charset which differs only in
- DIRECTION value from the charset. If there is no such
- charset, the value is -1.
-
- SHORT-NAME (string) is the short name to refer to the charset.
-
- LONG-NAME (string) is the long name to refer to the charset.
-
- DESCRIPTION (string) is the description string of the charset.
-
- PLIST (property list) may contain any type of information a user
- wants to put and get by functions `put-charset-property' and
- `get-charset-property' respectively. */
-extern Lisp_Object Vcharset_table;
-
-/* Macros to access various information of CHARSET in Vcharset_table.
- We provide these macros for efficiency. No range check of CHARSET. */
-
-/* Return entry of CHARSET (C integer) in Vcharset_table. The entry of
-   CHARSET_ASCII is element 0 of the char-table contents; the entry of
-   any other charset is element CHARSET + 128. CHARSET may be
-   evaluated twice. */
-#define CHARSET_TABLE_ENTRY(charset) \
- XCHAR_TABLE (Vcharset_table)->contents[((charset) == CHARSET_ASCII \
- ? 0 : (charset) + 128)]
-
-/* Return information INFO-IDX of CHARSET, i.e. element INFO-IDX of the
-   vector that is the entry of CHARSET. The result is a Lisp object. */
-#define CHARSET_TABLE_INFO(charset, info_idx) \
- XVECTOR (CHARSET_TABLE_ENTRY (charset))->contents[info_idx]
-
-/* Index of each piece of information within the vector that is the
-   entry of a charset in Vcharset_table, for use as the INFO-IDX
-   argument of CHARSET_TABLE_INFO. */
-#define CHARSET_ID_IDX (0)
-#define CHARSET_BYTES_IDX (1)
-#define CHARSET_DIMENSION_IDX (2)
-#define CHARSET_CHARS_IDX (3)
-#define CHARSET_WIDTH_IDX (4)
-#define CHARSET_DIRECTION_IDX (5)
-#define CHARSET_LEADING_CODE_BASE_IDX (6)
-#define CHARSET_LEADING_CODE_EXT_IDX (7)
-#define CHARSET_ISO_FINAL_CHAR_IDX (8)
-#define CHARSET_ISO_GRAPHIC_PLANE_IDX (9)
-#define CHARSET_REVERSE_CHARSET_IDX (10)
-#define CHARSET_SHORT_NAME_IDX (11)
-#define CHARSET_LONG_NAME_IDX (12)
-#define CHARSET_DESCRIPTION_IDX (13)
-#define CHARSET_PLIST_IDX (14)
-/* Size of a vector of each entry of Vcharset_table. */
-#define CHARSET_MAX_IDX (15)
-
-/* And several more macros to be used frequently. Each one extracts,
-   as a C integer, one element of the information vector of CHARSET.
-   NOTE(review): the last three use XINT rather than XFASTINT,
-   presumably because those elements may be -1 -- confirm against the
-   definitions of XINT and XFASTINT. */
-#define CHARSET_BYTES(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX))
-#define CHARSET_DIMENSION(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX))
-#define CHARSET_CHARS(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX))
-#define CHARSET_WIDTH(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX))
-#define CHARSET_DIRECTION(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX))
-#define CHARSET_LEADING_CODE_BASE(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX))
-#define CHARSET_LEADING_CODE_EXT(charset) \
- XFASTINT (CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX))
-#define CHARSET_ISO_FINAL_CHAR(charset) \
- XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX))
-#define CHARSET_ISO_GRAPHIC_PLANE(charset) \
- XINT (CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX))
-#define CHARSET_REVERSE_CHARSET(charset) \
- XINT (CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX))
-
-/* Macros to specify direction of a charset. These are the values of
-   the DIRECTION element of a charset's information vector. */
-#define CHARSET_DIRECTION_LEFT_TO_RIGHT 0
-#define CHARSET_DIRECTION_RIGHT_TO_LEFT 1
-
-/* A vector of charset symbols indexed by charset-id. This is used
- only for returning charset symbol from C functions. */
-extern Lisp_Object Vcharset_symbol_table;
-
-/* Return symbol of CHARSET (C integer). CHARSET is used directly as
-   the index into Vcharset_symbol_table; no range check of CHARSET. */
-#define CHARSET_SYMBOL(charset) \
- XVECTOR (Vcharset_symbol_table)->contents[charset]
-
-/* 1 if CHARSET is in valid value range, else 0. The valid values are
-   0, 0x81..MAX_CHARSET_OFFICIAL_DIMENSION2,
-   MIN_CHARSET_PRIVATE_DIMENSION1..MAX_CHARSET, CHARSET_8_BIT_CONTROL,
-   and CHARSET_8_BIT_GRAPHIC. This checks the value only, not whether
-   the charset has an entry in Vcharset_table. CHARSET may be
-   evaluated several times. */
-#define CHARSET_VALID_P(charset) \
- ((charset) == 0 \
- || ((charset) > 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
- || ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 \
- && (charset) <= MAX_CHARSET) \
- || ((charset) == CHARSET_8_BIT_CONTROL) \
- || ((charset) == CHARSET_8_BIT_GRAPHIC))
-
-/* 1 if CHARSET is already defined, else 0. CHARSET is defined when
-   it is within 0..MAX_CHARSET and its entry in Vcharset_table is not
-   nil. The range is tested first, so the table is not accessed for a
-   value outside of it. CHARSET may be evaluated several times. */
-#define CHARSET_DEFINED_P(charset) \
- (((charset) >= 0) && ((charset) <= MAX_CHARSET) \
- && !NILP (CHARSET_TABLE_ENTRY (charset)))
-
-/* Since the information CHARSET-BYTES and CHARSET-WIDTH of
- Vcharset_table can be retrieved only by the first byte of
- multi-byte form (an ASCII code or a base leading-code), we provide
- here tables to be used by macros BYTES_BY_CHAR_HEAD and
- WIDTH_BY_CHAR_HEAD for faster information retrieval. */
-extern int bytes_by_char_head[256];
-extern int width_by_char_head[256];
-
-/* Both macros yield 1 for an ASCII byte without consulting the table;
-   otherwise CHAR_HEAD is used as the index into the table, so it is
-   evaluated twice in that case. */
-#define BYTES_BY_CHAR_HEAD(char_head) \
- (ASCII_BYTE_P (char_head) ? 1 : bytes_by_char_head[char_head])
-#define WIDTH_BY_CHAR_HEAD(char_head) \
- (ASCII_BYTE_P (char_head) ? 1 : width_by_char_head[char_head])
-
-/* Charset of the character C. A single byte character belongs to
-   CHARSET_ASCII (below 0x80), CHARSET_8_BIT_CONTROL (below 0xA0), or
-   CHARSET_8_BIT_GRAPHIC (the rest). For any other character, the
-   charset is recovered from FIELD2 (character codes below
-   MIN_CHAR_OFFICIAL_DIMENSION2) or from FIELD1 by adding back the
-   offset 0x70, 0x8F, or 0xE0 respectively. C is evaluated several
-   times, so it must not have side effects. */
-#define CHAR_CHARSET(c) \
- (SINGLE_BYTE_CHAR_P (c) \
- ? (ASCII_BYTE_P (c) \
- ? CHARSET_ASCII \
- : (c) < 0xA0 ? CHARSET_8_BIT_CONTROL : CHARSET_8_BIT_GRAPHIC) \
- : ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \
- ? CHAR_FIELD2 (c) + 0x70 \
- : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \
- ? CHAR_FIELD1 (c) + 0x8F \
- : CHAR_FIELD1 (c) + 0xE0)))