#define GENERIC_COMPOSITION_CHAR (GLYPH_MASK_CHAR)
/* 1 if C is an ASCII character, else 0. */
-#define SINGLE_BYTE_CHAR_P(c) ((c) < 0x100)
+#define SINGLE_BYTE_CHAR_P(c) ((c) >= 0 && (c) < 0x100)
/* 1 if C is an composite character, else 0. */
#define COMPOSITE_CHAR_P(c) ((c) >= MIN_CHAR_COMPOSITION)
CHARSET-ID (integer) is the identification number of the charset.
- BYTE (integer) is the length of multi-byte form of a character in
+ BYTES (integer) is the length of multi-byte form of a character in
the charset: one of 1, 2, 3, and 4.
DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
occupies on the screen: one of 0, 1, and 2.
DIRECTION (integer) is the rendering direction of characters in the
- charset when rendering. If 0, render from right to left, else
- render from left to right.
+ charset when rendering. If 0, render from left to right, else
+ render from right to left.
LEADING-CODE-BASE (integer) is the base leading-code for the
charset.
#define CHARSET_SYMBOL(charset) \
XVECTOR (Vcharset_symbol_table)->contents[charset]
-/* 1 if CHARSET is valid, else 0. */
+/* 1 if CHARSET is in valid value range, else 0. */
#define CHARSET_VALID_P(charset) \
((charset) == 0 \
|| ((charset) >= 0x80 && (charset) <= MAX_CHARSET_OFFICIAL_DIMENSION2) \
|| ((charset) >= MIN_CHARSET_PRIVATE_DIMENSION1 && (charset) <= MAX_CHARSET))
-/* 1 if CHARSET is already defined, else 0. */
+/* 1 if CHARSET is already defined (and not CHARSET_COMPOSITION), else 0. */
#define CHARSET_DEFINED_P(charset) \
(((charset) >= 0) && ((charset) <= MAX_CHARSET) \
&& !NILP (CHARSET_TABLE_ENTRY (charset)))
/* Since the information CHARSET-BYTES and CHARSET-WIDTH of
- Vcharset_table can be retrieved only from the first byte of
+ Vcharset_table can be retrieved from only the first byte of
multi-byte form (an ASCII code or a base leading-code), we provide
here tables to be used by macros BYTES_BY_CHAR_HEAD and
WIDTH_BY_CHAR_HEAD for faster information retrieval. */
position-codes are C1 and C2. DIMENSION1 character ignores C2. */
#define MAKE_NON_ASCII_CHAR(charset, c1, c2) \
((charset) == CHARSET_COMPOSITION \
- ? MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2)) \
+ ? ((c2) < 0 \
+ ? (((charset) - 0x70) << 7) + (c1) \
+ : MAKE_COMPOSITE_CHAR (((c1) << 7) + (c2))) \
: (! CHARSET_DEFINED_P (charset) || CHARSET_DIMENSION (charset) == 1 \
- ? (((charset) - 0x70) << 7) | (c1) \
+ ? (((charset) - 0x70) << 7) | ((c1) <= 0 ? 0 : (c1)) \
: ((charset) < MIN_CHARSET_PRIVATE_DIMENSION2 \
- ? (((charset) - 0x8F) << 14) | ((c1) << 7) | (c2) \
- : (((charset) - 0xE0) << 14) | ((c1) << 7) | (c2))))
+ ? ((((charset) - 0x8F) << 14) \
+ | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))) \
+ : ((((charset) - 0xE0) << 14) \
+ | ((c1) <= 0 ? 0 : ((c1) << 7)) | ((c2) <= 0 ? 0 : (c2))))))
/* Return a composite character of which CMPCHAR-ID is ID. */
#define MAKE_COMPOSITE_CHAR(id) (MIN_CHAR_COMPOSITION + (id))
/* Return a character of which charset is CHARSET and position-codes
are C1 and C2. DIMENSION1 character ignores C2. */
-#define MAKE_CHAR(charset, c1, c2) \
- ((charset) == CHARSET_ASCII \
- ? (c1) \
- : MAKE_NON_ASCII_CHAR ((charset), (c1) & 0x7F, (c2) & 0x7F))
+#define MAKE_CHAR(charset, c1, c2) \
+ ((charset) == CHARSET_ASCII \
+ ? (c1) \
+ : MAKE_NON_ASCII_CHAR ((charset), (c1), (c2)))
/* If GENERICP is nonzero, return nonzero iff C is a valid normal or
generic character. If GENERICP is zero, return nonzero iff C is a
#define DEFAULT_NONASCII_INSERT_OFFSET 0x800
-/* Check if the character C is valid as a multibyte character. */
-
-#define VALID_MULTIBYTE_CHAR_P(c) \
- ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \
- ? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD2 (c) \
- + 0xF0]) \
- && CHAR_FIELD3 (c) >= 32) \
- : ((c) < MIN_CHAR_PRIVATE_DIMENSION2 \
- ? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD1 (c) \
- + 0x10F]) \
- && CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32) \
- : ((c) < MIN_CHAR_COMPOSITION \
- ? (!NILP (XCHAR_TABLE (Vcharset_table)->contents[CHAR_FIELD1 (c) \
- + 0x160]) \
- && CHAR_FIELD2 (c) >= 32 && CHAR_FIELD3 (c) >= 32) \
- : (c) < MIN_CHAR_COMPOSITION + n_cmpchars)))
+/* Parse composite character string STR of length LENGTH (>= 2) and
+ set BYTES to the length of the actual multibyte sequence.
+
+ It is assumed that *STR is LEADING_CODE_COMPOSITION and the
+ following (LENGTH - 1) bytes satisfy !CHAR_HEAD_P.
+
+ Actually, the whole multibyte sequence starting with
+ LEADING_CODE_COMPOSITION is treated as a single multibyte
+ character. So, here, we just set BYTES to LENGTH; STR itself is
+ never read by this macro. BYTES must be an lvalue.
+
+ This macro should be called only from PARSE_MULTIBYTE_SEQ. */
+
+#define PARSE_COMPOSITE_SEQ(str, length, bytes) \
+ do { \
+ (bytes) = (length); \
+ } while (0)
+
+
+/* Parse non-composite multibyte character string STR of length
+ LENGTH (>= 2) and set BYTES to the length of the actual multibyte
+ sequence.
+
+ It is assumed that *STR is one of base leading codes (excluding
+ LEADING_CODE_COMPOSITION) and the following (LENGTH - 1) bytes
+ satisfy !CHAR_HEAD_P.
+
+ BYTES is first taken from BYTES_BY_CHAR_HEAD for the leading byte
+ and then clamped to LENGTH, so the result never exceeds the number
+ of bytes the caller says are available. BYTES must be an lvalue;
+ BYTES and LENGTH are evaluated more than once.
+
+ This macro should be called only from PARSE_MULTIBYTE_SEQ. */
+
+#define PARSE_CHARACTER_SEQ(str, length, bytes) \
+ do { \
+ (bytes) = BYTES_BY_CHAR_HEAD ((str)[0]); \
+ if ((bytes) > (length)) \
+ (bytes) = (length); \
+ } while (0)
+
+/* Parse string STR of length LENGTH and check if a multibyte
+   character is at STR.  If so, set BYTES to the byte length of that
+   character, else set BYTES to 1.
+
+   An ASCII first byte always yields 1.  Otherwise the bytes after
+   the first are scanned, up to LENGTH, while they are not character
+   heads; if none qualifies BYTES is 1, else the scanned length is
+   handed to PARSE_COMPOSITE_SEQ or PARSE_CHARACTER_SEQ depending on
+   whether the first byte is LEADING_CODE_COMPOSITION.
+
+   BYTES must be an lvalue.  STR, LENGTH and BYTES are evaluated more
+   than once, so they must be free of side effects.  The scan index
+   has a deliberately long name so that it cannot capture an argument
+   expression that mentions a caller variable such as `i'.  */
+
+#define PARSE_MULTIBYTE_SEQ(str, length, bytes) \
+  do { \
+    int parse_multibyte_seq_idx = 1; \
+    if (ASCII_BYTE_P (*(str))) \
+      (bytes) = 1; \
+    else \
+      { \
+        while (parse_multibyte_seq_idx < (length) \
+               && ! CHAR_HEAD_P ((str)[parse_multibyte_seq_idx])) \
+          parse_multibyte_seq_idx++; \
+        if (parse_multibyte_seq_idx == 1) \
+          (bytes) = 1; \
+        else if ((str)[0] == LEADING_CODE_COMPOSITION) \
+          PARSE_COMPOSITE_SEQ (str, parse_multibyte_seq_idx, bytes); \
+        else \
+          PARSE_CHARACTER_SEQ (str, parse_multibyte_seq_idx, bytes); \
+      } \
+  } while (0)
/* The charset of non-ASCII character C is stored in CHARSET, and the
position-codes of C are stored in C1 and C2.
Do not use this macro for an ASCII character. */
#define SPLIT_NON_ASCII_CHAR(c, charset, c1, c2) \
- ((c) < MIN_CHAR_OFFICIAL_DIMENSION2 \
- ? (charset = CHAR_FIELD2 (c) + 0x70, \
- c1 = CHAR_FIELD3 (c), \
- c2 = -1) \
- : (charset = ((c) < MIN_CHAR_COMPOSITION \
+ ((c) & CHAR_FIELD1_MASK \
+ ? (charset = ((c) < MIN_CHAR_COMPOSITION \
? (CHAR_FIELD1 (c) \
+ ((c) < MIN_CHAR_PRIVATE_DIMENSION2 ? 0x8F : 0xE0)) \
: CHARSET_COMPOSITION), \
c1 = CHAR_FIELD2 (c), \
- c2 = CHAR_FIELD3 (c)))
+ c2 = CHAR_FIELD3 (c)) \
+ : (charset = CHAR_FIELD2 (c) + 0x70, \
+ c1 = CHAR_FIELD3 (c), \
+ c2 = -1))
/* The charset of character C is stored in CHARSET, and the
position-codes of C are stored in C1 and C2.
- We store -1 in C2 if the character is just 2 bytes. */
+ We store -1 in C2 if the dimension of the charset is 1. */
#define SPLIT_CHAR(c, charset, c1, c2) \
(SINGLE_BYTE_CHAR_P (c) \
? charset = CHARSET_ASCII, c1 = (c), c2 = -1 \
: SPLIT_NON_ASCII_CHAR (c, charset, c1, c2))
+/* Return 1 iff character C has a valid printable glyph.
+ Every character accepted by SINGLE_BYTE_CHAR_P counts as printable.
+ A character at or above MIN_CHAR_COMPOSITION is printable iff it
+ is below MAX_CHAR. Any other character is decided by calling
+ char_printable_p. C is evaluated more than once, so it must have
+ no side effects. */
+#define CHAR_PRINTABLE_P(c) \
+ (SINGLE_BYTE_CHAR_P (c) \
+ || ((c) >= MIN_CHAR_COMPOSITION \
+ ? (c) < MAX_CHAR \
+ : char_printable_p (c)))
+
/* The charset of the character at STR is stored in CHARSET, and the
position-codes are stored in C1 and C2.
We store -1 in C2 if the character is just 2 bytes.
#define BASE_LEADING_CODE_P(c) (BYTES_BY_CHAR_HEAD ((unsigned char) (c)) > 1)
/* Return how many bytes C will occupy in a multibyte buffer. */
-#define CHAR_BYTES(c) (SINGLE_BYTE_CHAR_P (c) ? 1 : char_bytes (c))
+#define CHAR_BYTES(c) \
+ ((SINGLE_BYTE_CHAR_P ((c)) || ((c) & ~GLYPH_MASK_CHAR)) ? 1 : char_bytes (c))
/* The following two macros CHAR_STRING and STRING_CHAR are the main
entry points to convert between Emacs two types of character
is at STR and the length is LEN. If STR doesn't contain valid
multi-byte form, only the first byte in STR is returned. */
-#define STRING_CHAR(str, len) \
- ((BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
- || BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) > (len)) \
- ? (unsigned char) *(str) \
- : string_to_non_ascii_char (str, len, 0, 0))
+#define STRING_CHAR(str, len) \
+ (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
+ ? (unsigned char) *(str) \
+ : string_to_non_ascii_char (str, len, 0))
-/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to
- the length of the multi-byte form. Just to know the length, use
+/* This is like STRING_CHAR but the third arg ACTUAL_LEN is set to the
+ length of the multi-byte form. Just to know the length, use
MULTIBYTE_FORM_LENGTH. */
-#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \
- (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
- ? (actual_len = 1), (unsigned char) *(str) \
- : string_to_non_ascii_char (str, len, &actual_len, 0))
-
-/* This is like STRING_CHAR_AND_LENGTH but the third arg ACTUAL_LEN
- does not include garbage bytes following the multibyte character. */
-#define STRING_CHAR_AND_CHAR_LENGTH(str, len, actual_len) \
- (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
- ? (actual_len = 1), (unsigned char) *(str) \
- : string_to_non_ascii_char (str, len, &actual_len, 1))
+#define STRING_CHAR_AND_LENGTH(str, len, actual_len) \
+ (BYTES_BY_CHAR_HEAD ((unsigned char) *(str)) == 1 \
+ ? ((actual_len) = 1), (unsigned char) *(str) \
+ : string_to_non_ascii_char (str, len, &(actual_len)))
/* Fetch the "next" multibyte character from Lisp string STRING
at byte position BYTEIDX, character position CHARIDX.
#ifdef emacs
-/* Increase the buffer point POS of the current buffer to the next
- character boundary. This macro relies on the fact that *GPT_ADDR
- and *Z_ADDR are always accessible and the values are '\0'. No
- range checking of POS. */
-#define INC_POS(pos) \
- do { \
- unsigned char *p = BYTE_POS_ADDR (pos); \
- pos++; \
- if (BASE_LEADING_CODE_P (*p++)) \
- while (!CHAR_HEAD_P (*p)) p++, pos++; \
+/* Increase the buffer byte position POS_BYTE of the current buffer to
+ the next character boundary. This macro relies on the fact that
+ *GPT_ADDR and *Z_ADDR are always accessible and the values are
+ '\0'. No range checking of POS_BYTE.
+
+ When the byte at POS_BYTE is a base leading code, POS_BYTE advances
+ by the length PARSE_MULTIBYTE_SEQ reports for the bytes between
+ POS_BYTE and Z_BYTE; otherwise it advances by exactly one byte.
+ POS_BYTE must be an lvalue and is evaluated more than once. */
+#define INC_POS(pos_byte) \
+ do { \
+ unsigned char *p = BYTE_POS_ADDR (pos_byte); \
+ if (BASE_LEADING_CODE_P (*p)) \
+ { \
+ int len, bytes; \
+ len = Z_BYTE - pos_byte; \
+ PARSE_MULTIBYTE_SEQ (p, len, bytes); \
+ pos_byte += bytes; \
+ } \
+ else \
+ pos_byte++; \
} while (0)
-/* Decrease the buffer point POS of the current buffer to the previous
- character boundary. No range checking of POS. */
-#define DEC_POS(pos) \
- do { \
- unsigned char *p, *p_min; \
- \
- pos--; \
- if (pos < GPT_BYTE) \
- p = BEG_ADDR + pos - 1, p_min = BEG_ADDR; \
- else \
- p = BEG_ADDR + GAP_SIZE + pos - 1, p_min = GAP_END_ADDR; \
- if (p > p_min && !CHAR_HEAD_P (*p)) \
- { \
- int pos_saved = pos--; \
- p--; \
- while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \
- if (!BASE_LEADING_CODE_P (*p)) pos = pos_saved; \
- } \
+/* Decrease the buffer byte position POS_BYTE of the current buffer to
+ the previous character boundary. No range checking of POS_BYTE.
+
+ POS_BYTE is always moved back by at least one byte. If the byte
+ examined after that step is not a character head, the macro scans
+ backward (no further than BEG_ADDR or GAP_END_ADDR, whichever
+ side of the gap applies) and moves POS_BYTE back to the byte where
+ the scan stopped, but only when PARSE_MULTIBYTE_SEQ reports a
+ sequence whose length reaches exactly to the original position.
+ POS_BYTE must be an lvalue and is evaluated more than once. */
+#define DEC_POS(pos_byte) \
+ do { \
+ unsigned char *p, *p_min; \
+ \
+ pos_byte--; \
+ if (pos_byte < GPT_BYTE) \
+ p = BEG_ADDR + pos_byte - 1, p_min = BEG_ADDR; \
+ else \
+ p = BEG_ADDR + GAP_SIZE + pos_byte - 1, p_min = GAP_END_ADDR; \
+ if (p > p_min && !CHAR_HEAD_P (*p)) \
+ { \
+ unsigned char *pend = p--; \
+ int len, bytes; \
+ while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
+ len = pend + 1 - p; \
+ PARSE_MULTIBYTE_SEQ (p, len, bytes); \
+ if (bytes == len) \
+ pos_byte -= len - 1; \
+ } \
} while (0)
/* Increment both CHARPOS and BYTEPOS, each in the appropriate way. */
} \
while (0)
-/* Increase the buffer point POS of the current buffer to the next
- character boundary. This macro relies on the fact that *GPT_ADDR
- and *Z_ADDR are always accessible and the values are '\0'. No
- range checking of POS. */
-#define BUF_INC_POS(buf, pos) \
- do { \
- unsigned char *p = BUF_BYTE_ADDRESS (buf, pos); \
- pos++; \
- if (BASE_LEADING_CODE_P (*p++)) \
- while (!CHAR_HEAD_P (*p)) p++, pos++; \
- } while (0)
-
-/* Decrease the buffer point POS of the current buffer to the previous
- character boundary. No range checking of POS. */
-#define BUF_DEC_POS(buf, pos) \
- do { \
- unsigned char *p, *p_min; \
- int pos_saved = --pos; \
- if (pos < BUF_GPT_BYTE (buf)) \
- { \
- p = BUF_BEG_ADDR (buf) + pos - 1; \
- p_min = BUF_BEG_ADDR (buf); \
- } \
- else \
- { \
- p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos - 1; \
- p_min = BUF_GAP_END_ADDR (buf); \
- } \
- if (p > p_min && !CHAR_HEAD_P (*p)) \
+/* Increase the buffer byte position POS_BYTE of buffer BUF to the
+ next character boundary. This macro relies on the fact that
+ *GPT_ADDR and *Z_ADDR are always accessible and the values are
+ '\0'. No range checking of POS_BYTE. */
+#define BUF_INC_POS(buf, pos_byte) \
+ do { \
+ unsigned char *p = BUF_BYTE_ADDRESS (buf, pos_byte); \
+ if (BASE_LEADING_CODE_P (*p)) \
{ \
- int pos_saved = pos--; \
- p--; \
- while (p > p_min && !CHAR_HEAD_P (*p)) p--, pos--; \
- if (!BASE_LEADING_CODE_P (*p)) pos = pos_saved; \
+ int len, bytes; \
+ len = BUF_Z_BYTE (buf) - pos_byte; \
+ PARSE_MULTIBYTE_SEQ (p, len, bytes); \
+ pos_byte += bytes; \
} \
+ else \
+ pos_byte++; \
+ } while (0)
+
+/* Decrease the buffer byte position POS_BYTE of buffer BUF to
+ the previous character boundary. No range checking of POS_BYTE.
+
+ POS_BYTE is always moved back by at least one byte. If the byte
+ examined after that step is not a character head, the macro scans
+ backward (no further than BUF_BEG_ADDR (BUF) or BUF_GAP_END_ADDR
+ (BUF), whichever side of the gap applies) and moves POS_BYTE back
+ to the byte where the scan stopped, but only when
+ PARSE_MULTIBYTE_SEQ reports a sequence whose length reaches
+ exactly to the original position. POS_BYTE must be an lvalue; BUF
+ and POS_BYTE are evaluated more than once. */
+#define BUF_DEC_POS(buf, pos_byte) \
+ do { \
+ unsigned char *p, *p_min; \
+ pos_byte--; \
+ if (pos_byte < BUF_GPT_BYTE (buf)) \
+ { \
+ p = BUF_BEG_ADDR (buf) + pos_byte - 1; \
+ p_min = BUF_BEG_ADDR (buf); \
+ } \
+ else \
+ { \
+ p = BUF_BEG_ADDR (buf) + BUF_GAP_SIZE (buf) + pos_byte - 1; \
+ p_min = BUF_GAP_END_ADDR (buf); \
+ } \
+ if (p > p_min && !CHAR_HEAD_P (*p)) \
+ { \
+ unsigned char *pend = p--; \
+ int len, bytes; \
+ while (p > p_min && !CHAR_HEAD_P (*p)) p--; \
+ len = pend + 1 - p; \
+ PARSE_MULTIBYTE_SEQ (p, len, bytes); \
+ if (bytes == len) \
+ pos_byte -= len - 1; \
+ } \
} while (0)
#endif /* emacs */
extern int translate_char P_ ((Lisp_Object, int, int, int, int));
extern int split_non_ascii_string P_ ((const unsigned char *, int, int *,
unsigned char *, unsigned char *));
-extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *,
- int));
+extern int string_to_non_ascii_char P_ ((const unsigned char *, int, int *));
extern int non_ascii_char_to_string P_ ((int, unsigned char *, unsigned char **));
+extern int char_printable_p P_ ((int c));
extern int multibyte_form_length P_ ((const unsigned char *, int));
extern int str_cmpchar_id P_ ((const unsigned char *, int));
extern int get_charset_id P_ ((Lisp_Object));
-extern int cmpchar_component P_ ((unsigned int, unsigned int));
+extern int cmpchar_component P_ ((int, int, int));
extern int find_charset_in_str P_ ((unsigned char *, int, int *,
- Lisp_Object, int));
+ Lisp_Object, int, int));
extern int strwidth P_ ((unsigned char *, int));
extern int char_bytes P_ ((int));
+extern int char_valid_p P_ ((int, int));
extern Lisp_Object Vtranslation_table_vector;
/* Return a translation table of id number ID. */
#define GET_TRANSLATION_TABLE(id) \
- (XCONS(XVECTOR(Vtranslation_table_vector)->contents[(id)])->cdr)
+ (XCDR(XVECTOR(Vtranslation_table_vector)->contents[(id)]))
/* A char-table for characters which may invoke auto-filling. */
extern Lisp_Object Vauto_fill_chars;
#define BCOPY_SHORT(from, to, len) \
do { \
int i = len; \
- unsigined char *from_p = from, *to_p = to; \
+ unsigned char *from_p = from, *to_p = to; \
- while (i--) *from_p++ = *to_p++; \
+ /* Copy LEN bytes forward from FROM into TO, as bcopy does; FROM is only read. */ \
+ while (i--) *to_p++ = *from_p++; \
} while (0)
+/* Length of C in bytes.
+ When the charset of C is CHARSET_COMPOSITION, the length is read
+ from the `len' member of the cmpchar_table entry selected by
+ COMPOSITE_CHAR_ID (C); otherwise it is CHARSET_BYTES of C's
+ charset. C is evaluated more than once, so it must have no side
+ effects. */
+
+#define CHAR_LEN(C) \
+ (CHAR_CHARSET ((C)) == CHARSET_COMPOSITION \
+ ? cmpchar_table[COMPOSITE_CHAR_ID ((C))]->len \
+ : CHARSET_BYTES (CHAR_CHARSET ((C))))
+
+
#endif /* _CHARSET_H */