Revision: miles@gnu.org--gnu-2005/emacs--unicode--0--patch-86

[gnu-emacs] / src / character.h
diff --git a/src/character.h b/src/character.h

index f603140e769a5a6b1b47c7f5a97e6c08bbdc729b..255afb2a0b13f9b1b9653166626f2441cae11d2f 100644 (file)
--- a/src/character.h
+++ b/src/character.h
@@ -1,7 +1,7 @@
  /* Header for multibyte character handler.
     Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
-   Licensed to the Free Software Foundation.
-   Copyright (C) 2001, 2002
+     Licensed to the Free Software Foundation.
+   Copyright (C) 2003
       National Institute of Advanced Industrial Science and Technology (AIST)
       Registration Number H13PRO009
  
@@ -32,10 +32,11 @@ Boston, MA 02111-1307, USA.  */
        800-FFFF         E0..EF     1110xxxx 10xxxxxx 10xxxxxx
      10000-1FFFFF       F0..F7     11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
     200000-3FFF7F       F8         11111000 1000xxxx 10xxxxxx 10xxxxxx 10xxxxxx
-      invalid          F9..FF
+   3FFF80-3FFFFF       C0..C1     1100000x 10xxxxxx (for eight-bit-char)
+   400000-...          invalid
  
-   raw-8-bit
-   3FFF80-3FFFFF       C0..C1     1100000x 10xxxxxx
+   invalid 1st byte    80..BF     10xxxxxx
+                       F9..FF     11111xxx (xxx != 000)
  */
  
  /* Maximum character code ((1 << CHARACTERBITS) - 1).  */
@@ -68,26 +69,36 @@ Boston, MA 02111-1307, USA.  */
     that corresponds to a raw 8-bit byte.  */
  #define CHAR_BYTE8_HEAD_P(byte) ((byte) == 0xC0 || (byte) == 0xC1)
  
-/* If C is not ASCII, make it unibyte. */
+/* Mapping table from unibyte chars to multibyte chars.  */
+extern int unibyte_to_multibyte_table[256];
  
-#define MAKE_CHAR_UNIBYTE(c)                   \
-  if (! ASCII_CHAR_P (c))                      \
-    c = multibyte_char_to_unibyte (c, Qnil);   \
-  else
+/* Convert the unibyte character C to the corresponding multibyte
+   character.  If C is outside 0..255 (negative included), return C.  */
+#define unibyte_char_to_multibyte(c)   \
+  ((unsigned) (c) < 256 ? unibyte_to_multibyte_table[(c)] : (c))
  
+/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
+   char.  */
+extern char unibyte_has_multibyte_table[256];
  
-/* If C is not ASCII, make it multibyte. */
+#define UNIBYTE_CHAR_HAS_MULTIBYTE_P(c) (unibyte_has_multibyte_table[(c)])
  
-#define MAKE_CHAR_MULTIBYTE(c)         \
-  if (! ASCII_CHAR_P (c))              \
-    c = unibyte_char_to_multibyte (c); \
-  else
+/* If C is not ASCII, set C to CHAR_TO_BYTE8 (C).  C must be an lvalue.  */
+#define MAKE_CHAR_UNIBYTE(c)   \
+  do {                         \
+    if (! ASCII_CHAR_P (c))    \
+      (c) = CHAR_TO_BYTE8 (c); \
+  } while (0)
  
  
+/* If C is not ASCII, make it multibyte.  It assumes C < 256.  */
+#define MAKE_CHAR_MULTIBYTE(c) ((c) = unibyte_to_multibyte_table[(c)])
+
  /* This is the maximum byte length of multibyte form.  */
  #define MAX_MULTIBYTE_LENGTH 5
  
-/* Return a Lisp character whose character code is C. */
+/* Return a Lisp character whose character code is C.  It assumes C is
+   a valid character code.  */
  #define make_char(c) make_number (c)
  
  /* Nonzero iff C is an ASCII byte.  */
@@ -106,6 +117,20 @@ Boston, MA 02111-1307, USA.  */
      if (! CHARACTERP(x)) x = wrong_type_argument (Qcharacterp, (x));   \
    } while (0)
  
+#define CHECK_CHARACTER_CAR(x) \
+  do {                                 \
+    Lisp_Object tmp = XCAR (x);                \
+    CHECK_CHARACTER (tmp);             \
+    XSETCAR ((x), tmp);                        \
+  } while (0)
+
+#define CHECK_CHARACTER_CDR(x) \
+  do {                                 \
+    Lisp_Object tmp = XCDR (x);                \
+    CHECK_CHARACTER (tmp);             \
+    XSETCDR ((x), tmp);                        \
+  } while (0)
+
  /* Nonzero iff C is an ASCII character.  */
  #define ASCII_CHAR_P(c) ((unsigned) (c) < 0x80)
  
@@ -126,6 +151,17 @@ Boston, MA 02111-1307, USA.  */
      : (c) <= MAX_5_BYTE_CHAR ? 5       \
      : 2)
  
+
+/* Return the leading code (first byte) of the multibyte form of C.  */
+#define CHAR_LEADING_CODE(c)                           \
+  ((c) <= MAX_1_BYTE_CHAR ? (c)                        \
+   : (c) <= MAX_2_BYTE_CHAR ? (0xC0 | ((c) >> 6))      \
+   : (c) <= MAX_3_BYTE_CHAR ? (0xE0 | ((c) >> 12))     \
+   : (c) <= MAX_4_BYTE_CHAR ? (0xF0 | ((c) >> 18))     \
+   : (c) <= MAX_5_BYTE_CHAR ? 0xF8                     \
+   : (0xC0 | (((c) >> 6) & 0x01)))
+
+
  /* Store multibyte form of the character C in P.  The caller should
     allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
     Returns the length of the multibyte form.  */
@@ -143,19 +179,15 @@ Boston, MA 02111-1307, USA.  */
        (p)[1] = (0x80 | (((c) >> 6) & 0x3F)),   \
        (p)[2] = (0x80 | ((c) & 0x3F)),          \
        3)                                       \
-   : (unsigned) (c) <= MAX_5_BYTE_CHAR         \
-   ? char_string_with_unification (c, p)       \
-   : ((p)[0] = (0xC0 | (((c) >> 6) & 0x01)),   \
-      (p)[1] = (0x80 | ((c) & 0x3F)),          \
-      2))
+   : char_string (c, p))
  
-/* Store multibyte form of eight-bit char B in P.  The caller should
-   allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
-   Returns the length of the multibyte form.  */
+/* Store multibyte form of byte B in P.  The caller should allocate at
+   least MAX_MULTIBYTE_LENGTH bytes area at P in advance.  Returns the
+   length of the multibyte form.  */
  
  #define BYTE8_STRING(b, p)                     \
    ((p)[0] = (0xC0 | (((b) >> 6) & 0x01)),      \
-   (p)[1] = (0x80 | ((c) & 0x3F)),             \
+   (p)[1] = (0x80 | ((b) & 0x3F)),             \
     2)
  
  
@@ -163,24 +195,22 @@ Boston, MA 02111-1307, USA.  */
     allocate at least MAX_MULTIBYTE_LENGTH bytes area at P in advance.
     And, advance P to the end of the multibyte form.  */
  
-#define CHAR_STRING_ADVANCE(c, p)                      \
-  do {                                                 \
-    if ((c) <= MAX_1_BYTE_CHAR)                                \
-      *(p)++ = (c);                                    \
-    else if ((c) <= MAX_2_BYTE_CHAR)                   \
-      *(p)++ = (0xC0 | ((c) >> 6)),                    \
-       *(p)++ = (0x80 | ((c) & 0x3F));                 \
-    else if ((c) <= MAX_3_BYTE_CHAR)                   \
-      *(p)++ = (0xE0 | ((c) >> 12)),                   \
-       *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),          \
-       *(p)++ = (0x80 | ((c) & 0x3F));                 \
-    else if ((c) <= MAX_5_BYTE_CHAR)                   \
-      (p) += char_string_with_unification ((c), (p));  \
-    else                                               \
-      *(p)++ = (0xC0 | (((c) >> 6) & 0x01)),           \
-       *(p)++ = (0x80 | ((c) & 0x3F));                 \
+#define CHAR_STRING_ADVANCE(c, p)              \
+  do {                                         \
+    if ((c) <= MAX_1_BYTE_CHAR)                        \
+      *(p)++ = (c);                            \
+    else if ((c) <= MAX_2_BYTE_CHAR)           \
+      *(p)++ = (0xC0 | ((c) >> 6)),            \
+       *(p)++ = (0x80 | ((c) & 0x3F));         \
+    else if ((c) <= MAX_3_BYTE_CHAR)           \
+      *(p)++ = (0xE0 | ((c) >> 12)),           \
+       *(p)++ = (0x80 | (((c) >> 6) & 0x3F)),  \
+       *(p)++ = (0x80 | ((c) & 0x3F));         \
+    else                                       \
+      (p) += char_string ((c), (p));           \
    } while (0)
  
+
  /* Nonzero iff BYTE starts a non-ASCII character in a multibyte
     form.  */
  #define LEADING_CODE_P(byte) (((byte) & 0xC0) == 0xC0)
@@ -256,6 +286,32 @@ Boston, MA 02111-1307, USA.  */
     : (p)[0] == 0xF8 && ((p)[1] & 0xF0) == 0x80 ? 5     \
     : 0)
  
+/* If P is before LIMIT, advance P to the next character boundary by
+   the length BYTES_BY_CHAR_HEAD gives for *P.  It assumes that P is at a
+   character boundary of the same multibyte form that ends at LIMIT.  */
+
+#define NEXT_CHAR_BOUNDARY(p, limit)   \
+  do {                                 \
+    if ((p) < (limit))                 \
+      (p) += BYTES_BY_CHAR_HEAD (*(p));        \
+  } while (0)
+
+
+/* If P is after LIMIT, move P back to the previous character boundary,
+   never scanning below LIMIT.  It assumes that P is already at a character
+   boundary of the same multibyte form whose beginning address is LIMIT.  */
+
+#define PREV_CHAR_BOUNDARY(p, limit)                                   \
+  do {                                                                 \
+    if ((p) > (limit))                                                 \
+      {                                                                        \
+       const unsigned char *p0 = (p);                                  \
+       do {                                                            \
+         p0--;                                                         \
+       } while (p0 > (limit) && ! CHAR_HEAD_P (*p0));                  \
+       (p) = (BYTES_BY_CHAR_HEAD (*p0) == (p) - p0) ? p0 : (p) - 1;    \
+      }                                                                        \
+  } while (0)
  
  /* Return the character code of character whose multibyte form is at
     P.  The argument LEN is ignored.  It will be removed in the
@@ -272,7 +328,7 @@ Boston, MA 02111-1307, USA.  */
     ? ((((p)[0] & 0x0F) << 12)                                  \
        | (((p)[1] & 0x3F) << 6)                                 \
        | ((p)[2] & 0x3F))                                       \
-   : string_char_with_unification ((p), NULL, NULL))
+   : string_char ((p), NULL, NULL))
  
  
  /* Like STRING_CHAR but set ACTUAL_LEN to the length of multibyte
@@ -292,10 +348,10 @@ Boston, MA 02111-1307, USA.  */
        ((((p)[0] & 0x0F) << 12)                                 \
         | (((p)[1] & 0x3F) << 6)                                        \
         | ((p)[2] & 0x3F)))                                     \
-   : string_char_with_unification ((p), NULL, &actual_len))
+   : string_char ((p), NULL, &actual_len))
  
  
-/* Like STRING_CHAR but advacen P to the end of multibyte form.  */
+/* Like STRING_CHAR but advance P to the end of multibyte form.  */
  
  #define STRING_CHAR_ADVANCE(p)                                 \
    (!((p)[0] & 0x80)                                            \
@@ -304,13 +360,13 @@ Boston, MA 02111-1307, USA.  */
     ? ((p) += 2,                                                        \
        ((((p)[-2] & 0x1F) << 6)                                 \
         | ((p)[-1] & 0x3F)                                      \
-       | (((unsigned char) (p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))   \
+       | ((unsigned char) ((p)[-2]) < 0xC2 ? 0x3FFF80 : 0)))   \
     : ! ((p)[0] & 0x10)                                         \
     ? ((p) += 3,                                                        \
        ((((p)[-3] & 0x0F) << 12)                                        \
         | (((p)[-2] & 0x3F) << 6)                               \
         | ((p)[-1] & 0x3F)))                                    \
-   : string_char_with_unification ((p), &(p), NULL))
+   : string_char ((p), &(p), NULL))
  
  
  /* Fetch the "next" character from Lisp string STRING at byte position
@@ -337,6 +393,29 @@ Boston, MA 02111-1307, USA.  */
      }                                                                  \
    else
  
+/* Like FETCH_STRING_CHAR_ADVANCE but store a multibyte character in
+   OUTPUT even if STRING is unibyte (via MAKE_CHAR_MULTIBYTE).  */
+
+#define FETCH_STRING_CHAR_AS_MULTIBYTE_ADVANCE(OUTPUT, STRING, CHARIDX, BYTEIDX) \
+  if (1)                                                                     \
+    {                                                                        \
+      CHARIDX++;                                                             \
+      if (STRING_MULTIBYTE (STRING))                                         \
+       {                                                                     \
+         unsigned char *ptr = &XSTRING (STRING)->data[BYTEIDX];              \
+         int len;                                                            \
+                                                                             \
+         OUTPUT = STRING_CHAR_AND_LENGTH (ptr, 0, len);                      \
+         BYTEIDX += len;                                                     \
+       }                                                                     \
+      else                                                                   \
+       {                                                                     \
+         OUTPUT = XSTRING (STRING)->data[BYTEIDX++];                         \
+         MAKE_CHAR_MULTIBYTE (OUTPUT);                                       \
+       }                                                                     \
+    }                                                                        \
+  else
+
  
  /* Like FETCH_STRING_CHAR_ADVANCE but assumes STRING is multibyte.  */
  
@@ -377,7 +456,7 @@ Boston, MA 02111-1307, USA.  */
    else
  
  
-/* Like FETCH_CHAR_ADVANCE but assumes STRING is multibyte.  */
+/* Like FETCH_CHAR_ADVANCE but assumes the current buffer is multibyte.  */
  
  #define FETCH_CHAR_ADVANCE_NO_CHECK(OUTPUT, CHARIDX, BYTEIDX)  \
    if (1)                                                       \
@@ -480,8 +559,12 @@ Boston, MA 02111-1307, USA.  */
    } while (0)
  
  
+/* If C is a character to be unified with a Unicode character, return
+   the unified Unicode character.  */
+
  #define MAYBE_UNIFY_CHAR(c)                                    \
-  if (CHAR_TABLE_P (Vchar_unify_table))                                \
+  if (c > MAX_UNICODE_CHAR                                     \
+      && CHAR_TABLE_P (Vchar_unify_table))                     \
      {                                                          \
        Lisp_Object val;                                         \
        int unified;                                             \
@@ -523,19 +606,21 @@ Boston, MA 02111-1307, USA.  */
     ? ASCII_CHAR_WIDTH (c)      \
     : XINT (CHAR_TABLE_REF (Vchar_width_table, c)))
  
-extern int char_string_with_unification P_ ((int, unsigned char *));
-extern int string_char_with_unification P_ ((const unsigned char *,
-                                            const unsigned char **, int *));
+extern int char_resolve_modifier_mask P_ ((int));
+extern int char_string P_ ((int, unsigned char *));
+extern int string_char P_ ((const unsigned char *,
+                           const unsigned char **, int *));
  
  extern int translate_char P_ ((Lisp_Object, int c));
  extern int char_printable_p P_ ((int c));
-extern void parse_str_as_multibyte P_ ((unsigned char *, int, int *, int *));
+extern void parse_str_as_multibyte P_ ((const unsigned char *, int, int *,
+                                       int *));
  extern int parse_str_to_multibyte P_ ((unsigned char *, int));
  extern int str_as_multibyte P_ ((unsigned char *, int, int, int *));
  extern int str_to_multibyte P_ ((unsigned char *, int, int));
  extern int str_as_unibyte P_ ((unsigned char *, int));
  extern int strwidth P_ ((unsigned char *, int));
-extern int c_string_width P_ ((unsigned char *, int, int, int *, int *));
+extern int c_string_width P_ ((const unsigned char *, int, int, int *, int *));
  extern int lisp_string_width P_ ((Lisp_Object, int, int *, int *));
  
  extern Lisp_Object Vprintable_chars;
@@ -573,3 +658,6 @@ extern Lisp_Object Vchar_script_table;
    do { (sym) = intern ((name)); staticpro (&(sym)); } while (0)
  
  #endif /* EMACS_CHARACTER_H */
+
+/* arch-tag: 4ef86004-2eff-4073-8cea-cfcbcf7188ac
+   (do not change this comment) */