Revision: miles@gnu.org--gnu-2005/emacs--unicode--0--patch-86

[gnu-emacs] / src / character.c
diff --git a/src/character.c b/src/character.c

index 2031e5e9b362bb59b32dca12e0f0a8fbf3172320..91b889c3836b1cf1fc90519b98909591c98b848b 100644 (file)
--- a/src/character.c
+++ b/src/character.c
@@ -1,8 +1,8 @@
  /* Basic character support.
     Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
-   Licensed to the Free Software Foundation.
-   Copyright (C) 2001 Free Software Foundation, Inc.
-   Copyright (C) 2001, 2002
+     Licensed to the Free Software Foundation.
+   Copyright (C) 2001, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2003
       National Institute of Advanced Industrial Science and Technology (AIST)
       Registration Number H13PRO009
  
@@ -59,6 +59,8 @@ Lisp_Object Vauto_fill_chars;
  
  Lisp_Object Qauto_fill_chars;
  
+/* Char-table of information about which character to unify to which
+   Unicode character.  */
  Lisp_Object Vchar_unify_table;
  
  /* A char-table.  An element is non-nil iff the corresponding
@@ -84,18 +86,66 @@ static Lisp_Object Qchar_script_table;
  /* Mapping table from unibyte chars to multibyte chars.  */
  int unibyte_to_multibyte_table[256];
  
+/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
+   char.  */
+char unibyte_has_multibyte_table[256];
+
  \f
  
+/* Store multibyte form of character C at P.  If C has modifier bits,
+   handle them appropriately.  */
+
  int
-char_string_with_unification (c, p)
+char_string (c, p)
       int c;
       unsigned char *p;
  {
    int bytes;
  
+  if (c & CHAR_MODIFIER_MASK)
+    {
+      /* As a non-ASCII character can't have modifier bits, we just
+        ignore the bits.  */
+      if (ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
+       {
+         /* For Meta, Shift, and Control modifiers, we need special care.  */
+         if (c & CHAR_META)
+           {
+             /* Move the meta bit to the right place for a string.  */
+             c = (c & ~CHAR_META) | 0x80;
+           }
+         if (c & CHAR_SHIFT)
+           {
+             /* Shift modifier is valid only with [A-Za-z].  */
+             if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
+               c &= ~CHAR_SHIFT;
+             else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
+               c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+           }
+         if (c & CHAR_CTL)
+           {
+             /* Simulate the code in lread.c.  */
+             /* Allow `\C- ' and `\C-?'.  */
+             if (c == (CHAR_CTL | ' '))
+               c = 0;
+             else if (c == (CHAR_CTL | '?'))
+               c = 127;
+             /* ASCII control chars are made from letters (both cases),
+                as well as the non-letters within 0100...0137.  */
+             else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
+               c &= (037 | (~0177 & ~CHAR_CTL));
+             else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
+               c &= (037 | (~0177 & ~CHAR_CTL));
+           }
+       }
+
+      /* If C still has any modifier bits, just ignore them.  */
+      c &= ~CHAR_MODIFIER_MASK;
+    }
+
    MAYBE_UNIFY_CHAR (c);
  
-  if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
+  if (c <= MAX_3_BYTE_CHAR)
      {
        bytes = CHAR_STRING (c, p);
      }
@@ -107,7 +157,7 @@ char_string_with_unification (c, p)
        p[3] = (0x80 | (c & 0x3F));
        bytes = 4;
      }
-  else
+  else if (c <= MAX_5_BYTE_CHAR)
      {
        p[0] = 0xF8;
        p[1] = (0x80 | ((c >> 18) & 0x0F));
@@ -116,13 +166,25 @@ char_string_with_unification (c, p)
        p[4] = (0x80 | (c & 0x3F));
        bytes = 5;
      }
+  else
+    {
+      c = CHAR_TO_BYTE8 (c);
+      bytes = BYTE8_STRING (c, p);
+    }
  
    return bytes;
  }
  
  
+/* Return a character whose multibyte form is at P.  If LEN is not
+   NULL, it must be a pointer to integer.  In that case, set *LEN to
+   the byte length of the multibyte form.  If ADVANCED is not NULL, it
+   must be a pointer to a pointer to unsigned char.  In that case, set
+   *ADVANCED to the ending address (i.e. the starting address of the
+   next character) of the multibyte form.  */
+
  int
-string_char_with_unification (p, advanced, len)
+string_char (p, advanced, len)
       const unsigned char *p;
       const unsigned char **advanced;
       int *len;
@@ -164,21 +226,28 @@ string_char_with_unification (p, advanced, len)
  /* Translate character C by translation table TABLE.  If C is
     negative, translate a character specified by CHARSET and CODE.  If
     no translation is found in TABLE, return the untranslated
-   character.  */
+   character.  If TABLE is a list, elements are char tables.  In this
+   case, translate C by each table in turn.  */
  
  int
  translate_char (table, c)
       Lisp_Object table;
       int c;
  {
-  Lisp_Object ch;
-
-  if (! CHAR_TABLE_P (table))
-    return c;
-  ch = CHAR_TABLE_REF (table, c);
-  if (! CHARACTERP (ch))
-    return c;
-  return XINT (ch);
+  if (CHAR_TABLE_P (table))
+    {
+      Lisp_Object ch;
+
+      ch = CHAR_TABLE_REF (table, c);
+      if (CHARACTERP (ch))
+       c = XINT (ch);
+    }
+  else
+    {
+      for (; CONSP (table); table = XCDR (table))
+       c = translate_char (XCAR (table), c);
+    }
+  return c;
  }
  
  /* Convert the multibyte character C to unibyte 8-bit character based
@@ -301,7 +370,7 @@ Tab is taken to occupy `tab-width' columns.  */)
  
  int
  c_string_width (str, len, precision, nchars, nbytes)
-     unsigned char *str;
+     const unsigned char *str;
       int precision, *nchars, *nbytes;
  {
    int i = 0, i_byte = 0;
@@ -372,8 +441,8 @@ lisp_string_width (string, precision, nchars, nbytes)
       Lisp_Object string;
       int precision, *nchars, *nbytes;
  {
-  int len = XSTRING (string)->size;
-  unsigned char *str = XSTRING (string)->data;
+  int len = SCHARS (string);
+  unsigned char *str = SDATA (string);
    int i = 0, i_byte = 0;
    int width = 0;
    struct Lisp_Char_Table *dp = buffer_display_table ();
@@ -383,7 +452,7 @@ lisp_string_width (string, precision, nchars, nbytes)
        int chars, bytes, thiswidth;
        Lisp_Object val;
        int cmp_id;
-      int ignore, end;
+      EMACS_INT ignore, end;
  
        if (find_composition (i, -1, &ignore, &end, &val, string)
           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
@@ -463,23 +532,6 @@ The returned value is 0 for left-to-right and 1 for right-to-left.  */)
    return CHAR_TABLE_REF (Vchar_direction_table, c);
  }
  
-DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
-       doc: /* Return number of characters between BEG and END.
-This is now an obsolete function.  We keep it just for backward compatibility.  */)
-     (beg, end)
-     Lisp_Object beg, end;
-{
-  int from, to;
-
-  CHECK_NUMBER_COERCE_MARKER (beg);
-  CHECK_NUMBER_COERCE_MARKER (end);
-
-  from = min (XFASTINT (beg), XFASTINT (end));
-  to = max (XFASTINT (beg), XFASTINT (end));
-
-  return make_number (to - from);
-}
-
  /* Return the number of characters in the NBYTES bytes at PTR.
     This works by looking at the contents and checking for multibyte
     sequences while assuming that there's no invalid sequence.
@@ -488,7 +540,7 @@ This is now an obsolete function.  We keep it just for backward compatibility.
  
  int
  chars_in_text (ptr, nbytes)
-     unsigned char *ptr;
+     const unsigned char *ptr;
       int nbytes;
  {
    /* current_buffer is null at early stages of Emacs initialization.  */
@@ -506,10 +558,10 @@ chars_in_text (ptr, nbytes)
  
  int
  multibyte_chars_in_text (ptr, nbytes)
-     unsigned char *ptr;
+     const unsigned char *ptr;
       int nbytes;
  {
-  unsigned char *endp = ptr + nbytes;
+  const unsigned char *endp = ptr + nbytes;
    int chars = 0;
  
    while (ptr < endp)
@@ -533,15 +585,15 @@ multibyte_chars_in_text (ptr, nbytes)
  
  void
  parse_str_as_multibyte (str, len, nchars, nbytes)
-     unsigned char *str;
+     const unsigned char *str;
       int len, *nchars, *nbytes;
  {
-  unsigned char *endp = str + len;
+  const unsigned char *endp = str + len;
    int n, chars = 0, bytes = 0;
  
    if (len >= MAX_MULTIBYTE_LENGTH)
      {
-      unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+      const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
        while (str < adjusted_endp)
         {
           if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
@@ -628,7 +680,7 @@ str_as_multibyte (str, len, nbytes, nchars)
         {
           while (n--)
             *to++ = *p++;
-       }         
+       }
        else
         {
           int c = *p++;
@@ -682,7 +734,7 @@ str_to_multibyte (str, len, bytes)
    endp = str + len;
    safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
    p = endp - bytes;
-  while (p < endp)      
+  while (p < endp)
      {
        int c = *p++;
  
@@ -715,7 +767,7 @@ str_as_unibyte (str, bytes)
        p += len;
      }
    to = str + (p - str);
-  while (p < endp)      
+  while (p < endp)
      {
        c = *p;
        len = BYTES_BY_CHAR_HEAD (c);
@@ -737,8 +789,8 @@ string_count_byte8 (string)
       Lisp_Object string;
  {
    int multibyte = STRING_MULTIBYTE (string);
-  int nbytes = STRING_BYTES (XSTRING (string));
-  unsigned char *p = XSTRING (string)->data;
+  int nbytes = SBYTES (string);
+  unsigned char *p = SDATA (string);
    unsigned char *pend = p + nbytes;
    int count = 0;
    int c, len;
@@ -767,8 +819,8 @@ Lisp_Object
  string_escape_byte8 (string)
       Lisp_Object string;
  {
-  int nchars = XSTRING (string)->size;
-  int nbytes = STRING_BYTES (XSTRING (string));
+  int nchars = SCHARS (string);
+  int nbytes = SBYTES (string);
    int multibyte = STRING_MULTIBYTE (string);
    int byte8_count;
    const unsigned char *src, *src_end;
@@ -792,9 +844,9 @@ string_escape_byte8 (string)
      /* Convert 1-byte sequence of byte8 chars to 4-byte octal.  */
      val = make_uninit_string (nbytes + byte8_count * 3);
  
-  src = XSTRING (string)->data;
+  src = SDATA (string);
    src_end = src + nbytes;
-  dst = XSTRING (val)->data;
+  dst = SDATA (val);
    if (multibyte)
      while (src < src_end)
        {
@@ -827,7 +879,7 @@ string_escape_byte8 (string)
  }
  
  \f
-DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
+DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
         doc: /*
  Concatenate all the argument characters and make the result a string.
  usage: (string &rest CHARACTERS)  */)
@@ -874,7 +926,6 @@ syms_of_character ()
    defsubr (&Schar_width);
    defsubr (&Sstring_width);
    defsubr (&Schar_direction);
-  defsubr (&Schars_in_region);
    defsubr (&Sstring);
  
    DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
@@ -927,3 +978,6 @@ It has one extra slot whose value is a list of script symbols.  */);
  }
  
  #endif /* emacs */
+
+/* arch-tag: b6665960-3c3d-4184-85cd-af4318197999
+   (do not change this comment) */