X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/ecae6af979abcbb5b45c33ee05ceb297678ec9a0..41d579ce4a2a86428f200788df4b15b936aa5076:/src/character.c

diff --git a/src/character.c b/src/character.c
index 828e220813..397481e5b3 100644
--- a/src/character.c
+++ b/src/character.c
@@ -1,18 +1,18 @@
 /* Basic character support.
-   Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
-     Licensed to the Free Software Foundation.
-   Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
-     Free Software Foundation, Inc.
-   Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
-     National Institute of Advanced Industrial Science and Technology (AIST)
-     Registration Number H13PRO009
+
+Copyright (C) 2001-2011  Free Software Foundation, Inc.
+Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
+  Licensed to the Free Software Foundation.
+Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011
+  National Institute of Advanced Industrial Science and Technology (AIST)
+  Registration Number H13PRO009
 
 This file is part of GNU Emacs.
 
-GNU Emacs is free software; you can redistribute it and/or modify
+GNU Emacs is free software: you can redistribute it and/or modify
 it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 3, or (at your option)
-any later version.
+the Free Software Foundation, either version 3 of the License, or
+(at your option) any later version.
 
 GNU Emacs is distributed in the hope that it will be useful,
 but WITHOUT ANY WARRANTY; without even the implied warranty of
@@ -20,9 +20,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 GNU General Public License for more details.
 
 You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING.  If not, write to the
-Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
-Boston, MA 02110-1301, USA.  */
+along with GNU Emacs.  If not, see <http://www.gnu.org/licenses/>.  */
 
 /* At first, see the document in `character.h' to understand the code
    in this file.  */
@@ -36,6 +34,7 @@ Boston, MA 02110-1301, USA.  */
 #ifdef emacs
 
 #include <sys/types.h>
+#include <setjmp.h>
 #include "lisp.h"
 #include "character.h"
 #include "buffer.h"
@@ -51,57 +50,24 @@ Boston, MA 02110-1301, USA.  */
 
 Lisp_Object Qcharacterp;
 
-/* Vector of translation table ever defined.
-   ID of a translation table is used to index this vector.  */
-Lisp_Object Vtranslation_table_vector;
-
-/* A char-table for characters which may invoke auto-filling.  */
-Lisp_Object Vauto_fill_chars;
-
 Lisp_Object Qauto_fill_chars;
 
 /* Char-table of information about which character to unify to which
-   Unicode character.  */
+   Unicode character.  Mainly used by the macro MAYBE_UNIFY_CHAR.  */
 Lisp_Object Vchar_unify_table;
 
-/* A char-table.  An element is non-nil iff the corresponding
-   character has a printable glyph.  */
-Lisp_Object Vprintable_chars;
-
-/* A char-table.  An elemnent is a column-width of the corresponding
-   character.  */
-Lisp_Object Vchar_width_table;
-
-/* A char-table.  An element is a symbol indicating the direction
-   property of corresponding character.  */
-Lisp_Object Vchar_direction_table;
-
 /* Variable used locally in the macro FETCH_MULTIBYTE_CHAR.  */
 unsigned char *_fetch_multibyte_char_p;
 
-/* Char table of scripts.  */
-Lisp_Object Vchar_script_table;
-
-/* Alist of scripts vs representative characters.  */
-Lisp_Object Vscript_representative_chars;
-
 static Lisp_Object Qchar_script_table;
 
-/* Mapping table from unibyte chars to multibyte chars.  */
-int unibyte_to_multibyte_table[256];
-
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
-   char.  */
-char unibyte_has_multibyte_table[256];
-
 
 
 /* If character code C has modifier masks, reflect them to the
    character code if possible.  Return the resulting code.  */
 
 int
-char_resolve_modifier_mask (c)
-     int c;
+char_resolve_modifier_mask (int c)
 {
   /* A non-ASCII character can't reflect modifier bits to the code.  */
   if (! ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
@@ -134,11 +100,13 @@ char_resolve_modifier_mask (c)
       else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 	c &= (037 | (~0177 & ~CHAR_CTL));
     }
+#if 0	/* This is outside the scope of this function.  (bug#4751)  */
   if (c & CHAR_META)
     {
       /* Move the meta bit to the right place for a string.  */
       c = (c & ~CHAR_META) | 0x80;
     }
+#endif
 
   return c;
 }
@@ -148,9 +116,7 @@ char_resolve_modifier_mask (c)
    handle them appropriately.  */
 
 int
-char_string (c, p)
-     unsigned c;
-     unsigned char *p;
+char_string (unsigned int c, unsigned char *p)
 {
   int bytes;
 
@@ -204,10 +170,7 @@ char_string (c, p)
    character) of the multibyte form.  */
 
 int
-string_char (p, advanced, len)
-     const unsigned char *p;
-     const unsigned char **advanced;
-     int *len;
+string_char (const unsigned char *p, const unsigned char **advanced, int *len)
 {
   int c;
   const unsigned char *saved_p = p;
@@ -250,9 +213,7 @@ string_char (p, advanced, len)
    case, translace C by all tables.  */
 
 int
-translate_char (table, c)
-     Lisp_Object table;
-     int c;
+translate_char (Lisp_Object table, int c)
 {
   if (CHAR_TABLE_P (table))
     {
@@ -270,56 +231,45 @@ translate_char (table, c)
   return c;
 }
 
-/* Convert the multibyte character C to unibyte 8-bit character based
-   on the current value of charset_unibyte.  If dimension of
-   charset_unibyte is more than one, return (C & 0xFF).
+/* Convert ASCII or 8-bit character C to unibyte.  If C is none of
+   them, return (C & 0xFF).
 
    The argument REV_TBL is now ignored.  It will be removed in the
    future.  */
 
 int
-multibyte_char_to_unibyte (c, rev_tbl)
-     int c;
-     Lisp_Object rev_tbl;
+multibyte_char_to_unibyte (int c, Lisp_Object rev_tbl)
 {
-  struct charset *charset;
-  unsigned c1;
-
+  if (c < 0x80)
+    return c;
   if (CHAR_BYTE8_P (c))
     return CHAR_TO_BYTE8 (c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c1 = ENCODE_CHAR (charset, c);
-  return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
+  return (c & 0xFF);
 }
 
 /* Like multibyte_char_to_unibyte, but return -1 if C is not supported
    by charset_unibyte.  */
 
 int
-multibyte_char_to_unibyte_safe (c)
-     int c;
+multibyte_char_to_unibyte_safe (int c)
 {
-  struct charset *charset;
-  unsigned c1;
-
+  if (c < 0x80)
+    return c;
   if (CHAR_BYTE8_P (c))
     return CHAR_TO_BYTE8 (c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c1 = ENCODE_CHAR (charset, c);
-  return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1);
+  return -1;
 }
 
 DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
        doc: /* Return non-nil if OBJECT is a character.  */)
-     (object, ignore)
-     Lisp_Object object, ignore;
+  (Lisp_Object object, Lisp_Object ignore)
 {
   return (CHARACTERP (object) ? Qt : Qnil);
 }
 
 DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
        doc: /* Return the character of the maximum code.  */)
-     ()
+  (void)
 {
   return make_number (MAX_CHAR);
 }
@@ -327,20 +277,15 @@ DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0,
 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
        Sunibyte_char_to_multibyte, 1, 1, 0,
        doc: /* Convert the byte CH to multibyte character.  */)
-     (ch)
-     Lisp_Object ch;
+  (Lisp_Object ch)
 {
   int c;
-  struct charset *charset;
 
   CHECK_CHARACTER (ch);
   c = XFASTINT (ch);
-  if (c >= 0400)
-    error ("Invalid unibyte character: %d", c);
-  charset = CHARSET_FROM_ID (charset_unibyte);
-  c = DECODE_CHAR (charset, c);
-  if (c < 0)
-    c = BYTE8_TO_CHAR (XFASTINT (ch));
+  if (c >= 0x100)
+    error ("Not a unibyte character: %d", c);
+  MAKE_CHAR_MULTIBYTE (c);
   return make_number (c);
 }
 
@@ -348,8 +293,7 @@ DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
        Smultibyte_char_to_unibyte, 1, 1, 0,
        doc: /* Convert the multibyte character CH to a byte.
 If the multibyte character does not represent a byte, return -1.  */)
-     (ch)
-     Lisp_Object ch;
+  (Lisp_Object ch)
 {
   int cm;
 
@@ -361,29 +305,17 @@ If the multibyte character does not represent a byte, return -1.  */)
     return ch;
   else
     {
-      int cu = CHAR_TO_BYTE8 (cm);
+      int cu = CHAR_TO_BYTE_SAFE (cm);
       return make_number (cu);
     }
 }
 
-DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
-       doc: /* Return 1 regardless of the argument CHAR.
-This is now an obsolete function.  We keep it just for backward compatibility.
-usage: (char-bytes CHAR)  */)
-     (ch)
-     Lisp_Object ch;
-{
-  CHECK_CHARACTER (ch);
-  return make_number (1);
-}
-
 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
        doc: /* Return width of CHAR when displayed in the current buffer.
 The width is measured by how many columns it occupies on the screen.
 Tab is taken to occupy `tab-width' columns.
 usage: (char-width CHAR)  */)
-     (ch)
-       Lisp_Object ch;
+  (Lisp_Object ch)
 {
   Lisp_Object disp;
   int c, width;
@@ -410,20 +342,19 @@ usage: (char-width CHAR)  */)
    characters and bytes of the substring in *NCHARS and *NBYTES
    respectively.  */
 
-int
-c_string_width (str, len, precision, nchars, nbytes)
-     const unsigned char *str;
-     int precision, *nchars, *nbytes;
+EMACS_INT
+c_string_width (const unsigned char *str, EMACS_INT len, int precision,
+		EMACS_INT *nchars, EMACS_INT *nbytes)
 {
-  int i = 0, i_byte = 0;
-  int width = 0;
+  EMACS_INT i = 0, i_byte = 0;
+  EMACS_INT width = 0;
   struct Lisp_Char_Table *dp = buffer_display_table ();
 
   while (i_byte < len)
     {
       int bytes, thiswidth;
       Lisp_Object val;
-      int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+      int c = STRING_CHAR_AND_LENGTH (str + i_byte, bytes);
 
       if (dp)
 	{
@@ -463,12 +394,10 @@ c_string_width (str, len, precision, nchars, nbytes)
    current buffer.  The width is measured by how many columns it
    occupies on the screen.  */
 
-int
-strwidth (str, len)
-     unsigned char *str;
-     int len;
+EMACS_INT
+strwidth (const char *str, EMACS_INT len)
 {
-  return c_string_width (str, len, -1, NULL, NULL);
+  return c_string_width ((const unsigned char *) str, len, -1, NULL, NULL);
 }
 
 /* Return width of Lisp string STRING when displayed in the current
@@ -478,19 +407,18 @@ strwidth (str, len)
    PRECISION, and set number of characters and bytes of the substring
    in *NCHARS and *NBYTES respectively.  */
 
-int
-lisp_string_width (string, precision, nchars, nbytes)
-     Lisp_Object string;
-     int precision, *nchars, *nbytes;
+EMACS_INT
+lisp_string_width (Lisp_Object string, int precision,
+		   EMACS_INT *nchars, EMACS_INT *nbytes)
 {
-  int len = SCHARS (string);
+  EMACS_INT len = SCHARS (string);
   /* This set multibyte to 0 even if STRING is multibyte when it
      contains only ascii and eight-bit-graphic, but that's
      intentional.  */
   int multibyte = len < SBYTES (string);
   unsigned char *str = SDATA (string);
-  int i = 0, i_byte = 0;
-  int width = 0;
+  EMACS_INT i = 0, i_byte = 0;
+  EMACS_INT width = 0;
   struct Lisp_Char_Table *dp = buffer_display_table ();
 
   while (i < len)
@@ -513,7 +441,7 @@ lisp_string_width (string, precision, nchars, nbytes)
 	  int c;
 
 	  if (multibyte)
-	    c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+	    c = STRING_CHAR_AND_LENGTH (str + i_byte, bytes);
 	  else
 	    c = str[i_byte], bytes = 1;
 	  chars = 1;
@@ -560,8 +488,7 @@ only the base leading-code is considered; the validity of
 the following bytes is not checked.  Tabs in STRING are always
 taken to occupy `tab-width' columns.
 usage: (string-width STRING)  */)
-     (str)
-     Lisp_Object str;
+  (Lisp_Object str)
 {
   Lisp_Object val;
 
@@ -574,8 +501,7 @@ DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
        doc: /* Return the direction of CHAR.
 The returned value is 0 for left-to-right and 1 for right-to-left.
 usage: (char-direction CHAR)  */)
-     (ch)
-     Lisp_Object ch;
+  (Lisp_Object ch)
 {
   int c;
 
@@ -591,9 +517,7 @@ usage: (char-direction CHAR)  */)
    nil, we treat each byte as a character.  */
 
 EMACS_INT
-chars_in_text (ptr, nbytes)
-     const unsigned char *ptr;
-     EMACS_INT nbytes;
+chars_in_text (const unsigned char *ptr, EMACS_INT nbytes)
 {
   /* current_buffer is null at early stages of Emacs initialization.  */
   if (current_buffer == 0
@@ -609,16 +533,14 @@ chars_in_text (ptr, nbytes)
    ignores enable-multibyte-characters.  */
 
 EMACS_INT
-multibyte_chars_in_text (ptr, nbytes)
-     const unsigned char *ptr;
-     EMACS_INT nbytes;
+multibyte_chars_in_text (const unsigned char *ptr, EMACS_INT nbytes)
 {
   const unsigned char *endp = ptr + nbytes;
-  int chars = 0;
+  EMACS_INT chars = 0;
 
   while (ptr < endp)
     {
-      int len = MULTIBYTE_LENGTH (ptr, endp);
+      EMACS_INT len = MULTIBYTE_LENGTH (ptr, endp);
 
       if (len == 0)
 	abort ();
@@ -636,19 +558,19 @@ multibyte_chars_in_text (ptr, nbytes)
    represented by 2-byte in a multibyte text.  */
 
 void
-parse_str_as_multibyte (str, len, nchars, nbytes)
-     const unsigned char *str;
-     int len, *nchars, *nbytes;
+parse_str_as_multibyte (const unsigned char *str, EMACS_INT len,
+			EMACS_INT *nchars, EMACS_INT *nbytes)
 {
   const unsigned char *endp = str + len;
-  int n, chars = 0, bytes = 0;
+  EMACS_INT n, chars = 0, bytes = 0;
 
   if (len >= MAX_MULTIBYTE_LENGTH)
     {
       const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
       while (str < adjusted_endp)
 	{
-	  if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
+	  if (! CHAR_BYTE8_HEAD_P (*str)
+	      && (n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
 	    str += n, bytes += n;
 	  else
 	    str++, bytes += 2;
@@ -657,7 +579,8 @@ parse_str_as_multibyte (str, len, nchars, nbytes)
     }
   while (str < endp)
     {
-      if ((n = MULTIBYTE_LENGTH (str, endp)) > 0)
+      if (! CHAR_BYTE8_HEAD_P (*str)
+	  && (n = MULTIBYTE_LENGTH (str, endp)) > 0)
 	str += n, bytes += n;
       else
 	str++, bytes += 2;
@@ -677,24 +600,26 @@ parse_str_as_multibyte (str, len, nchars, nbytes)
    area and that is enough.  Return the number of bytes of the
    resulting text.  */
 
-int
-str_as_multibyte (str, len, nbytes, nchars)
-     unsigned char *str;
-     int len, nbytes, *nchars;
+EMACS_INT
+str_as_multibyte (unsigned char *str, EMACS_INT len, EMACS_INT nbytes,
+		  EMACS_INT *nchars)
 {
   unsigned char *p = str, *endp = str + nbytes;
   unsigned char *to;
-  int chars = 0;
+  EMACS_INT chars = 0;
   int n;
 
   if (nbytes >= MAX_MULTIBYTE_LENGTH)
     {
       unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
       while (p < adjusted_endp
+	     && ! CHAR_BYTE8_HEAD_P (*p)
 	     && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
 	p += n, chars++;
     }
-  while ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
+  while (p < endp
+	 && ! CHAR_BYTE8_HEAD_P (*p)
+	 && (n = MULTIBYTE_LENGTH (p, endp)) > 0)
     p += n, chars++;
   if (nchars)
     *nchars = chars;
@@ -704,7 +629,7 @@ str_as_multibyte (str, len, nbytes, nchars)
   to = p;
   nbytes = endp - p;
   endp = str + len;
-  safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes);
+  memmove (endp - nbytes, p, nbytes);
   p = endp - nbytes;
 
   if (nbytes >= MAX_MULTIBYTE_LENGTH)
@@ -712,7 +637,8 @@ str_as_multibyte (str, len, nbytes, nchars)
       unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
       while (p < adjusted_endp)
 	{
-	  if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
+	  if (! CHAR_BYTE8_HEAD_P (*p)
+	      && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0)
 	    {
 	      while (n--)
 		*to++ = *p++;
@@ -728,7 +654,8 @@ str_as_multibyte (str, len, nbytes, nchars)
     }
   while (p < endp)
     {
-      if ((n = MULTIBYTE_LENGTH (p, endp)) > 0)
+      if (! CHAR_BYTE8_HEAD_P (*p)
+	  && (n = MULTIBYTE_LENGTH (p, endp)) > 0)
 	{
 	  while (n--)
 	    *to++ = *p++;
@@ -750,13 +677,11 @@ str_as_multibyte (str, len, nbytes, nchars)
    bytes it may ocupy when converted to multibyte string by
    `str_to_multibyte'.  */
 
-int
-parse_str_to_multibyte (str, len)
-     unsigned char *str;
-     int len;
+EMACS_INT
+parse_str_to_multibyte (const unsigned char *str, EMACS_INT len)
 {
-  unsigned char *endp = str + len;
-  int bytes;
+  const unsigned char *endp = str + len;
+  EMACS_INT bytes;
 
   for (bytes = 0; str < endp; str++)
     bytes += (*str < 0x80) ? 1 : 2;
@@ -770,10 +695,8 @@ parse_str_to_multibyte (str, len)
    that we can use LEN bytes at STR as a work area and that is
    enough.  */
 
-int
-str_to_multibyte (str, len, bytes)
-     unsigned char *str;
-     int len, bytes;
+EMACS_INT
+str_to_multibyte (unsigned char *str, EMACS_INT len, EMACS_INT bytes)
 {
   unsigned char *p = str, *endp = str + bytes;
   unsigned char *to;
@@ -784,7 +707,7 @@ str_to_multibyte (str, len, bytes)
   to = p;
   bytes = endp - p;
   endp = str + len;
-  safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
+  memmove (endp - bytes, p, bytes);
   p = endp - bytes;
   while (p < endp)
     {
@@ -801,10 +724,8 @@ str_to_multibyte (str, len, bytes)
    actually converts characters in the range 0x80..0xFF to
    unibyte.  */
 
-int
-str_as_unibyte (str, bytes)
-     unsigned char *str;
-     int bytes;
+EMACS_INT
+str_as_unibyte (unsigned char *str, EMACS_INT bytes)
 {
   const unsigned char *p = str, *endp = str + bytes;
   unsigned char *to;
@@ -836,15 +757,43 @@ str_as_unibyte (str, bytes)
   return (to - str);
 }
 
-int
-string_count_byte8 (string)
-     Lisp_Object string;
+/* Convert eight-bit chars in SRC (in multibyte form) to the
+   corresponding byte and store in DST.  CHARS is the number of
+   characters in SRC.  The value is the number of bytes stored in DST.
+   Usually, the value is the same as CHARS, but is less than it if SRC
+   contains a non-ASCII, non-eight-bit characater.  If ACCEPT_LATIN_1
+   is nonzero, a Latin-1 character is accepted and converted to a byte
+   of that character code.
+   Note: Currently the arg ACCEPT_LATIN_1 is not used.  */
+
+EMACS_INT
+str_to_unibyte (const unsigned char *src, unsigned char *dst, EMACS_INT chars, int accept_latin_1)
+{
+  EMACS_INT i;
+
+  for (i = 0; i < chars; i++)
+    {
+      int c = STRING_CHAR_ADVANCE (src);
+
+      if (CHAR_BYTE8_P (c))
+	c = CHAR_TO_BYTE8 (c);
+      else if (! ASCII_CHAR_P (c)
+	       && (! accept_latin_1 || c >= 0x100))
+	return i;
+      *dst++ = c;
+    }
+  return i;
+}
+
+
+EMACS_INT
+string_count_byte8 (Lisp_Object string)
 {
   int multibyte = STRING_MULTIBYTE (string);
-  int nbytes = SBYTES (string);
+  EMACS_INT nbytes = SBYTES (string);
   unsigned char *p = SDATA (string);
   unsigned char *pend = p + nbytes;
-  int count = 0;
+  EMACS_INT count = 0;
   int c, len;
 
   if (multibyte)
@@ -868,13 +817,12 @@ string_count_byte8 (string)
 
 
 Lisp_Object
-string_escape_byte8 (string)
-     Lisp_Object string;
+string_escape_byte8 (Lisp_Object string)
 {
-  int nchars = SCHARS (string);
-  int nbytes = SBYTES (string);
+  EMACS_INT nchars = SCHARS (string);
+  EMACS_INT nbytes = SBYTES (string);
   int multibyte = STRING_MULTIBYTE (string);
-  int byte8_count;
+  EMACS_INT byte8_count;
   const unsigned char *src, *src_end;
   unsigned char *dst;
   Lisp_Object val;
@@ -889,12 +837,22 @@ string_escape_byte8 (string)
     return string;
 
   if (multibyte)
-    /* Convert 2-byte sequence of byte8 chars to 4-byte octal.  */
-    val = make_uninit_multibyte_string (nchars + byte8_count * 3,
-					nbytes + byte8_count * 2);
+    {
+      if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count
+	  || (MOST_POSITIVE_FIXNUM - nbytes) / 2 < byte8_count)
+	error ("Maximum string size exceeded");
+
+      /* Convert 2-byte sequence of byte8 chars to 4-byte octal.  */
+      val = make_uninit_multibyte_string (nchars + byte8_count * 3,
+					  nbytes + byte8_count * 2);
+    }
   else
-    /* Convert 1-byte sequence of byte8 chars to 4-byte octal.  */
-    val = make_uninit_string (nbytes + byte8_count * 3);
+    {
+      if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count)
+	error ("Maximum string size exceeded");
+      /* Convert 1-byte sequence of byte8 chars to 4-byte octal.  */
+      val = make_uninit_string (nbytes + byte8_count * 3);
+    }
 
   src = SDATA (string);
   src_end = src + nbytes;
@@ -935,14 +893,15 @@ DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
        doc: /*
 Concatenate all the argument characters and make the result a string.
 usage: (string &rest CHARACTERS)  */)
-     (n, args)
-     int n;
-     Lisp_Object *args;
+  (int n, Lisp_Object *args)
 {
-  int i;
-  unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
-  unsigned char *p = buf;
-  int c;
+  int i, c;
+  unsigned char *buf, *p;
+  Lisp_Object str;
+  USE_SAFE_ALLOCA;
+
+  SAFE_ALLOCA (buf, unsigned char *, MAX_MULTIBYTE_LENGTH * n);
+  p = buf;
 
   for (i = 0; i < n; i++)
     {
@@ -951,20 +910,23 @@ usage: (string &rest CHARACTERS)  */)
       p += CHAR_STRING (c, p);
     }
 
-  return make_string_from_bytes ((char *) buf, n, p - buf);
+  str = make_string_from_bytes ((char *) buf, n, p - buf);
+  SAFE_FREE ();
+  return str;
 }
 
 DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0,
        doc: /* Concatenate all the argument bytes and make the result a unibyte string.
 usage: (unibyte-string &rest BYTES)  */)
-     (n, args)
-     int n;
-     Lisp_Object *args;
+  (int n, Lisp_Object *args)
 {
-  int i;
-  unsigned char *buf = (unsigned char *) alloca (n);
-  unsigned char *p = buf;
-  unsigned c;
+  int i, c;
+  unsigned char *buf, *p;
+  Lisp_Object str;
+  USE_SAFE_ALLOCA;
+
+  SAFE_ALLOCA (buf, unsigned char *, n);
+  p = buf;
 
   for (i = 0; i < n; i++)
     {
@@ -975,17 +937,18 @@ usage: (unibyte-string &rest BYTES)  */)
       *p++ = c;
     }
 
-  return make_string_from_bytes ((char *) buf, n, p - buf);
+  str = make_string_from_bytes ((char *) buf, n, p - buf);
+  SAFE_FREE ();
+  return str;
 }
 
-DEFUN ("char-resolve-modifers", Fchar_resolve_modifiers,
+DEFUN ("char-resolve-modifiers", Fchar_resolve_modifiers,
        Schar_resolve_modifiers, 1, 1, 0,
        doc: /* Resolve modifiers in the character CHAR.
 The value is a character with modifiers resolved into the character
 code.  Unresolved modifiers are kept in the value.
-usage: (char-resolve-modifers CHAR)  */)
-     (character)
-     Lisp_Object character;
+usage: (char-resolve-modifiers CHAR)  */)
+  (Lisp_Object character)
 {
   int c;
 
@@ -994,15 +957,75 @@ usage: (char-resolve-modifers CHAR)  */)
   return make_number (char_resolve_modifier_mask (c));
 }
 
+DEFUN ("get-byte", Fget_byte, Sget_byte, 0, 2, 0,
+       doc: /* Return a byte value of a character at point.
+Optional 1st arg POSITION, if non-nil, is a position of a character to get
+a byte value.
+Optional 2nd arg STRING, if non-nil, is a string of which first
+character is a target to get a byte value.  In this case, POSITION, if
+non-nil, is an index of a target character in the string.
+
+If the current buffer (or STRING) is multibyte, and the target
+character is not ASCII nor 8-bit character, an error is signalled.  */)
+  (Lisp_Object position, Lisp_Object string)
+{
+  int c;
+  EMACS_INT pos;
+  unsigned char *p;
+
+  if (NILP (string))
+    {
+      if (NILP (position))
+	{
+	  p = PT_ADDR;
+	}
+      else
+	{
+	  CHECK_NUMBER_COERCE_MARKER (position);
+	  if (XINT (position) < BEGV || XINT (position) >= ZV)
+	    args_out_of_range_3 (position, make_number (BEGV), make_number (ZV));
+	  pos = XFASTINT (position);
+	  p = CHAR_POS_ADDR (pos);
+	}
+      if (NILP (current_buffer->enable_multibyte_characters))
+	return make_number (*p);
+    }
+  else
+    {
+      CHECK_STRING (string);
+      if (NILP (position))
+	{
+	  p = SDATA (string);
+	}
+      else
+	{
+	  CHECK_NATNUM (position);
+	  if (XINT (position) >= SCHARS (string))
+	    args_out_of_range (string, position);
+	  pos = XFASTINT (position);
+	  p = SDATA (string) + string_char_to_byte (string, pos);
+	}
+      if (! STRING_MULTIBYTE (string))
+	return make_number (*p);
+    }
+  c = STRING_CHAR (p);
+  if (CHAR_BYTE8_P (c))
+    c = CHAR_TO_BYTE8 (c);
+  else if (! ASCII_CHAR_P (c))
+    error ("Not an ASCII nor an 8-bit character: %d", c);
+  return make_number (c);
+}
+
+
 void
-init_character_once ()
+init_character_once (void)
 {
 }
 
 #ifdef emacs
 
 void
-syms_of_character ()
+syms_of_character (void)
 {
   DEFSYM (Qcharacterp, "characterp");
   DEFSYM (Qauto_fill_chars, "auto-fill-chars");
@@ -1014,22 +1037,22 @@ syms_of_character ()
   defsubr (&Scharacterp);
   defsubr (&Sunibyte_char_to_multibyte);
   defsubr (&Smultibyte_char_to_unibyte);
-  defsubr (&Schar_bytes);
   defsubr (&Schar_width);
   defsubr (&Sstring_width);
   defsubr (&Schar_direction);
   defsubr (&Sstring);
   defsubr (&Sunibyte_string);
   defsubr (&Schar_resolve_modifiers);
+  defsubr (&Sget_byte);
 
-  DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
+  DEFVAR_LISP ("translation-table-vector",  Vtranslation_table_vector,
 	       doc: /*
 Vector recording all translation tables ever defined.
 Each element is a pair (SYMBOL . TABLE) relating the table to the
 symbol naming it.  The ID of a translation table is an index into this vector.  */);
   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
 
-  DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
+  DEFVAR_LISP ("auto-fill-chars", Vauto_fill_chars,
 	       doc: /*
 A char-table for characters which invoke auto-filling.
 Such characters have value t in this table.  */);
@@ -1037,7 +1060,7 @@ Such characters have value t in this table.  */);
   CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt);
   CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt);
 
-  DEFVAR_LISP ("char-width-table", &Vchar_width_table,
+  DEFVAR_LISP ("char-width-table", Vchar_width_table,
 	       doc: /*
 A char-table for width (columns) of each character.  */);
   Vchar_width_table = Fmake_char_table (Qnil, make_number (1));
@@ -1045,11 +1068,11 @@ A char-table for width (columns) of each character.  */);
   char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR,
 			make_number (4));
 
-  DEFVAR_LISP ("char-direction-table", &Vchar_direction_table,
+  DEFVAR_LISP ("char-direction-table", Vchar_direction_table,
 	       doc: /* A char-table for direction of each character.  */);
   Vchar_direction_table = Fmake_char_table (Qnil, make_number (1));
 
-  DEFVAR_LISP ("printable-chars", &Vprintable_chars,
+  DEFVAR_LISP ("printable-chars", Vprintable_chars,
 	       doc: /* A char-table for each printable character.  */);
   Vprintable_chars = Fmake_char_table (Qnil, Qnil);
   Fset_char_table_range (Vprintable_chars,
@@ -1058,24 +1081,36 @@ A char-table for width (columns) of each character.  */);
 			 Fcons (make_number (160),
 				make_number (MAX_5_BYTE_CHAR)), Qt);
 
-  DEFVAR_LISP ("char-script-table", &Vchar_script_table,
+  DEFVAR_LISP ("char-script-table", Vchar_script_table,
 	       doc: /* Char table of script symbols.
 It has one extra slot whose value is a list of script symbols.  */);
 
   /* Intern this now in case it isn't already done.
      Setting this variable twice is harmless.
      But don't staticpro it here--that is done in alloc.c.  */
-  Qchar_table_extra_slots = intern ("char-table-extra-slots");
+  Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
   DEFSYM (Qchar_script_table, "char-script-table");
   Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
   Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
 
-  DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars,
-	       doc: /* Alist of scripts vs the representative characters.  */);
+  DEFVAR_LISP ("script-representative-chars", Vscript_representative_chars,
+	       doc: /* Alist of scripts vs the representative characters.
+Each element is a cons (SCRIPT . CHARS).
+SCRIPT is a symbol representing a script or a subgroup of a script.
+CHARS is a list or a vector of characters.
+If it is a list, all characters in the list are necessary for supporting SCRIPT.
+If it is a vector, one of the characters in the vector is necessary.
+This variable is used to find a font for a specific script.  */);
   Vscript_representative_chars = Qnil;
+
+  DEFVAR_LISP ("unicode-category-table", Vunicode_category_table,
+	       doc: /* Char table of Unicode's "General Category".
+All Unicode characters have one of the following values (symbol):
+  Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po,
+  Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn
+See The Unicode Standard for the meaning of those values.  */);
+  /* The correct char-table is setup in characters.el.  */
+  Vunicode_category_table = Qnil;
 }
 
 #endif /* emacs */
-
-/* arch-tag: b6665960-3c3d-4184-85cd-af4318197999
-   (do not change this comment) */