/* Basic multilingual character support.
- Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
- Copyright (C) 2001 Free Software Foundation, Inc.
+ Copyright (C) 2001, 2002, 2003, 2004, 2005,
+ 2006 Free Software Foundation, Inc.
+ Copyright (C) 1995, 1997, 1998, 1999, 2000, 2001
+ National Institute of Advanced Industrial Science and Technology (AIST)
+ Registration Number H14PRO021
This file is part of GNU Emacs.
You should have received a copy of the GNU General Public License
along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
+the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
/* At first, see the document in `charset.h' to understand the code in
this file. */
Lisp_Object Qunknown;
/* Declaration of special leading-codes. */
-int leading_code_private_11; /* for private DIMENSION1 of 1-column */
-int leading_code_private_12; /* for private DIMENSION1 of 2-column */
-int leading_code_private_21; /* for private DIMENSION2 of 1-column */
-int leading_code_private_22; /* for private DIMENSION2 of 2-column */
+EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
+EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
+EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
+EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */
/* Declaration of special charsets. The values are set by
Fsetup_special_charsets. */
int charset_latin_jisx0201; /* JISX0201.Roman (Japanese Roman) */
int charset_big5_1; /* Big5 Level 1 (Chinese Traditional) */
int charset_big5_2; /* Big5 Level 2 (Chinese Traditional) */
+int charset_mule_unicode_0100_24ff;
+int charset_mule_unicode_2500_33ff;
+int charset_mule_unicode_e000_ffff;
Lisp_Object Qcharset_table;
int _fetch_multibyte_char_len;
/* Offset to add to a non-ASCII value when inserting it. */
-int nonascii_insert_offset;
+EMACS_INT nonascii_insert_offset;
/* Translation table for converting non-ASCII unibyte characters
to multibyte codes, or nil. */
invalid_character (c)
int c;
{
- error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
+ error ("Invalid character: %d, #o%o, #x%x", c, c, c);
}
/* Parse string STR of length LENGTH and fetch information of a
(charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F; \
} while (0)
-/* 1 if CHARSET, C1, and C2 compose a valid character, else 0. */
+/* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
+ Note that this intentionally allows invalid components, such
+ as 0xA0 0xA0, because there exist many files that contain
+ such invalid byte sequences, especially in EUC-GB. */
#define CHAR_COMPONENTS_VALID_P(charset, c1, c2) \
((charset) == CHARSET_ASCII \
? ((c1) >= 0 && (c1) <= 0x7F) \
/* If C still has any modifier bits, just ignore it. */
c &= ~CHAR_MODIFIER_MASK;
}
-
+
if (SINGLE_BYTE_CHAR_P (c))
{
if (ASCII_BYTE_P (c) || c >= 0xA0)
? LEADING_CODE_PRIVATE_21
: LEADING_CODE_PRIVATE_22)));
*p++ = charset;
- if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
+ if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
return -1;
if (c1)
{
return 0;
else if (c >= MAX_CHAR)
return 0;
-
+
SPLIT_CHAR (c, charset, c1, c2);
if (! CHARSET_DEFINED_P (charset))
return 0;
SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
dimension = CHARSET_DIMENSION (alt_charset);
- if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
+ if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
/* CH is not a generic character, just return it. */
return XFASTINT (ch);
Lisp_Object val;
int charset;
- return ((SYMBOLP (charset_symbol)
- && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
- && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
- CHARSET_VALID_P (charset)))
- ? charset : -1);
+ /* This originally used a ?: operator, but reportedly the HP-UX
+ compiler version HP92453-01 A.10.32.22 miscompiles that. */
+ if (SYMBOLP (charset_symbol)
+ && VECTORP (val = Fget (charset_symbol, Qcharset))
+ && CHARSET_VALID_P (charset =
+ XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
+ return charset;
+ else
+ return -1;
}
/* Return an identification number for a new private charset of
Lisp_Object *vec;
if (!NILP (charset_id))
- CHECK_NUMBER (charset_id, 0);
- CHECK_SYMBOL (charset_symbol, 1);
- CHECK_VECTOR (info_vector, 2);
+ CHECK_NUMBER (charset_id);
+ CHECK_SYMBOL (charset_symbol);
+ CHECK_VECTOR (info_vector);
if (! NILP (charset_id))
{
|| !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
|| !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
|| !INTEGERP (vec[4])
- || !(XINT (vec[4]) == -1 || XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
+ || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
|| !INTEGERP (vec[5])
|| !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
|| !STRINGP (vec[6])
|| !STRINGP (vec[7])
|| !STRINGP (vec[8]))
error ("Invalid info-vector argument for defining charset %s",
- XSYMBOL (charset_symbol)->name->data);
+ SDATA (SYMBOL_NAME (charset_symbol)));
if (NILP (charset_id))
{
charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
if (XINT (charset_id) == 0)
error ("There's no room for a new private charset %s",
- XSYMBOL (charset_symbol)->name->data);
+ SDATA (SYMBOL_NAME (charset_symbol)));
}
update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
Vcharset_list = Fcons (charset_symbol, Vcharset_list);
+ Fupdate_coding_systems_internal ();
return Qnil;
}
DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
Sget_unused_iso_final_char, 2, 2, 0,
- doc: /* Return an unsed ISO's final char for a charset of DIMENISION and CHARS.
+ doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
DIMENSION is the number of bytes to represent a character: 1 or 2.
CHARS is the number of characters in a dimension: 94 or 96.
{
int final_char;
- CHECK_NUMBER (dimension, 0);
- CHECK_NUMBER (chars, 1);
+ CHECK_NUMBER (dimension);
+ CHECK_NUMBER (chars);
if (XINT (dimension) != 1 && XINT (dimension) != 2)
error ("Invalid charset dimension %d, it should be 1 or 2",
XINT (dimension));
DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
4, 4, 0,
- doc: /* Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.
-CHARSET should be defined by `defined-charset' in advance. */)
- (dimension, chars, final_char, charset_symbol)
- Lisp_Object dimension, chars, final_char, charset_symbol;
+ doc: /* Declare an equivalent charset for ISO-2022 decoding.
+
+On decoding by an ISO-2022 base coding system, when a charset
+specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
+if CHARSET is designated instead. */)
+ (dimension, chars, final_char, charset)
+ Lisp_Object dimension, chars, final_char, charset;
{
- int charset;
+ int charset_id;
- CHECK_NUMBER (dimension, 0);
- CHECK_NUMBER (chars, 1);
- CHECK_NUMBER (final_char, 2);
- CHECK_SYMBOL (charset_symbol, 3);
+ CHECK_NUMBER (dimension);
+ CHECK_NUMBER (chars);
+ CHECK_NUMBER (final_char);
+ CHECK_SYMBOL (charset);
if (XINT (dimension) != 1 && XINT (dimension) != 2)
error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
- if ((charset = get_charset_id (charset_symbol)) < 0)
- error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
+ if ((charset_id = get_charset_id (charset)) < 0)
+ error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
- ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
+ ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
return Qnil;
}
int
find_charset_in_text (ptr, nchars, nbytes, charsets, table)
- unsigned char *ptr;
+ const unsigned char *ptr;
int nchars, nbytes, *charsets;
Lisp_Object table;
{
{
if (charsets && nbytes > 0)
{
- unsigned char *endp = ptr + nbytes;
+ const unsigned char *endp = ptr + nbytes;
int maskbits = 0;
while (ptr < endp && maskbits != 7)
{
maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
ptr++;
- }
+ }
if (maskbits & 1)
charsets[CHARSET_ASCII] = 1;
int i;
Lisp_Object val;
- CHECK_STRING (str, 0);
+ CHECK_STRING (str);
bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
- find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
- STRING_BYTES (XSTRING (str)), charsets, table);
+ find_charset_in_text (SDATA (str), SCHARS (str),
+ SBYTES (str), charsets, table);
val = Qnil;
if (charsets[1])
{
int charset_id, c1, c2;
- CHECK_NUMBER (charset, 0);
+ CHECK_NUMBER (charset);
charset_id = XINT (charset);
if (!CHARSET_DEFINED_P (charset_id))
error ("Invalid charset ID: %d", XINT (charset));
c1 = 0;
else
{
- CHECK_NUMBER (code1, 1);
+ CHECK_NUMBER (code1);
c1 = XINT (code1);
}
if (NILP (code2))
c2 = 0;
else
{
- CHECK_NUMBER (code2, 2);
+ CHECK_NUMBER (code2);
c2 = XINT (code2);
}
}
DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
- doc: /* Return list of charset and one or two position-codes of CHAR.
-If CHAR is invalid as a character code,
-return a list of symbol `unknown' and CHAR. */)
+ doc: /* Return list of charset and one or two position-codes of CH.
+If CH is invalid as a character code,
+return a list of symbol `unknown' and CH. */)
(ch)
Lisp_Object ch;
{
int c, charset, c1, c2;
- CHECK_NUMBER (ch, 0);
+ CHECK_NUMBER (ch);
c = XFASTINT (ch);
if (!CHAR_VALID_P (c, 1))
return Fcons (Qunknown, Fcons (ch, Qnil));
}
DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
- doc: /* Return charset of CHAR. */)
+ doc: /* Return charset of CH. */)
(ch)
Lisp_Object ch;
{
- CHECK_NUMBER (ch, 0);
+ CHECK_NUMBER (ch);
return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
}
{
int charset;
- CHECK_NUMBER (dimension, 0);
- CHECK_NUMBER (chars, 1);
- CHECK_NUMBER (final_char, 2);
+ CHECK_NUMBER (dimension);
+ CHECK_NUMBER (chars);
+ CHECK_NUMBER (final_char);
if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
return Qnil;
{
int c;
- CHECK_NUMBER (ch, 0);
+ CHECK_NUMBER (ch);
c = XINT (ch);
if (c < 0 || c >= 0400)
error ("Invalid unibyte character: %d", c);
{
int c;
- CHECK_NUMBER (ch, 0);
+ CHECK_NUMBER (ch);
c = XINT (ch);
if (! CHAR_VALID_P (c, 0))
error ("Invalid multibyte character: %d", c);
}
DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
- doc: /* Return 1 regardless of the argument CHAR.
-This is now an obsolete function. We keep it just for backward compatibility. */)
+ doc: /* Return 1 regardless of the argument CH. */)
(ch)
Lisp_Object ch;
{
- CHECK_NUMBER (ch, 0);
+ CHECK_NUMBER (ch);
return make_number (1);
}
: 4))))
DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
- doc: /* Return width of CHAR when displayed in the current buffer.
+ doc: /* Return width of CH when displayed in the current buffer.
The width is measured by how many columns it occupies on the screen.
Tab is taken to occupy `tab-width' columns. */)
(ch)
int c;
struct Lisp_Char_Table *dp = buffer_display_table ();
- CHECK_NUMBER (ch, 0);
+ CHECK_NUMBER (ch);
c = XINT (ch);
int
c_string_width (str, len, precision, nchars, nbytes)
- unsigned char *str;
- int precision, *nchars, *nbytes;
+ const unsigned char *str;
+ int len, precision, *nchars, *nbytes;
{
int i = 0, i_byte = 0;
int width = 0;
Lisp_Object string;
int precision, *nchars, *nbytes;
{
- int len = XSTRING (string)->size;
- int len_byte = STRING_BYTES (XSTRING (string));
- unsigned char *str = XSTRING (string)->data;
+ int len = SCHARS (string);
+ int len_byte = SBYTES (string);
+ /* This set multibyte to 0 even if STRING is multibyte when it
+ contains only ascii and eight-bit-graphic, but that's
+ intentional. */
+ int multibyte = len < len_byte;
+ const unsigned char *str = SDATA (string);
int i = 0, i_byte = 0;
int width = 0;
struct Lisp_Char_Table *dp = buffer_display_table ();
}
else if (dp)
{
- int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+ int c;
+ if (multibyte)
+ c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
+ else
+ c = str[i_byte], bytes = 1;
chars = 1;
val = DISP_CHAR_VECTOR (dp, c);
if (VECTORP (val))
else
{
chars = 1;
- PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
+ if (multibyte)
+ PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
+ else
+ bytes = 1;
thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
}
only the base leading-code is considered; the validity of
the following bytes is not checked. Tabs in STRING are always
taken to occupy `tab-width' columns. */)
- (str)
- Lisp_Object str;
+ (string)
+ Lisp_Object string;
{
Lisp_Object val;
- CHECK_STRING (str, 0);
- XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
+ CHECK_STRING (string);
+ XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
return val;
}
DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
- doc: /* Return the direction of CHAR.
+ doc: /* Return the direction of CH.
The returned value is 0 for left-to-right and 1 for right-to-left. */)
(ch)
Lisp_Object ch;
{
int charset;
- CHECK_NUMBER (ch, 0);
+ CHECK_NUMBER (ch);
charset = CHAR_CHARSET (XFASTINT (ch));
if (!CHARSET_DEFINED_P (charset))
invalid_character (XINT (ch));
return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
}
-DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
- doc: /* Return number of characters between BEG and END. */)
- (beg, end)
- Lisp_Object beg, end;
-{
- int from, to;
-
- CHECK_NUMBER_COERCE_MARKER (beg, 0);
- CHECK_NUMBER_COERCE_MARKER (end, 1);
-
- from = min (XFASTINT (beg), XFASTINT (end));
- to = max (XFASTINT (beg), XFASTINT (end));
-
- return make_number (to - from);
-}
-
/* Return the number of characters in the NBYTES bytes at PTR.
This works by looking at the contents and checking for multibyte sequences.
However, if the current buffer has enable-multibyte-characters = nil,
int
chars_in_text (ptr, nbytes)
- unsigned char *ptr;
+ const unsigned char *ptr;
int nbytes;
{
/* current_buffer is null at early stages of Emacs initialization. */
int
multibyte_chars_in_text (ptr, nbytes)
- unsigned char *ptr;
+ const unsigned char *ptr;
int nbytes;
{
- unsigned char *endp;
+ const unsigned char *endp;
int chars, bytes;
endp = ptr + nbytes;
0x80..0x9F are represented by 2 bytes in multibyte text. */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
- unsigned char *str;
+ const unsigned char *str;
int len, *nchars, *nbytes;
{
- unsigned char *endp = str + len;
+ const unsigned char *endp = str + len;
int n, chars = 0, bytes = 0;
while (str < endp)
{
while (n--)
*to++ = *p++;
- }
+ }
else
{
*to++ = LEADING_CODE_8_BIT_CONTROL;
endp = str + len;
safe_bcopy (p, endp - bytes, bytes);
p = endp - bytes;
- while (p < endp)
+ while (p < endp)
{
if (*p < 0x80 || *p >= 0xA0)
*to++ = *p++;
while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
to = p;
- while (p < endp)
+ while (p < endp)
{
if (*p == LEADING_CODE_8_BIT_CONTROL)
*to++ = *(p + 1) - 0x20, p += 2;
}
\f
-DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
+DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
doc: /* Concatenate all the argument characters and make the result a string.
usage: (string &rest CHARACTERS) */)
(n, args)
int n;
Lisp_Object *args;
{
- int i;
- unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
- unsigned char *p = buf;
+ int i, bufsize;
+ unsigned char *buf, *p;
int c;
int multibyte = 0;
+ Lisp_Object ret;
+ USE_SAFE_ALLOCA;
+
+ bufsize = MAX_MULTIBYTE_LENGTH * n;
+ SAFE_ALLOCA (buf, unsigned char *, bufsize);
+ p = buf;
for (i = 0; i < n; i++)
{
- CHECK_NUMBER (args[i], 0);
+ CHECK_NUMBER (args[i]);
if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
multibyte = 1;
}
*p++ = c;
}
- return make_string_from_bytes (buf, n, p - buf);
+ ret = make_string_from_bytes (buf, n, p - buf);
+ SAFE_FREE ();
+
+ return ret;
}
#endif /* emacs */
charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
charset_big5_1 = charset_id_internal ("chinese-big5-1");
charset_big5_2 = charset_id_internal ("chinese-big5-2");
+ charset_mule_unicode_0100_24ff
+ = charset_id_internal ("mule-unicode-0100-24ff");
+ charset_mule_unicode_2500_33ff
+ = charset_id_internal ("mule-unicode-2500-33ff");
+ charset_mule_unicode_e000_ffff
+ = charset_id_internal ("mule-unicode-e000-ffff");
return Qnil;
}
defsubr (&Schar_width);
defsubr (&Sstring_width);
defsubr (&Schar_direction);
- defsubr (&Schars_in_region);
defsubr (&Sstring);
defsubr (&Ssetup_special_charsets);
}
#endif /* emacs */
+
+/* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
+ (do not change this comment) */