/* Basic character support.
Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
- Licensed to the Free Software Foundation.
- Copyright (C) 2001 Free Software Foundation, Inc.
- Copyright (C) 2001, 2002
+ Licensed to the Free Software Foundation.
+ Copyright (C) 2001, 2005 Free Software Foundation, Inc.
+ Copyright (C) 2003
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H13PRO009
Lisp_Object Qauto_fill_chars;
+/* Char-table of information about which character to unify to which
+ Unicode character. */
Lisp_Object Vchar_unify_table;
/* A char-table. An element is non-nil iff the corresponding
/* Mapping table from unibyte chars to multibyte chars. */
int unibyte_to_multibyte_table[256];
+/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
+ char. */
+char unibyte_has_multibyte_table[256];
+
\f
+/* Store multibyte form of character C at P. If C has modifier bits,
+ handle them appropriately. */
+
int
-char_string_with_unification (c, p)
+char_string (c, p)
int c;
unsigned char *p;
{
int bytes;
+ if (c & CHAR_MODIFIER_MASK)
+ {
+ /* As a non-ASCII character can't have modifier bits, we just
+ ignore the bits. */
+ if (ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
+ {
+ /* For Meta, Shift, and Control modifiers, we need special care. */
+ if (c & CHAR_META)
+ {
+ /* Move the meta bit to the right place for a string. */
+ c = (c & ~CHAR_META) | 0x80;
+ }
+ if (c & CHAR_SHIFT)
+ {
+ /* Shift modifier is valid only with [A-Za-z]. */
+ if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
+ c &= ~CHAR_SHIFT;
+ else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
+ c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+ }
+ if (c & CHAR_CTL)
+ {
+ /* Simulate the code in lread.c. */
+ /* Allow `\C- ' and `\C-?'. */
+ if (c == (CHAR_CTL | ' '))
+ c = 0;
+ else if (c == (CHAR_CTL | '?'))
+ c = 127;
+ /* ASCII control chars are made from letters (both cases),
+ as well as the non-letters within 0100...0137. */
+ else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
+ c &= (037 | (~0177 & ~CHAR_CTL));
+ else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
+ c &= (037 | (~0177 & ~CHAR_CTL));
+ }
+ }
+
+ /* If C still has any modifier bits, just ignore them. */
+ c &= ~CHAR_MODIFIER_MASK;
+ }
+
MAYBE_UNIFY_CHAR (c);
- if (c <= MAX_3_BYTE_CHAR || c > MAX_5_BYTE_CHAR)
+ if (c <= MAX_3_BYTE_CHAR)
{
bytes = CHAR_STRING (c, p);
}
p[3] = (0x80 | (c & 0x3F));
bytes = 4;
}
- else
+ else if (c <= MAX_5_BYTE_CHAR)
{
p[0] = 0xF8;
p[1] = (0x80 | ((c >> 18) & 0x0F));
p[4] = (0x80 | (c & 0x3F));
bytes = 5;
}
+ else
+ {
+ c = CHAR_TO_BYTE8 (c);
+ bytes = BYTE8_STRING (c, p);
+ }
return bytes;
}
+/* Return a character whose multibyte form is at P. If LEN is not
+ NULL, it must be a pointer to integer. In that case, set *LEN to
+ the byte length of the multibyte form. If ADVANCED is not NULL, it
+ must be a pointer to a pointer to unsigned char. In that case, set
+ *ADVANCED to the ending address (i.e. the starting address of the
+ next character) of the multibyte form. */
+
int
-string_char_with_unification (p, advanced, len)
+string_char (p, advanced, len)
const unsigned char *p;
const unsigned char **advanced;
int *len;
/* Translate character C by translation table TABLE. If C is
negative, translate a character specified by CHARSET and CODE. If
no translation is found in TABLE, return the untranslated
- character. */
+ character. If TABLE is a list, its elements are char tables. In
+ this case, translate C by each table in turn. */
int
translate_char (table, c)
Lisp_Object table;
int c;
{
- Lisp_Object ch;
-
- if (! CHAR_TABLE_P (table))
- return c;
- ch = CHAR_TABLE_REF (table, c);
- if (! CHARACTERP (ch))
- return c;
- return XINT (ch);
+ if (CHAR_TABLE_P (table))
+ {
+ Lisp_Object ch;
+
+ ch = CHAR_TABLE_REF (table, c);
+ if (CHARACTERP (ch))
+ c = XINT (ch);
+ }
+ else
+ {
+ for (; CONSP (table); table = XCDR (table))
+ c = translate_char (XCAR (table), c);
+ }
+ return c;
}
/* Convert the multibyte character C to unibyte 8-bit character based
int
c_string_width (str, len, precision, nchars, nbytes)
- unsigned char *str;
+ const unsigned char *str;
int precision, *nchars, *nbytes;
{
int i = 0, i_byte = 0;
Lisp_Object string;
int precision, *nchars, *nbytes;
{
- int len = XSTRING (string)->size;
- unsigned char *str = XSTRING (string)->data;
+ int len = SCHARS (string);
+ unsigned char *str = SDATA (string);
int i = 0, i_byte = 0;
int width = 0;
struct Lisp_Char_Table *dp = buffer_display_table ();
int chars, bytes, thiswidth;
Lisp_Object val;
int cmp_id;
- int ignore, end;
+ EMACS_INT ignore, end;
if (find_composition (i, -1, &ignore, &end, &val, string)
&& ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
return CHAR_TABLE_REF (Vchar_direction_table, c);
}
-DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
- doc: /* Return number of characters between BEG and END.
-This is now an obsolete function. We keep it just for backward compatibility. */)
- (beg, end)
- Lisp_Object beg, end;
-{
- int from, to;
-
- CHECK_NUMBER_COERCE_MARKER (beg);
- CHECK_NUMBER_COERCE_MARKER (end);
-
- from = min (XFASTINT (beg), XFASTINT (end));
- to = max (XFASTINT (beg), XFASTINT (end));
-
- return make_number (to - from);
-}
-
/* Return the number of characters in the NBYTES bytes at PTR.
This works by looking at the contents and checking for multibyte
sequences while assuming that there's no invalid sequence.
int
chars_in_text (ptr, nbytes)
- unsigned char *ptr;
+ const unsigned char *ptr;
int nbytes;
{
/* current_buffer is null at early stages of Emacs initialization. */
int
multibyte_chars_in_text (ptr, nbytes)
- unsigned char *ptr;
+ const unsigned char *ptr;
int nbytes;
{
- unsigned char *endp = ptr + nbytes;
+ const unsigned char *endp = ptr + nbytes;
int chars = 0;
while (ptr < endp)
void
parse_str_as_multibyte (str, len, nchars, nbytes)
- unsigned char *str;
+ const unsigned char *str;
int len, *nchars, *nbytes;
{
- unsigned char *endp = str + len;
+ const unsigned char *endp = str + len;
int n, chars = 0, bytes = 0;
if (len >= MAX_MULTIBYTE_LENGTH)
{
- unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
+ const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH;
while (str < adjusted_endp)
{
if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0)
{
while (n--)
*to++ = *p++;
- }
+ }
else
{
int c = *p++;
endp = str + len;
safe_bcopy ((char *) p, (char *) (endp - bytes), bytes);
p = endp - bytes;
- while (p < endp)
+ while (p < endp)
{
int c = *p++;
p += len;
}
to = str + (p - str);
- while (p < endp)
+ while (p < endp)
{
c = *p;
len = BYTES_BY_CHAR_HEAD (c);
Lisp_Object string;
{
int multibyte = STRING_MULTIBYTE (string);
- int nbytes = STRING_BYTES (XSTRING (string));
- unsigned char *p = XSTRING (string)->data;
+ int nbytes = SBYTES (string);
+ unsigned char *p = SDATA (string);
unsigned char *pend = p + nbytes;
int count = 0;
int c, len;
string_escape_byte8 (string)
Lisp_Object string;
{
- int nchars = XSTRING (string)->size;
- int nbytes = STRING_BYTES (XSTRING (string));
+ int nchars = SCHARS (string);
+ int nbytes = SBYTES (string);
int multibyte = STRING_MULTIBYTE (string);
int byte8_count;
const unsigned char *src, *src_end;
/* Convert 1-byte sequence of byte8 chars to 4-byte octal. */
val = make_uninit_string (nbytes + byte8_count * 3);
- src = XSTRING (string)->data;
+ src = SDATA (string);
src_end = src + nbytes;
- dst = XSTRING (val)->data;
+ dst = SDATA (val);
if (multibyte)
while (src < src_end)
{
}
\f
-DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
+DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
doc: /*
Concatenate all the argument characters and make the result a string.
usage: (string &rest CHARACTERS) */)
defsubr (&Schar_width);
defsubr (&Sstring_width);
defsubr (&Schar_direction);
- defsubr (&Schars_in_region);
defsubr (&Sstring);
DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
}
#endif /* emacs */
+
+/* arch-tag: b6665960-3c3d-4184-85cd-af4318197999
+ (do not change this comment) */