/* Multilingual characters handler.
Ver.1.0
-
Copyright (C) 1995 Free Software Foundation, Inc.
Copyright (C) 1995 Electrotechnical Laboratory, JAPAN.
- This program is free software; you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 2, or (at your option)
- any later version.
+This file is part of GNU Emacs.
+
+GNU Emacs is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 2, or (at your option)
+any later version.
- This program is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
+GNU Emacs is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
- You should have received a copy of the GNU General Public License
- along with this program; if not, write to the Free Software
- Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */
+You should have received a copy of the GNU General Public License
+along with GNU Emacs; see the file COPYING. If not, write to
+the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+Boston, MA 02111-1307, USA. */
/* At first, see the document in `charset.h' to understand the code in
this file. */
#include "buffer.h"
#include "charset.h"
#include "coding.h"
+#include "disptab.h"
#else /* not emacs */
CHARS, and FINAL-CHAR) to Emacs' charset. */
int iso_charset_table[2][2][128];
+/* Table of pointers to the structure `cmpchar_info' indexed by
+ CMPCHAR-ID. */
+struct cmpchar_info **cmpchar_table;
+/* The current size of `cmpchar_table'. */
+static int cmpchar_table_size;
+/* Number of the current composite characters. */
+int n_cmpchars;
+
/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */
unsigned char *_fetch_multibyte_char_p;
int _fetch_multibyte_char_len;
is not a composite character, the multi-byte form is set in WORKBUF
and STR points WORKBUF. The caller should allocate at least 4-byte
area at WORKBUF in advance. Returns the length of the multi-byte
- form.
+ form. If C is an invalid character to have a multi-byte form,
+ signal an error.
Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this
function directly if C can be an ASCII character. */
int c;
unsigned char *workbuf, **str;
{
- int charset;
- unsigned char c1, c2;
+ int charset, c1, c2;
if (COMPOSITE_CHAR_P (c))
{
}
else
{
- *str = workbuf;
- return 0;
+ error ("Invalid characer: %d", c);
}
}
SPLIT_NON_ASCII_CHAR (c, charset, c1, c2);
+ if (!charset
+ || ! CHARSET_DEFINED_P (charset)
+ || c1 >= 0 && c1 < 32
+ || c2 >= 0 && c2 < 32)
+ error ("Invalid characer: %d", c);
*str = workbuf;
*workbuf++ = CHARSET_LEADING_CODE_BASE (charset);
if (*workbuf = CHARSET_LEADING_CODE_EXT (charset))
workbuf++;
*workbuf++ = c1 | 0x80;
- if (c2)
+ if (c2 >= 0)
*workbuf++ = c2 | 0x80;
return (workbuf - *str);
return 0;
}
+/* Return a character unified with C (or a character made of CHARSET,
+   C1, and C2) in unification table TABLE.  If no unification is found
+   in TABLE, return C.
+
+   If C is negative, it is first built from CHARSET, C1, and C2 with
+   MAKE_CHAR.  If CHARSET is negative, CHARSET, C1, and C2 are derived
+   from C with SPLIT_CHAR at the point they are needed.  */
+int
+unify_char (table, c, charset, c1, c2)
+     Lisp_Object table;
+     int c, charset, c1, c2;
+{
+  Lisp_Object ch;
+  int alt_charset, alt_c1, alt_c2, dimension;
+
+  if (c < 0)
+    c = MAKE_CHAR (charset, c1, c2);
+
+  /* TABLE is not a char-table, or its entry for C is not a
+     non-negative integer: there is nothing to unify with.  */
+  if (!CHAR_TABLE_P (table)
+      || (ch = Faref (table, make_number (c)), !INTEGERP (ch))
+      || XINT (ch) < 0)
+    return c;
+
+  SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
+  dimension = CHARSET_DIMENSION (alt_charset);
+  if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
+    /* CH is not a generic character, just return it.  */
+    return XFASTINT (ch);
+
+  /* Since CH is a generic character, we must return a specific
+     character which has the same position codes as C from CH.  */
+  if (charset < 0)
+    SPLIT_CHAR (c, charset, c1, c2);
+  if (dimension != CHARSET_DIMENSION (charset))
+    /* We can't make such a character because of dimension mismatch.  */
+    return c;
+  /* The position codes of C (C1 and C2) are used as they are; ALT_C1
+     and ALT_C2 only serve the generic-character test above.  */
+  return MAKE_CHAR (alt_charset, c1, c2);
+}
+
/* Update the table Vcharset_table with the given arguments (see the
document of `define-charset' for the meaning of each argument).
Several other table contents are also updated. The caller should
int bytes;
unsigned char leading_code_base, leading_code_ext;
- if (NILP (Faref (Vcharset_table, charset_id)))
- Faset (Vcharset_table, charset_id,
- Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil));
+ if (NILP (CHARSET_TABLE_ENTRY (charset)))
+ CHARSET_TABLE_ENTRY (charset)
+ = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
/* Get byte length of multibyte form, base leading-code, and
extended leading-code of the charset. See the comment under the
is set to nil. */
int i;
- for (i = 0; i < MAX_CHARSET; i++)
+ for (i = 0; i <= MAX_CHARSET; i++)
if (!NILP (CHARSET_TABLE_ENTRY (i)))
{
if (CHARSET_DIMENSION (i) == XINT (dimension)
break;
}
}
- if (i >= MAX_CHARSET)
+ if (i > MAX_CHARSET)
/* No such a charset. */
CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
= make_number (-1);
DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
"Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
-If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\
+If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
treated as a private charset.\n\
INFO-VECTOR is a vector of the format:\n\
[DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
vec[4], vec[5], vec[6], vec[7], vec[8]);
- Fput (charset_symbol, Qcharset, Faref (Vcharset_table, charset_id));
+ Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
Vcharset_list = Fcons (charset_symbol, Vcharset_list);
return Qnil;
/* Return number of different charsets in STR of length LEN. In
addition, for each found charset N, CHARSETS[N] is set 1. The
- caller should allocate CHARSETS (MAX_CHARSET bytes) in advance. */
+ caller should allocate CHARSETS (MAX_CHARSET + 1 elements of type
+ int) in advance.
+ If TABLE is a char-table, each character is first passed through
+ unify_char with TABLE and the charset of the result is the one
+ recorded; any other value of TABLE is treated as nil. */
int
-find_charset_in_str (str, len, charsets)
- unsigned char *str, *charsets;
- int len;
+find_charset_in_str (str, len, charsets, table)
+ unsigned char *str;
+ int len, *charsets;
+ Lisp_Object table;
{
int num = 0;
+ if (! CHAR_TABLE_P (table))
+ table = Qnil;
+
while (len > 0)
{
int bytes = BYTES_BY_CHAR_HEAD (*str);
- int charset = CHARSET_AT (str);
+ int charset;
+
+ if (NILP (table))
+ charset = CHARSET_AT (str);
+ else
+ {
+ /* Do not declare another `charset' in this block: it would
+ shadow the loop's variable and leave the one used to index
+ CHARSETS below unset. */
+ int c;
+ unsigned char c1, c2;
+
+ SPLIT_STRING(str, bytes, charset, c1, c2);
+ if ((c = unify_char (table, -1, charset, c1, c2)) >= 0)
+ charset = CHAR_CHARSET (c);
+ }
if (!charsets[charset])
{
}
DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
- 2, 2, 0,
+ 2, 3, 0,
"Return a list of charsets in the region between BEG and END.\n\
-BEG and END are buffer positions.")
- (beg, end)
- Lisp_Object beg, end;
+BEG and END are buffer positions.\n\
+Optional arg TABLE if non-nil is a unification table to look up.")
+ (beg, end, table)
+ Lisp_Object beg, end, table;
{
- char charsets[MAX_CHARSET];
+ /* One flag per charset ID, 0 through MAX_CHARSET inclusive; an
+ element is nonzero once that charset has been found. */
+ int charsets[MAX_CHARSET + 1];
int from, to, stop, i;
Lisp_Object val;
stop = to = XFASTINT (end);
if (from < GPT && GPT < to)
stop = GPT;
- bzero (charsets, MAX_CHARSET);
+ bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
+ /* Scan in at most two pieces: when GPT lies strictly inside the
+ region, stop at GPT first and then continue up to TO. */
while (1)
{
- find_charset_in_str (POS_ADDR (from), stop - from, charsets);
+ find_charset_in_str (POS_ADDR (from), stop - from, charsets, table);
if (stop < to)
from = stop, stop = to;
else
break;
}
val = Qnil;
- for (i = MAX_CHARSET - 1; i >= 0; i--)
+ /* Walk the IDs downward: each Fcons prepends, so the resulting list
+ is in ascending charset-ID order. */
+ for (i = MAX_CHARSET; i >= 0; i--)
if (charsets[i])
val = Fcons (CHARSET_SYMBOL (i), val);
return val;
}
DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
- 1, 1, 0,
- "Return a list of charsets in STR.")
- (str)
- Lisp_Object str;
+ 1, 2, 0,
+ "Return a list of charsets in STR.\n\
+Optional arg TABLE if non-nil is a unification table to look up.")
+ (str, table)
+ Lisp_Object str, table;
{
- char charsets[MAX_CHARSET];
+ /* find_charset_in_str takes an `int *' and indexes it by charset ID,
+ so this must be an array of int (as in find-charset-region), with
+ one element per ID from 0 through MAX_CHARSET inclusive. */
+ int charsets[MAX_CHARSET + 1];
int i;
Lisp_Object val;
CHECK_STRING (str, 0);
- bzero (charsets, MAX_CHARSET);
- find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets);
+ /* Clear the whole array: the size is in bytes, not in elements. */
+ bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
+ find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size,
+ charsets, table);
val = Qnil;
- for (i = MAX_CHARSET - 1; i >= 0; i--)
+ /* Walk the IDs downward: each Fcons prepends, so the resulting list
+ is in ascending charset-ID order. */
+ for (i = MAX_CHARSET; i >= 0; i--)
if (charsets[i])
val = Fcons (CHARSET_SYMBOL (i), val);
return val;
}
\f
DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
- "Return a character of CHARSET and position-codes CODE1 and CODE2.\n\
-CODE1 and CODE2 are optional, but if you don't supply\n\
- sufficient position-codes, return a generic character which stands for\n\
-all characters or group of characters in the character sets.\n\
-A generic character can be an argument of `modify-syntax-entry' and\n\
-`modify-category-entry'.")
+ "")
(charset, code1, code2)
Lisp_Object charset, code1, code2;
{
Lisp_Object ch;
{
Lisp_Object val;
- int charset;
- unsigned char c1, c2;
+ int charset, c1, c2;
CHECK_NUMBER (ch, 0);
SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
- return ((charset == CHARSET_COMPOSITION || CHARSET_DIMENSION (charset) == 2)
+ return (c2 >= 0
? Fcons (CHARSET_SYMBOL (charset),
Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
: Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
#define ONE_BYTE_CHAR_WIDTH(c) \
(c < 0x20 \
? (c == '\t' \
- ? current_buffer->tab_width \
+ ? XFASTINT (current_buffer->tab_width) \
: (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
: (c < 0x7f \
? 1 \
(ch)
Lisp_Object ch;
{
- Lisp_Object val;
+ Lisp_Object val, disp;
int c;
CHECK_NUMBER (ch, 0);
- c = XFASTINT (ch);
- if (SINGLE_BYTE_CHAR_P (c))
- XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c));
+ c = XINT (ch);
+
+ /* Get the way the display table would display it. */
+ disp = DISP_CHAR_VECTOR (buffer_display_table (current_buffer), (c));
+
+ if (VECTORP (disp))
+ XSETINT (val, XVECTOR (disp)->size);
+ else if (SINGLE_BYTE_CHAR_P (c))
+ XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
else if (COMPOSITE_CHAR_P (c))
{
int id = COMPOSITE_CHAR_ID (XFASTINT (ch));
/* Return width of string STR of length LEN when displayed in the
current buffer. The width is measured by how many columns it
occupies on the screen. */
+
int
strwidth (str, len)
unsigned char *str;
{
unsigned char *endp = str + len;
int width = 0;
+ struct Lisp_Char_Table *dp = buffer_display_table (current_buffer);
- while (str < endp) {
- if (*str == LEADING_CODE_COMPOSITION)
- {
- int id = str_cmpchar_id (str, endp - str);
+ while (str < endp)
+ {
+ if (*str == LEADING_CODE_COMPOSITION)
+ {
+ int id = str_cmpchar_id (str, endp - str);
- if (id < 0)
- {
- width += 4;
- str++;
- }
- else
- {
- width += cmpchar_table[id]->width;
- str += cmpchar_table[id]->len;
- }
- }
- else
- {
- width += ONE_BYTE_CHAR_WIDTH (*str);
- str += BYTES_BY_CHAR_HEAD (*str);
- }
- }
+ /* str_cmpchar_id returned a negative ID: count 4 columns and
+ step over the leading byte only. */
+ if (id < 0)
+ {
+ width += 4;
+ str++;
+ }
+ else
+ {
+ /* Use the width and byte length recorded in cmpchar_table
+ for this composite character. */
+ width += cmpchar_table[id]->width;
+ str += cmpchar_table[id]->len;
+ }
+ }
+ else
+ {
+ Lisp_Object disp;
+ int thiswidth;
+ int c = STRING_CHAR (str, endp - str);
+
+ /* Get the way the display table would display it. */
+ if (dp)
+ disp = DISP_CHAR_VECTOR (dp, c);
+ else
+ disp = Qnil;
+
+ /* A display-table vector for C gives the width as the number
+ of its elements; otherwise the width is computed from the
+ leading byte alone. */
+ if (VECTORP (disp))
+ thiswidth = XVECTOR (disp)->size;
+ else
+ thiswidth = ONE_BYTE_CHAR_WIDTH (*str);
+
+ width += thiswidth;
+ str += BYTES_BY_CHAR_HEAD (*str);
+ }
+ }
return width;
}
DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0,
"Concatenate all the argument characters and make the result a string.")
- (nargs, args)
- int nargs;
+ (n, args)
+ int n;
Lisp_Object *args;
{
- int i, n = XINT (nargs);
+ int i;
+ /* Room for N characters of MAX_LENGTH_OF_MULTI_BYTE_FORM bytes each.
+ The buffer comes from alloca, so no explicit free is needed before
+ returning. */
unsigned char *buf
- = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
+ = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n);
unsigned char *p = buf;
Lisp_Object val;
}
+ /* P - BUF is the number of bytes handed to make_string. */
val = make_string (buf, p - buf);
- free (buf);
return val;
}
}
#endif
-/* Table of pointers to the structure `cmpchar_info' indexed by
- CMPCHAR-ID. */
-struct cmpchar_info **cmpchar_table;
-/* The current size of `cmpchar_table'. */
-static int cmpchar_table_size;
-/* Number of the current composite characters. */
-int n_cmpchars;
-
#define CMPCHAR_HASH_TABLE_SIZE 0xFFF
static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE];
}
/* We have to register the composite character in cmpchar_table. */
+ if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK))
+ /* No, we have no more room for a new composite character. */
+ return -1;
+
/* Make the entry in hash table. */
if (hashp == NULL)
{
DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule,
2, 2, 0,
- "Return the IDXth composition rule embedded in composite character CHARACTER.
-The returned rule is for composing the IDXth component
-on the (IDX-1)th component. If IDX is 0, the returned value is always 255.")
- (character, idx)
- Lisp_Object character, idx;
+ "Return the Nth composition rule embedded in composite character CHARACTER.\n\
+The returned rule is for composing the Nth component\n\
+on the (N-1)th component. If N is 0, the returned value is always 255.")
+ (character, n)
+ Lisp_Object character, n;
{
int id, i;
CHECK_NUMBER (character, 0);
- CHECK_NUMBER (idx, 1);
+ CHECK_NUMBER (n, 1);
id = COMPOSITE_CHAR_ID (XINT (character));
if (id < 0 || id >= n_cmpchars)
error ("Invalid composite character: %d", XINT (character));
- i = XINT (idx);
+ i = XINT (n);
- if (i > cmpchar_table[id]->glyph_len)
- args_out_of_range (character, idx);
+ /* N selects a component, counted from 0, so it must be below
+ glyph_len; a negative N must be rejected as well, because it is
+ used directly as an index into cmp_rule below. */
+ if (i < 0 || i >= cmpchar_table[id]->glyph_len)
+ args_out_of_range (character, n);
return make_number (cmpchar_table[id]->cmp_rule[i]);
}
Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
- Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET), Qnil);
+ Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil);
/* Setup tables. */
for (i = 0; i < 2; i++)