X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/4ed4686978bd18292e2bb7b87a7b0e0407ecb3b1..c1bb59cab27d650e7dabd01d1feb3cdce602bd32:/src/charset.c diff --git a/src/charset.c b/src/charset.c index b962f346f2..65a9956123 100644 --- a/src/charset.c +++ b/src/charset.c @@ -1,22 +1,24 @@ /* Multilingual characters handler. Ver.1.0 - Copyright (C) 1995 Free Software Foundation, Inc. Copyright (C) 1995 Electrotechnical Laboratory, JAPAN. - This program is free software; you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 2, or (at your option) - any later version. +This file is part of GNU Emacs. + +GNU Emacs is free software; you can redistribute it and/or modify +it under the terms of the GNU General Public License as published by +the Free Software Foundation; either version 2, or (at your option) +any later version. - This program is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. +GNU Emacs is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. - You should have received a copy of the GNU General Public License - along with this program; if not, write to the Free Software - Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +You should have received a copy of the GNU General Public License +along with GNU Emacs; see the file COPYING. If not, write to +the Free Software Foundation, Inc., 59 Temple Place - Suite 330, +Boston, MA 02111-1307, USA. */ /* At first, see the document in `charset.h' to understand the code in this file. */ @@ -31,6 +33,7 @@ #include "buffer.h" #include "charset.h" #include "coding.h" +#include "disptab.h" #else /* not emacs */ @@ -78,6 +81,14 @@ int width_by_char_head[256]; CHARS, and FINAL-CHAR) to Emacs' charset. */ int iso_charset_table[2][2][128]; +/* Table of pointers to the structure `cmpchar_info' indexed by + CMPCHAR-ID. */ +struct cmpchar_info **cmpchar_table; +/* The current size of `cmpchar_table'. */ +static int cmpchar_table_size; +/* Number of the current composite characters. */ +int n_cmpchars; + /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR. */ unsigned char *_fetch_multibyte_char_p; int _fetch_multibyte_char_len; @@ -86,7 +97,8 @@ int _fetch_multibyte_char_len; is not a composite character, the multi-byte form is set in WORKBUF and STR points WORKBUF. The caller should allocate at least 4-byte area at WORKBUF in advance. Returns the length of the multi-byte - form. + form. If C is an invalid character to have a multi-byte form, + signal an error. Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this function directly if C can be an ASCII character. */ @@ -96,8 +108,7 @@ non_ascii_char_to_string (c, workbuf, str) int c; unsigned char *workbuf, **str; { - int charset; - unsigned char c1, c2; + int charset, c1, c2; if (COMPOSITE_CHAR_P (c)) { @@ -110,19 +121,23 @@ non_ascii_char_to_string (c, workbuf, str) } else { - *str = workbuf; - return 0; + error ("Invalid characer: %d", c); } } SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); + if (!charset + || ! CHARSET_DEFINED_P (charset) + || c1 >= 0 && c1 < 32 + || c2 >= 0 && c2 < 32) + error ("Invalid characer: %d", c); *str = workbuf; *workbuf++ = CHARSET_LEADING_CODE_BASE (charset); if (*workbuf = CHARSET_LEADING_CODE_EXT (charset)) workbuf++; *workbuf++ = c1 | 0x80; - if (c2) + if (c2 >= 0) *workbuf++ = c2 | 0x80; return (workbuf - *str); @@ -216,6 +231,40 @@ split_non_ascii_string (str, len, charset, c1, c2) return 0; } +/* Return a character unified with C (or a character made of CHARSET, + C1, and C2) in unification table TABLE. If no unification is found + in TABLE, return C. */ +unify_char (table, c, charset, c1, c2) + Lisp_Object table; + int c, charset, c1, c2; +{ + Lisp_Object ch; + int alt_charset, alt_c1, alt_c2, dimension; + + if (c < 0) c = MAKE_CHAR (charset, c1, c2); + if (!CHAR_TABLE_P (table) + || (ch = Faref (table, make_number (c)), !INTEGERP (ch)) + || XINT (ch) < 0) + return c; + + SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2); + dimension = CHARSET_DIMENSION (alt_charset); + if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0) + /* CH is not a generic character, just return it. */ + return XFASTINT (ch); + + /* Since CH is a generic character, we must return a specific + charater which has the same position codes as C from CH. */ + if (charset < 0) + SPLIT_CHAR (c, charset, c1, c2); + if (dimension != CHARSET_DIMENSION (charset)) + /* We can't make such a character because of dimension mismatch. */ + return c; + if (!alt_c1) alt_c1 = c1; + if (!alt_c2) alt_c2 = c2; + return MAKE_CHAR (alt_charset, c1, c2); +} + /* Update the table Vcharset_table with the given arguments (see the document of `define-charset' for the meaning of each argument). Several other table contents are also updated. The caller should @@ -234,9 +283,9 @@ update_charset_table (charset_id, dimension, chars, width, direction, int bytes; unsigned char leading_code_base, leading_code_ext; - if (NILP (Faref (Vcharset_table, charset_id))) - Faset (Vcharset_table, charset_id, - Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil)); + if (NILP (CHARSET_TABLE_ENTRY (charset))) + CHARSET_TABLE_ENTRY (charset) + = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil); /* Get byte length of multibyte form, base leading-code, and extended leading-code of the charset. See the comment under the @@ -291,7 +340,7 @@ update_charset_table (charset_id, dimension, chars, width, direction, is set to nil. */ int i; - for (i = 0; i < MAX_CHARSET; i++) + for (i = 0; i <= MAX_CHARSET; i++) if (!NILP (CHARSET_TABLE_ENTRY (i))) { if (CHARSET_DIMENSION (i) == XINT (dimension) @@ -305,7 +354,7 @@ update_charset_table (charset_id, dimension, chars, width, direction, break; } } - if (i >= MAX_CHARSET) + if (i > MAX_CHARSET) /* No such a charset. */ CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX) = make_number (-1); @@ -381,7 +430,7 @@ get_new_private_charset_id (dimension, width) DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0, "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\ -If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\ +If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\ treated as a private charset.\n\ INFO-VECTOR is a vector of the format:\n\ [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\ @@ -450,7 +499,7 @@ DESCRIPTION (string) is the description string of the charset.") update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3], vec[4], vec[5], vec[6], vec[7], vec[8]); - Fput (charset_symbol, Qcharset, Faref (Vcharset_table, charset_id)); + Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id))); CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol; Vcharset_list = Fcons (charset_symbol, Vcharset_list); return Qnil; @@ -485,19 +534,36 @@ CHARSET should be defined by `defined-charset' in advance.") /* Return number of different charsets in STR of length LEN. In addition, for each found charset N, CHARSETS[N] is set 1. The - caller should allocate CHARSETS (MAX_CHARSET bytes) in advance. */ + caller should allocate CHARSETS (MAX_CHARSET + 1 bytes) in advance. + It may lookup a unification table TABLE if supplied. */ int -find_charset_in_str (str, len, charsets) - unsigned char *str, *charsets; - int len; +find_charset_in_str (str, len, charsets, table) + unsigned char *str; + int len, *charsets; + Lisp_Object table; { int num = 0; + if (! CHAR_TABLE_P (table)) + table = Qnil; + while (len > 0) { int bytes = BYTES_BY_CHAR_HEAD (*str); - int charset = CHARSET_AT (str); + int charset; + + if (NILP (table)) + charset = CHARSET_AT (str); + else + { + int c, charset; + unsigned char c1, c2; + + SPLIT_STRING(str, bytes, charset, c1, c2); + if ((c = unify_char (table, -1, charset, c1, c2)) >= 0) + charset = CHAR_CHARSET (c); + } if (!charsets[charset]) { @@ -511,13 +577,14 @@ find_charset_in_str (str, len, charsets) } DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, - 2, 2, 0, + 2, 3, 0, "Return a list of charsets in the region between BEG and END.\n\ -BEG and END are buffer positions.") - (beg, end) - Lisp_Object beg, end; +BEG and END are buffer positions.\n\ +Optional arg TABLE if non-nil is a unification table to look up.") + (beg, end, table) + Lisp_Object beg, end, table; { - char charsets[MAX_CHARSET]; + int charsets[MAX_CHARSET + 1]; int from, to, stop, i; Lisp_Object val; @@ -526,49 +593,46 @@ BEG and END are buffer positions.") stop = to = XFASTINT (end); if (from < GPT && GPT < to) stop = GPT; - bzero (charsets, MAX_CHARSET); + bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); while (1) { - find_charset_in_str (POS_ADDR (from), stop - from, charsets); + find_charset_in_str (POS_ADDR (from), stop - from, charsets, table); if (stop < to) from = stop, stop = to; else break; } val = Qnil; - for (i = MAX_CHARSET - 1; i >= 0; i--) + for (i = MAX_CHARSET; i >= 0; i--) if (charsets[i]) val = Fcons (CHARSET_SYMBOL (i), val); return val; } DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, - 1, 1, 0, - "Return a list of charsets in STR.") - (str) - Lisp_Object str; + 1, 2, 0, + "Return a list of charsets in STR.\n\ +Optional arg TABLE if non-nil is a unification table to look up.") + (str, table) + Lisp_Object str, table; { - char charsets[MAX_CHARSET]; + char charsets[MAX_CHARSET + 1]; int i; Lisp_Object val; CHECK_STRING (str, 0); - bzero (charsets, MAX_CHARSET); - find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets); + bzero (charsets, MAX_CHARSET + 1); + find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, + charsets, table); val = Qnil; - for (i = MAX_CHARSET - 1; i >= 0; i--) + for (i = MAX_CHARSET; i >= 0; i--) if (charsets[i]) val = Fcons (CHARSET_SYMBOL (i), val); return val; } DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0, - "Return a character of CHARSET and position-codes CODE1 and CODE2.\n\ -CODE1 and CODE2 are optional, but if you don't supply\n\ - sufficient position-codes, return a generic character which stands for\n\ -all characters or group of characters in the character sets.\n\ -A generic character can be an argument of `modify-syntax-entry' and\n\ -`modify-category-entry'.") + "") (charset, code1, code2) Lisp_Object charset, code1, code2; { @@ -595,12 +659,11 @@ DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0, Lisp_Object ch; { Lisp_Object val; - int charset; - unsigned char c1, c2; + int charset, c1, c2; CHECK_NUMBER (ch, 0); SPLIT_CHAR (XFASTINT (ch), charset, c1, c2); - return ((charset == CHARSET_COMPOSITION || CHARSET_DIMENSION (charset) == 2) + return (c2 >= 0 ? Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Fcons (make_number (c2), Qnil))) : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil))); @@ -665,7 +728,7 @@ DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0, #define ONE_BYTE_CHAR_WIDTH(c) \ (c < 0x20 \ ? (c == '\t' \ - ? current_buffer->tab_width \ + ? XFASTINT (current_buffer->tab_width) \ : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \ : (c < 0x7f \ ? 1 \ @@ -683,14 +746,20 @@ The width is measured by how many columns it occupies on the screen.") (ch) Lisp_Object ch; { - Lisp_Object val; + Lisp_Object val, disp; int c; CHECK_NUMBER (ch, 0); - c = XFASTINT (ch); - if (SINGLE_BYTE_CHAR_P (c)) - XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c)); + c = XINT (ch); + + /* Get the way the display table would display it. */ + disp = DISP_CHAR_VECTOR (buffer_display_table (current_buffer), (c)); + + if (VECTORP (disp)) + XSETINT (val, XVECTOR (disp)->size); + else if (SINGLE_BYTE_CHAR_P (c)) + XSETINT (val, ONE_BYTE_CHAR_WIDTH (c)); else if (COMPOSITE_CHAR_P (c)) { int id = COMPOSITE_CHAR_ID (XFASTINT (ch)); @@ -708,6 +777,7 @@ The width is measured by how many columns it occupies on the screen.") /* Return width of string STR of length LEN when displayed in the current buffer. The width is measured by how many columns it occupies on the screen. */ + int strwidth (str, len) unsigned char *str; @@ -715,29 +785,46 @@ strwidth (str, len) { unsigned char *endp = str + len; int width = 0; + struct Lisp_Char_Table *dp = buffer_display_table (current_buffer); - while (str < endp) { - if (*str == LEADING_CODE_COMPOSITION) - { - int id = str_cmpchar_id (str, endp - str); + while (str < endp) + { + if (*str == LEADING_CODE_COMPOSITION) + { + int id = str_cmpchar_id (str, endp - str); - if (id < 0) - { - width += 4; - str++; - } - else - { - width += cmpchar_table[id]->width; - str += cmpchar_table[id]->len; - } - } - else - { - width += ONE_BYTE_CHAR_WIDTH (*str); - str += BYTES_BY_CHAR_HEAD (*str); - } - } + if (id < 0) + { + width += 4; + str++; + } + else + { + width += cmpchar_table[id]->width; + str += cmpchar_table[id]->len; + } + } + else + { + Lisp_Object disp; + int thiswidth; + int c = STRING_CHAR (str, endp - str); + + /* Get the way the display table would display it. */ + if (dp) + disp = DISP_CHAR_VECTOR (dp, c); + else + disp = Qnil; + + if (VECTORP (disp)) + thiswidth = XVECTOR (disp)->size; + else + thiswidth = ONE_BYTE_CHAR_WIDTH (*str); + + width += thiswidth; + str += BYTES_BY_CHAR_HEAD (*str); + } + } return width; } @@ -842,13 +929,13 @@ If POS is out of range or not at character boundary, return NIL.") DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0, "Concatenate all the argument characters and make the result a string.") - (nargs, args) - int nargs; + (n, args) + int n; Lisp_Object *args; { - int i, n = XINT (nargs); + int i; unsigned char *buf - = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n); + = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n); unsigned char *p = buf; Lisp_Object val; @@ -871,7 +958,6 @@ DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0, } val = make_string (buf, p - buf); - free (buf); return val; } @@ -908,14 +994,6 @@ hash_string (ptr, len) } #endif -/* Table of pointers to the structure `cmpchar_info' indexed by - CMPCHAR-ID. */ -struct cmpchar_info **cmpchar_table; -/* The current size of `cmpchar_table'. */ -static int cmpchar_table_size; -/* Number of the current composite characters. */ -int n_cmpchars; - #define CMPCHAR_HASH_TABLE_SIZE 0xFFF static int *cmpchar_hash_table[CMPCHAR_HASH_TABLE_SIZE]; @@ -991,6 +1069,10 @@ str_cmpchar_id (str, len) } /* We have to register the composite character in cmpchar_table. */ + if (n_cmpchars > (CHAR_FIELD2_MASK | CHAR_FIELD3_MASK)) + /* No, we have no more room for a new composite character. */ + return -1; + /* Make the entry in hash table. */ if (hashp == NULL) { @@ -1182,23 +1264,23 @@ DEFUN ("composite-char-component", Fcmpchar_component, Scmpchar_component, DEFUN ("composite-char-composition-rule", Fcmpchar_cmp_rule, Scmpchar_cmp_rule, 2, 2, 0, - "Return the IDXth composition rule embedded in composite character CHARACTER. -The returned rule is for composing the IDXth component -on the (IDX-1)th component. If IDX is 0, the returned value is always 255.") - (character, idx) - Lisp_Object character, idx; + "Return the Nth composition rule embedded in composite character CHARACTER.\n\ +The returned rule is for composing the Nth component\n\ +on the (N-1)th component. If N is 0, the returned value is always 255.") + (character, n) + Lisp_Object character, n; { int id, i; CHECK_NUMBER (character, 0); - CHECK_NUMBER (idx, 1); + CHECK_NUMBER (n, 1); id = COMPOSITE_CHAR_ID (XINT (character)); if (id < 0 || id >= n_cmpchars) error ("Invalid composite character: %d", XINT (character)); - i = XINT (idx); + i = XINT (n); if (i > cmpchar_table[id]->glyph_len) - args_out_of_range (character, idx); + args_out_of_range (character, n); return make_number (cmpchar_table[id]->cmp_rule[i]); } @@ -1343,7 +1425,7 @@ init_charset_once () Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0)); Vcharset_table = Fmake_char_table (Qcharset_table, Qnil); - Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET), Qnil); + Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1), Qnil); /* Setup tables. */ for (i = 0; i < 2; i++)