X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/53316e5587d45deff762fe4ce46553a4620c0bdc..c1bb59cab27d650e7dabd01d1feb3cdce602bd32:/src/charset.c diff --git a/src/charset.c b/src/charset.c index fbed6283da..65a9956123 100644 --- a/src/charset.c +++ b/src/charset.c @@ -33,6 +33,7 @@ Boston, MA 02111-1307, USA. */ #include "buffer.h" #include "charset.h" #include "coding.h" +#include "disptab.h" #else /* not emacs */ @@ -96,7 +97,8 @@ int _fetch_multibyte_char_len; is not a composite character, the multi-byte form is set in WORKBUF and STR points WORKBUF. The caller should allocate at least 4-byte area at WORKBUF in advance. Returns the length of the multi-byte - form. + form. If C is an invalid character to have a multi-byte form, + signal an error. Use macro `CHAR_STRING (C, WORKBUF, STR)' instead of calling this function directly if C can be an ASCII character. */ @@ -119,12 +121,16 @@ non_ascii_char_to_string (c, workbuf, str) } else { - *str = workbuf; - return 0; + error ("Invalid characer: %d", c); } } SPLIT_NON_ASCII_CHAR (c, charset, c1, c2); + if (!charset + || ! CHARSET_DEFINED_P (charset) + || c1 >= 0 && c1 < 32 + || c2 >= 0 && c2 < 32) + error ("Invalid characer: %d", c); *str = workbuf; *workbuf++ = CHARSET_LEADING_CODE_BASE (charset); @@ -225,6 +231,40 @@ split_non_ascii_string (str, len, charset, c1, c2) return 0; } +/* Return a character unified with C (or a character made of CHARSET, + C1, and C2) in unification table TABLE. If no unification is found + in TABLE, return C. */ +unify_char (table, c, charset, c1, c2) + Lisp_Object table; + int c, charset, c1, c2; +{ + Lisp_Object ch; + int alt_charset, alt_c1, alt_c2, dimension; + + if (c < 0) c = MAKE_CHAR (charset, c1, c2); + if (!CHAR_TABLE_P (table) + || (ch = Faref (table, make_number (c)), !INTEGERP (ch)) + || XINT (ch) < 0) + return c; + + SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2); + dimension = CHARSET_DIMENSION (alt_charset); + if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0) + /* CH is not a generic character, just return it. */ + return XFASTINT (ch); + + /* Since CH is a generic character, we must return a specific + charater which has the same position codes as C from CH. */ + if (charset < 0) + SPLIT_CHAR (c, charset, c1, c2); + if (dimension != CHARSET_DIMENSION (charset)) + /* We can't make such a character because of dimension mismatch. */ + return c; + if (!alt_c1) alt_c1 = c1; + if (!alt_c2) alt_c2 = c2; + return MAKE_CHAR (alt_charset, c1, c2); +} + /* Update the table Vcharset_table with the given arguments (see the document of `define-charset' for the meaning of each argument). Several other table contents are also updated. The caller should @@ -390,7 +430,7 @@ get_new_private_charset_id (dimension, width) DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0, "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\ -If CHARSET-ID is nil, it is set automatically, which means CHARSET is\n\ +If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\ treated as a private charset.\n\ INFO-VECTOR is a vector of the format:\n\ [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\ @@ -494,19 +534,36 @@ CHARSET should be defined by `defined-charset' in advance.") /* Return number of different charsets in STR of length LEN. In addition, for each found charset N, CHARSETS[N] is set 1. The - caller should allocate CHARSETS (MAX_CHARSET + 1 bytes) in advance. */ + caller should allocate CHARSETS (MAX_CHARSET + 1 bytes) in advance. + It may lookup a unification table TABLE if supplied. */ int -find_charset_in_str (str, len, charsets) - unsigned char *str, *charsets; - int len; +find_charset_in_str (str, len, charsets, table) + unsigned char *str; + int len, *charsets; + Lisp_Object table; { int num = 0; + if (! CHAR_TABLE_P (table)) + table = Qnil; + while (len > 0) { int bytes = BYTES_BY_CHAR_HEAD (*str); - int charset = CHARSET_AT (str); + int charset; + + if (NILP (table)) + charset = CHARSET_AT (str); + else + { + int c, charset; + unsigned char c1, c2; + + SPLIT_STRING(str, bytes, charset, c1, c2); + if ((c = unify_char (table, -1, charset, c1, c2)) >= 0) + charset = CHAR_CHARSET (c); + } if (!charsets[charset]) { @@ -520,13 +577,14 @@ find_charset_in_str (str, len, charsets) } DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region, - 2, 2, 0, + 2, 3, 0, "Return a list of charsets in the region between BEG and END.\n\ -BEG and END are buffer positions.") - (beg, end) - Lisp_Object beg, end; +BEG and END are buffer positions.\n\ +Optional arg TABLE if non-nil is a unification table to look up.") + (beg, end, table) + Lisp_Object beg, end, table; { - char charsets[MAX_CHARSET + 1]; + int charsets[MAX_CHARSET + 1]; int from, to, stop, i; Lisp_Object val; @@ -535,10 +593,10 @@ BEG and END are buffer positions.") stop = to = XFASTINT (end); if (from < GPT && GPT < to) stop = GPT; - bzero (charsets, MAX_CHARSET + 1); + bzero (charsets, (MAX_CHARSET + 1) * sizeof (int)); while (1) { - find_charset_in_str (POS_ADDR (from), stop - from, charsets); + find_charset_in_str (POS_ADDR (from), stop - from, charsets, table); if (stop < to) from = stop, stop = to; else @@ -552,10 +610,11 @@ BEG and END are buffer positions.") } DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, - 1, 1, 0, - "Return a list of charsets in STR.") - (str) - Lisp_Object str; + 1, 2, 0, + "Return a list of charsets in STR.\n\ +Optional arg TABLE if non-nil is a unification table to look up.") + (str, table) + Lisp_Object str, table; { char charsets[MAX_CHARSET + 1]; int i; @@ -563,7 +622,8 @@ DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string, CHECK_STRING (str, 0); bzero (charsets, MAX_CHARSET + 1); - find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, charsets); + find_charset_in_str (XSTRING (str)->data, XSTRING (str)->size, + charsets, table); val = Qnil; for (i = MAX_CHARSET; i >= 0; i--) if (charsets[i]) @@ -686,14 +746,20 @@ The width is measured by how many columns it occupies on the screen.") (ch) Lisp_Object ch; { - Lisp_Object val; + Lisp_Object val, disp; int c; CHECK_NUMBER (ch, 0); - c = XFASTINT (ch); - if (SINGLE_BYTE_CHAR_P (c)) - XSETFASTINT (val, ONE_BYTE_CHAR_WIDTH (c)); + c = XINT (ch); + + /* Get the way the display table would display it. */ + disp = DISP_CHAR_VECTOR (buffer_display_table (current_buffer), (c)); + + if (VECTORP (disp)) + XSETINT (val, XVECTOR (disp)->size); + else if (SINGLE_BYTE_CHAR_P (c)) + XSETINT (val, ONE_BYTE_CHAR_WIDTH (c)); else if (COMPOSITE_CHAR_P (c)) { int id = COMPOSITE_CHAR_ID (XFASTINT (ch)); @@ -711,6 +777,7 @@ The width is measured by how many columns it occupies on the screen.") /* Return width of string STR of length LEN when displayed in the current buffer. The width is measured by how many columns it occupies on the screen. */ + int strwidth (str, len) unsigned char *str; @@ -718,29 +785,46 @@ strwidth (str, len) { unsigned char *endp = str + len; int width = 0; + struct Lisp_Char_Table *dp = buffer_display_table (current_buffer); - while (str < endp) { - if (*str == LEADING_CODE_COMPOSITION) - { - int id = str_cmpchar_id (str, endp - str); + while (str < endp) + { + if (*str == LEADING_CODE_COMPOSITION) + { + int id = str_cmpchar_id (str, endp - str); - if (id < 0) - { - width += 4; - str++; - } - else - { - width += cmpchar_table[id]->width; - str += cmpchar_table[id]->len; - } - } - else - { - width += ONE_BYTE_CHAR_WIDTH (*str); - str += BYTES_BY_CHAR_HEAD (*str); - } - } + if (id < 0) + { + width += 4; + str++; + } + else + { + width += cmpchar_table[id]->width; + str += cmpchar_table[id]->len; + } + } + else + { + Lisp_Object disp; + int thiswidth; + int c = STRING_CHAR (str, endp - str); + + /* Get the way the display table would display it. */ + if (dp) + disp = DISP_CHAR_VECTOR (dp, c); + else + disp = Qnil; + + if (VECTORP (disp)) + thiswidth = XVECTOR (disp)->size; + else + thiswidth = ONE_BYTE_CHAR_WIDTH (*str); + + width += thiswidth; + str += BYTES_BY_CHAR_HEAD (*str); + } + } return width; } @@ -851,7 +935,7 @@ DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0, { int i; unsigned char *buf - = (unsigned char *) malloc (MAX_LENGTH_OF_MULTI_BYTE_FORM * n); + = (unsigned char *) alloca (MAX_LENGTH_OF_MULTI_BYTE_FORM * n); unsigned char *p = buf; Lisp_Object val; @@ -874,7 +958,6 @@ DEFUN ("concat-chars", Fconcat_chars, Sconcat_chars, 1, MANY, 0, } val = make_string (buf, p - buf); - free (buf); return val; }