X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/1099930585662f32278796f9943ac8b50a1179f1..41d579ce4a2a86428f200788df4b15b936aa5076:/src/character.c diff --git a/src/character.c b/src/character.c index c4a3a008bc..397481e5b3 100644 --- a/src/character.c +++ b/src/character.c @@ -1,11 +1,11 @@ /* Basic character support. - Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 - Free Software Foundation, Inc. - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010 - National Institute of Advanced Industrial Science and Technology (AIST) - Registration Number H13PRO009 + +Copyright (C) 2001-2011 Free Software Foundation, Inc. +Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN. + Licensed to the Free Software Foundation. +Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. @@ -50,43 +50,17 @@ along with GNU Emacs. If not, see . */ Lisp_Object Qcharacterp; -/* Vector of translation table ever defined. - ID of a translation table is used to index this vector. */ -Lisp_Object Vtranslation_table_vector; - -/* A char-table for characters which may invoke auto-filling. */ -Lisp_Object Vauto_fill_chars; - Lisp_Object Qauto_fill_chars; /* Char-table of information about which character to unify to which Unicode character. Mainly used by the macro MAYBE_UNIFY_CHAR. */ Lisp_Object Vchar_unify_table; -/* A char-table. An element is non-nil iff the corresponding - character has a printable glyph. */ -Lisp_Object Vprintable_chars; - -/* A char-table. An elemnent is a column-width of the corresponding - character. */ -Lisp_Object Vchar_width_table; - -/* A char-table. An element is a symbol indicating the direction - property of corresponding character. */ -Lisp_Object Vchar_direction_table; - /* Variable used locally in the macro FETCH_MULTIBYTE_CHAR. */ unsigned char *_fetch_multibyte_char_p; -/* Char table of scripts. */ -Lisp_Object Vchar_script_table; - -/* Alist of scripts vs representative characters. */ -Lisp_Object Vscript_representative_chars; - static Lisp_Object Qchar_script_table; -Lisp_Object Vunicode_category_table; /* If character code C has modifier masks, reflect them to the @@ -336,16 +310,6 @@ If the multibyte character does not represent a byte, return -1. */) } } -DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0, - doc: /* Return 1 regardless of the argument CHAR. -This is now an obsolete function. We keep it just for backward compatibility. -usage: (char-bytes CHAR) */) - (Lisp_Object ch) -{ - CHECK_CHARACTER (ch); - return make_number (1); -} - DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0, doc: /* Return width of CHAR when displayed in the current buffer. The width is measured by how many columns it occupies on the screen. @@ -378,11 +342,12 @@ usage: (char-width CHAR) */) characters and bytes of the substring in *NCHARS and *NBYTES respectively. */ -int -c_string_width (const unsigned char *str, int len, int precision, int *nchars, int *nbytes) +EMACS_INT +c_string_width (const unsigned char *str, EMACS_INT len, int precision, + EMACS_INT *nchars, EMACS_INT *nbytes) { - int i = 0, i_byte = 0; - int width = 0; + EMACS_INT i = 0, i_byte = 0; + EMACS_INT width = 0; struct Lisp_Char_Table *dp = buffer_display_table (); while (i_byte < len) @@ -429,10 +394,10 @@ c_string_width (const unsigned char *str, int len, int precision, int *nchars, i current buffer. The width is measured by how many columns it occupies on the screen. */ -int -strwidth (const unsigned char *str, int len) +EMACS_INT +strwidth (const char *str, EMACS_INT len) { - return c_string_width (str, len, -1, NULL, NULL); + return c_string_width ((const unsigned char *) str, len, -1, NULL, NULL); } /* Return width of Lisp string STRING when displayed in the current @@ -442,17 +407,18 @@ strwidth (const unsigned char *str, int len) PRECISION, and set number of characters and bytes of the substring in *NCHARS and *NBYTES respectively. */ -int -lisp_string_width (Lisp_Object string, int precision, int *nchars, int *nbytes) +EMACS_INT +lisp_string_width (Lisp_Object string, int precision, + EMACS_INT *nchars, EMACS_INT *nbytes) { - int len = SCHARS (string); + EMACS_INT len = SCHARS (string); /* This set multibyte to 0 even if STRING is multibyte when it contains only ascii and eight-bit-graphic, but that's intentional. */ int multibyte = len < SBYTES (string); unsigned char *str = SDATA (string); - int i = 0, i_byte = 0; - int width = 0; + EMACS_INT i = 0, i_byte = 0; + EMACS_INT width = 0; struct Lisp_Char_Table *dp = buffer_display_table (); while (i < len) @@ -570,11 +536,11 @@ EMACS_INT multibyte_chars_in_text (const unsigned char *ptr, EMACS_INT nbytes) { const unsigned char *endp = ptr + nbytes; - int chars = 0; + EMACS_INT chars = 0; while (ptr < endp) { - int len = MULTIBYTE_LENGTH (ptr, endp); + EMACS_INT len = MULTIBYTE_LENGTH (ptr, endp); if (len == 0) abort (); @@ -592,10 +558,11 @@ multibyte_chars_in_text (const unsigned char *ptr, EMACS_INT nbytes) represented by 2-byte in a multibyte text. */ void -parse_str_as_multibyte (const unsigned char *str, int len, int *nchars, int *nbytes) +parse_str_as_multibyte (const unsigned char *str, EMACS_INT len, + EMACS_INT *nchars, EMACS_INT *nbytes) { const unsigned char *endp = str + len; - int n, chars = 0, bytes = 0; + EMACS_INT n, chars = 0, bytes = 0; if (len >= MAX_MULTIBYTE_LENGTH) { @@ -633,12 +600,13 @@ parse_str_as_multibyte (const unsigned char *str, int len, int *nchars, int *nby area and that is enough. Return the number of bytes of the resulting text. */ -int -str_as_multibyte (unsigned char *str, int len, int nbytes, int *nchars) +EMACS_INT +str_as_multibyte (unsigned char *str, EMACS_INT len, EMACS_INT nbytes, + EMACS_INT *nchars) { unsigned char *p = str, *endp = str + nbytes; unsigned char *to; - int chars = 0; + EMACS_INT chars = 0; int n; if (nbytes >= MAX_MULTIBYTE_LENGTH) @@ -709,11 +677,11 @@ str_as_multibyte (unsigned char *str, int len, int nbytes, int *nchars) bytes it may ocupy when converted to multibyte string by `str_to_multibyte'. */ -int -parse_str_to_multibyte (const unsigned char *str, int len) +EMACS_INT +parse_str_to_multibyte (const unsigned char *str, EMACS_INT len) { const unsigned char *endp = str + len; - int bytes; + EMACS_INT bytes; for (bytes = 0; str < endp; str++) bytes += (*str < 0x80) ? 1 : 2; @@ -727,8 +695,8 @@ parse_str_to_multibyte (const unsigned char *str, int len) that we can use LEN bytes at STR as a work area and that is enough. */ -int -str_to_multibyte (unsigned char *str, int len, int bytes) +EMACS_INT +str_to_multibyte (unsigned char *str, EMACS_INT len, EMACS_INT bytes) { unsigned char *p = str, *endp = str + bytes; unsigned char *to; @@ -756,8 +724,8 @@ str_to_multibyte (unsigned char *str, int len, int bytes) actually converts characters in the range 0x80..0xFF to unibyte. */ -int -str_as_unibyte (unsigned char *str, int bytes) +EMACS_INT +str_as_unibyte (unsigned char *str, EMACS_INT bytes) { const unsigned char *p = str, *endp = str + bytes; unsigned char *to; @@ -818,14 +786,14 @@ str_to_unibyte (const unsigned char *src, unsigned char *dst, EMACS_INT chars, i } -int +EMACS_INT string_count_byte8 (Lisp_Object string) { int multibyte = STRING_MULTIBYTE (string); - int nbytes = SBYTES (string); + EMACS_INT nbytes = SBYTES (string); unsigned char *p = SDATA (string); unsigned char *pend = p + nbytes; - int count = 0; + EMACS_INT count = 0; int c, len; if (multibyte) @@ -851,10 +819,10 @@ string_count_byte8 (Lisp_Object string) Lisp_Object string_escape_byte8 (Lisp_Object string) { - int nchars = SCHARS (string); - int nbytes = SBYTES (string); + EMACS_INT nchars = SCHARS (string); + EMACS_INT nbytes = SBYTES (string); int multibyte = STRING_MULTIBYTE (string); - int byte8_count; + EMACS_INT byte8_count; const unsigned char *src, *src_end; unsigned char *dst; Lisp_Object val; @@ -869,12 +837,22 @@ string_escape_byte8 (Lisp_Object string) return string; if (multibyte) - /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */ - val = make_uninit_multibyte_string (nchars + byte8_count * 3, - nbytes + byte8_count * 2); + { + if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count + || (MOST_POSITIVE_FIXNUM - nbytes) / 2 < byte8_count) + error ("Maximum string size exceeded"); + + /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */ + val = make_uninit_multibyte_string (nchars + byte8_count * 3, + nbytes + byte8_count * 2); + } else - /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */ - val = make_uninit_string (nbytes + byte8_count * 3); + { + if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count) + error ("Maximum string size exceeded"); + /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */ + val = make_uninit_string (nbytes + byte8_count * 3); + } src = SDATA (string); src_end = src + nbytes; @@ -1059,7 +1037,6 @@ syms_of_character (void) defsubr (&Scharacterp); defsubr (&Sunibyte_char_to_multibyte); defsubr (&Smultibyte_char_to_unibyte); - defsubr (&Schar_bytes); defsubr (&Schar_width); defsubr (&Sstring_width); defsubr (&Schar_direction); @@ -1068,14 +1045,14 @@ syms_of_character (void) defsubr (&Schar_resolve_modifiers); defsubr (&Sget_byte); - DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector, + DEFVAR_LISP ("translation-table-vector", Vtranslation_table_vector, doc: /* Vector recording all translation tables ever defined. Each element is a pair (SYMBOL . TABLE) relating the table to the symbol naming it. The ID of a translation table is an index into this vector. */); Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil); - DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars, + DEFVAR_LISP ("auto-fill-chars", Vauto_fill_chars, doc: /* A char-table for characters which invoke auto-filling. Such characters have value t in this table. */); @@ -1083,7 +1060,7 @@ Such characters have value t in this table. */); CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt); CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt); - DEFVAR_LISP ("char-width-table", &Vchar_width_table, + DEFVAR_LISP ("char-width-table", Vchar_width_table, doc: /* A char-table for width (columns) of each character. */); Vchar_width_table = Fmake_char_table (Qnil, make_number (1)); @@ -1091,11 +1068,11 @@ A char-table for width (columns) of each character. */); char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR, make_number (4)); - DEFVAR_LISP ("char-direction-table", &Vchar_direction_table, + DEFVAR_LISP ("char-direction-table", Vchar_direction_table, doc: /* A char-table for direction of each character. */); Vchar_direction_table = Fmake_char_table (Qnil, make_number (1)); - DEFVAR_LISP ("printable-chars", &Vprintable_chars, + DEFVAR_LISP ("printable-chars", Vprintable_chars, doc: /* A char-table for each printable character. */); Vprintable_chars = Fmake_char_table (Qnil, Qnil); Fset_char_table_range (Vprintable_chars, @@ -1104,7 +1081,7 @@ A char-table for width (columns) of each character. */); Fcons (make_number (160), make_number (MAX_5_BYTE_CHAR)), Qt); - DEFVAR_LISP ("char-script-table", &Vchar_script_table, + DEFVAR_LISP ("char-script-table", Vchar_script_table, doc: /* Char table of script symbols. It has one extra slot whose value is a list of script symbols. */); @@ -1116,7 +1093,7 @@ It has one extra slot whose value is a list of script symbols. */); Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1)); Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil); - DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars, + DEFVAR_LISP ("script-representative-chars", Vscript_representative_chars, doc: /* Alist of scripts vs the representative characters. Each element is a cons (SCRIPT . CHARS). SCRIPT is a symbol representing a script or a subgroup of a script. @@ -1126,7 +1103,7 @@ If it is a vector, one of the characters in the vector is necessary. This variable is used to find a font for a specific script. */); Vscript_representative_chars = Qnil; - DEFVAR_LISP ("unicode-category-table", &Vunicode_category_table, + DEFVAR_LISP ("unicode-category-table", Vunicode_category_table, doc: /* Char table of Unicode's "General Category". All Unicode characters have one of the following values (symbol): Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, @@ -1137,6 +1114,3 @@ See The Unicode Standard for the meaning of those values. */); } #endif /* emacs */ - -/* arch-tag: b6665960-3c3d-4184-85cd-af4318197999 - (do not change this comment) */