X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/ecae6af979abcbb5b45c33ee05ceb297678ec9a0..41d579ce4a2a86428f200788df4b15b936aa5076:/src/character.c diff --git a/src/character.c b/src/character.c index 828e220813..397481e5b3 100644 --- a/src/character.c +++ b/src/character.c @@ -1,18 +1,18 @@ /* Basic character support. - Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN. - Licensed to the Free Software Foundation. - Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008 - Free Software Foundation, Inc. - Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008 - National Institute of Advanced Industrial Science and Technology (AIST) - Registration Number H13PRO009 + +Copyright (C) 2001-2011 Free Software Foundation, Inc. +Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN. + Licensed to the Free Software Foundation. +Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011 + National Institute of Advanced Industrial Science and Technology (AIST) + Registration Number H13PRO009 This file is part of GNU Emacs. -GNU Emacs is free software; you can redistribute it and/or modify +GNU Emacs is free software: you can redistribute it and/or modify it under the terms of the GNU General Public License as published by -the Free Software Foundation; either version 3, or (at your option) -any later version. +the Free Software Foundation, either version 3 of the License, or +(at your option) any later version. GNU Emacs is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of @@ -20,9 +20,7 @@ MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License -along with GNU Emacs; see the file COPYING. If not, write to the -Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, -Boston, MA 02110-1301, USA. */ +along with GNU Emacs. If not, see . */ /* At first, see the document in `character.h' to understand the code in this file. */ @@ -36,6 +34,7 @@ Boston, MA 02110-1301, USA. */ #ifdef emacs #include +#include #include "lisp.h" #include "character.h" #include "buffer.h" @@ -51,57 +50,24 @@ Boston, MA 02110-1301, USA. */ Lisp_Object Qcharacterp; -/* Vector of translation table ever defined. - ID of a translation table is used to index this vector. */ -Lisp_Object Vtranslation_table_vector; - -/* A char-table for characters which may invoke auto-filling. */ -Lisp_Object Vauto_fill_chars; - Lisp_Object Qauto_fill_chars; /* Char-table of information about which character to unify to which - Unicode character. */ + Unicode character. Mainly used by the macro MAYBE_UNIFY_CHAR. */ Lisp_Object Vchar_unify_table; -/* A char-table. An element is non-nil iff the corresponding - character has a printable glyph. */ -Lisp_Object Vprintable_chars; - -/* A char-table. An elemnent is a column-width of the corresponding - character. */ -Lisp_Object Vchar_width_table; - -/* A char-table. An element is a symbol indicating the direction - property of corresponding character. */ -Lisp_Object Vchar_direction_table; - /* Variable used locally in the macro FETCH_MULTIBYTE_CHAR. */ unsigned char *_fetch_multibyte_char_p; -/* Char table of scripts. */ -Lisp_Object Vchar_script_table; - -/* Alist of scripts vs representative characters. */ -Lisp_Object Vscript_representative_chars; - static Lisp_Object Qchar_script_table; -/* Mapping table from unibyte chars to multibyte chars. */ -int unibyte_to_multibyte_table[256]; - -/* Nth element is 1 iff unibyte char N can be mapped to a multibyte - char. */ -char unibyte_has_multibyte_table[256]; - /* If character code C has modifier masks, reflect them to the character code if possible. Return the resulting code. */ int -char_resolve_modifier_mask (c) - int c; +char_resolve_modifier_mask (int c) { /* A non-ASCII character can't reflect modifier bits to the code. */ if (! ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK))) @@ -134,11 +100,13 @@ char_resolve_modifier_mask (c) else if ((c & 0177) >= 0100 && (c & 0177) <= 0137) c &= (037 | (~0177 & ~CHAR_CTL)); } +#if 0 /* This is outside the scope of this function. (bug#4751) */ if (c & CHAR_META) { /* Move the meta bit to the right place for a string. */ c = (c & ~CHAR_META) | 0x80; } +#endif return c; } @@ -148,9 +116,7 @@ char_resolve_modifier_mask (c) handle them appropriately. */ int -char_string (c, p) - unsigned c; - unsigned char *p; +char_string (unsigned int c, unsigned char *p) { int bytes; @@ -204,10 +170,7 @@ char_string (c, p) character) of the multibyte form. */ int -string_char (p, advanced, len) - const unsigned char *p; - const unsigned char **advanced; - int *len; +string_char (const unsigned char *p, const unsigned char **advanced, int *len) { int c; const unsigned char *saved_p = p; @@ -250,9 +213,7 @@ string_char (p, advanced, len) case, translace C by all tables. */ int -translate_char (table, c) - Lisp_Object table; - int c; +translate_char (Lisp_Object table, int c) { if (CHAR_TABLE_P (table)) { @@ -270,56 +231,45 @@ translate_char (table, c) return c; } -/* Convert the multibyte character C to unibyte 8-bit character based - on the current value of charset_unibyte. If dimension of - charset_unibyte is more than one, return (C & 0xFF). +/* Convert ASCII or 8-bit character C to unibyte. If C is none of + them, return (C & 0xFF). The argument REV_TBL is now ignored. It will be removed in the future. */ int -multibyte_char_to_unibyte (c, rev_tbl) - int c; - Lisp_Object rev_tbl; +multibyte_char_to_unibyte (int c, Lisp_Object rev_tbl) { - struct charset *charset; - unsigned c1; - + if (c < 0x80) + return c; if (CHAR_BYTE8_P (c)) return CHAR_TO_BYTE8 (c); - charset = CHARSET_FROM_ID (charset_unibyte); - c1 = ENCODE_CHAR (charset, c); - return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF); + return (c & 0xFF); } /* Like multibyte_char_to_unibyte, but return -1 if C is not supported by charset_unibyte. */ int -multibyte_char_to_unibyte_safe (c) - int c; +multibyte_char_to_unibyte_safe (int c) { - struct charset *charset; - unsigned c1; - + if (c < 0x80) + return c; if (CHAR_BYTE8_P (c)) return CHAR_TO_BYTE8 (c); - charset = CHARSET_FROM_ID (charset_unibyte); - c1 = ENCODE_CHAR (charset, c); - return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1); + return -1; } DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0, doc: /* Return non-nil if OBJECT is a character. */) - (object, ignore) - Lisp_Object object, ignore; + (Lisp_Object object, Lisp_Object ignore) { return (CHARACTERP (object) ? Qt : Qnil); } DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0, doc: /* Return the character of the maximum code. */) - () + (void) { return make_number (MAX_CHAR); } @@ -327,20 +277,15 @@ DEFUN ("max-char", Fmax_char, Smax_char, 0, 0, 0, DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte, Sunibyte_char_to_multibyte, 1, 1, 0, doc: /* Convert the byte CH to multibyte character. */) - (ch) - Lisp_Object ch; + (Lisp_Object ch) { int c; - struct charset *charset; CHECK_CHARACTER (ch); c = XFASTINT (ch); - if (c >= 0400) - error ("Invalid unibyte character: %d", c); - charset = CHARSET_FROM_ID (charset_unibyte); - c = DECODE_CHAR (charset, c); - if (c < 0) - c = BYTE8_TO_CHAR (XFASTINT (ch)); + if (c >= 0x100) + error ("Not a unibyte character: %d", c); + MAKE_CHAR_MULTIBYTE (c); return make_number (c); } @@ -348,8 +293,7 @@ DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte, Smultibyte_char_to_unibyte, 1, 1, 0, doc: /* Convert the multibyte character CH to a byte. If the multibyte character does not represent a byte, return -1. */) - (ch) - Lisp_Object ch; + (Lisp_Object ch) { int cm; @@ -361,29 +305,17 @@ If the multibyte character does not represent a byte, return -1. */) return ch; else { - int cu = CHAR_TO_BYTE8 (cm); + int cu = CHAR_TO_BYTE_SAFE (cm); return make_number (cu); } } -DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0, - doc: /* Return 1 regardless of the argument CHAR. -This is now an obsolete function. We keep it just for backward compatibility. -usage: (char-bytes CHAR) */) - (ch) - Lisp_Object ch; -{ - CHECK_CHARACTER (ch); - return make_number (1); -} - DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0, doc: /* Return width of CHAR when displayed in the current buffer. The width is measured by how many columns it occupies on the screen. Tab is taken to occupy `tab-width' columns. usage: (char-width CHAR) */) - (ch) - Lisp_Object ch; + (Lisp_Object ch) { Lisp_Object disp; int c, width; @@ -410,20 +342,19 @@ usage: (char-width CHAR) */) characters and bytes of the substring in *NCHARS and *NBYTES respectively. */ -int -c_string_width (str, len, precision, nchars, nbytes) - const unsigned char *str; - int precision, *nchars, *nbytes; +EMACS_INT +c_string_width (const unsigned char *str, EMACS_INT len, int precision, + EMACS_INT *nchars, EMACS_INT *nbytes) { - int i = 0, i_byte = 0; - int width = 0; + EMACS_INT i = 0, i_byte = 0; + EMACS_INT width = 0; struct Lisp_Char_Table *dp = buffer_display_table (); while (i_byte < len) { int bytes, thiswidth; Lisp_Object val; - int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); + int c = STRING_CHAR_AND_LENGTH (str + i_byte, bytes); if (dp) { @@ -463,12 +394,10 @@ c_string_width (str, len, precision, nchars, nbytes) current buffer. The width is measured by how many columns it occupies on the screen. */ -int -strwidth (str, len) - unsigned char *str; - int len; +EMACS_INT +strwidth (const char *str, EMACS_INT len) { - return c_string_width (str, len, -1, NULL, NULL); + return c_string_width ((const unsigned char *) str, len, -1, NULL, NULL); } /* Return width of Lisp string STRING when displayed in the current @@ -478,19 +407,18 @@ strwidth (str, len) PRECISION, and set number of characters and bytes of the substring in *NCHARS and *NBYTES respectively. */ -int -lisp_string_width (string, precision, nchars, nbytes) - Lisp_Object string; - int precision, *nchars, *nbytes; +EMACS_INT +lisp_string_width (Lisp_Object string, int precision, + EMACS_INT *nchars, EMACS_INT *nbytes) { - int len = SCHARS (string); + EMACS_INT len = SCHARS (string); /* This set multibyte to 0 even if STRING is multibyte when it contains only ascii and eight-bit-graphic, but that's intentional. */ int multibyte = len < SBYTES (string); unsigned char *str = SDATA (string); - int i = 0, i_byte = 0; - int width = 0; + EMACS_INT i = 0, i_byte = 0; + EMACS_INT width = 0; struct Lisp_Char_Table *dp = buffer_display_table (); while (i < len) @@ -513,7 +441,7 @@ lisp_string_width (string, precision, nchars, nbytes) int c; if (multibyte) - c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes); + c = STRING_CHAR_AND_LENGTH (str + i_byte, bytes); else c = str[i_byte], bytes = 1; chars = 1; @@ -560,8 +488,7 @@ only the base leading-code is considered; the validity of the following bytes is not checked. Tabs in STRING are always taken to occupy `tab-width' columns. usage: (string-width STRING) */) - (str) - Lisp_Object str; + (Lisp_Object str) { Lisp_Object val; @@ -574,8 +501,7 @@ DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0, doc: /* Return the direction of CHAR. The returned value is 0 for left-to-right and 1 for right-to-left. usage: (char-direction CHAR) */) - (ch) - Lisp_Object ch; + (Lisp_Object ch) { int c; @@ -591,9 +517,7 @@ usage: (char-direction CHAR) */) nil, we treat each byte as a character. */ EMACS_INT -chars_in_text (ptr, nbytes) - const unsigned char *ptr; - EMACS_INT nbytes; +chars_in_text (const unsigned char *ptr, EMACS_INT nbytes) { /* current_buffer is null at early stages of Emacs initialization. */ if (current_buffer == 0 @@ -609,16 +533,14 @@ chars_in_text (ptr, nbytes) ignores enable-multibyte-characters. */ EMACS_INT -multibyte_chars_in_text (ptr, nbytes) - const unsigned char *ptr; - EMACS_INT nbytes; +multibyte_chars_in_text (const unsigned char *ptr, EMACS_INT nbytes) { const unsigned char *endp = ptr + nbytes; - int chars = 0; + EMACS_INT chars = 0; while (ptr < endp) { - int len = MULTIBYTE_LENGTH (ptr, endp); + EMACS_INT len = MULTIBYTE_LENGTH (ptr, endp); if (len == 0) abort (); @@ -636,19 +558,19 @@ multibyte_chars_in_text (ptr, nbytes) represented by 2-byte in a multibyte text. */ void -parse_str_as_multibyte (str, len, nchars, nbytes) - const unsigned char *str; - int len, *nchars, *nbytes; +parse_str_as_multibyte (const unsigned char *str, EMACS_INT len, + EMACS_INT *nchars, EMACS_INT *nbytes) { const unsigned char *endp = str + len; - int n, chars = 0, bytes = 0; + EMACS_INT n, chars = 0, bytes = 0; if (len >= MAX_MULTIBYTE_LENGTH) { const unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; while (str < adjusted_endp) { - if ((n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0) + if (! CHAR_BYTE8_HEAD_P (*str) + && (n = MULTIBYTE_LENGTH_NO_CHECK (str)) > 0) str += n, bytes += n; else str++, bytes += 2; @@ -657,7 +579,8 @@ parse_str_as_multibyte (str, len, nchars, nbytes) } while (str < endp) { - if ((n = MULTIBYTE_LENGTH (str, endp)) > 0) + if (! CHAR_BYTE8_HEAD_P (*str) + && (n = MULTIBYTE_LENGTH (str, endp)) > 0) str += n, bytes += n; else str++, bytes += 2; @@ -677,24 +600,26 @@ parse_str_as_multibyte (str, len, nchars, nbytes) area and that is enough. Return the number of bytes of the resulting text. */ -int -str_as_multibyte (str, len, nbytes, nchars) - unsigned char *str; - int len, nbytes, *nchars; +EMACS_INT +str_as_multibyte (unsigned char *str, EMACS_INT len, EMACS_INT nbytes, + EMACS_INT *nchars) { unsigned char *p = str, *endp = str + nbytes; unsigned char *to; - int chars = 0; + EMACS_INT chars = 0; int n; if (nbytes >= MAX_MULTIBYTE_LENGTH) { unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; while (p < adjusted_endp + && ! CHAR_BYTE8_HEAD_P (*p) && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0) p += n, chars++; } - while ((n = MULTIBYTE_LENGTH (p, endp)) > 0) + while (p < endp + && ! CHAR_BYTE8_HEAD_P (*p) + && (n = MULTIBYTE_LENGTH (p, endp)) > 0) p += n, chars++; if (nchars) *nchars = chars; @@ -704,7 +629,7 @@ str_as_multibyte (str, len, nbytes, nchars) to = p; nbytes = endp - p; endp = str + len; - safe_bcopy ((char *) p, (char *) (endp - nbytes), nbytes); + memmove (endp - nbytes, p, nbytes); p = endp - nbytes; if (nbytes >= MAX_MULTIBYTE_LENGTH) @@ -712,7 +637,8 @@ str_as_multibyte (str, len, nbytes, nchars) unsigned char *adjusted_endp = endp - MAX_MULTIBYTE_LENGTH; while (p < adjusted_endp) { - if ((n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0) + if (! CHAR_BYTE8_HEAD_P (*p) + && (n = MULTIBYTE_LENGTH_NO_CHECK (p)) > 0) { while (n--) *to++ = *p++; @@ -728,7 +654,8 @@ str_as_multibyte (str, len, nbytes, nchars) } while (p < endp) { - if ((n = MULTIBYTE_LENGTH (p, endp)) > 0) + if (! CHAR_BYTE8_HEAD_P (*p) + && (n = MULTIBYTE_LENGTH (p, endp)) > 0) { while (n--) *to++ = *p++; @@ -750,13 +677,11 @@ str_as_multibyte (str, len, nbytes, nchars) bytes it may ocupy when converted to multibyte string by `str_to_multibyte'. */ -int -parse_str_to_multibyte (str, len) - unsigned char *str; - int len; +EMACS_INT +parse_str_to_multibyte (const unsigned char *str, EMACS_INT len) { - unsigned char *endp = str + len; - int bytes; + const unsigned char *endp = str + len; + EMACS_INT bytes; for (bytes = 0; str < endp; str++) bytes += (*str < 0x80) ? 1 : 2; @@ -770,10 +695,8 @@ parse_str_to_multibyte (str, len) that we can use LEN bytes at STR as a work area and that is enough. */ -int -str_to_multibyte (str, len, bytes) - unsigned char *str; - int len, bytes; +EMACS_INT +str_to_multibyte (unsigned char *str, EMACS_INT len, EMACS_INT bytes) { unsigned char *p = str, *endp = str + bytes; unsigned char *to; @@ -784,7 +707,7 @@ str_to_multibyte (str, len, bytes) to = p; bytes = endp - p; endp = str + len; - safe_bcopy ((char *) p, (char *) (endp - bytes), bytes); + memmove (endp - bytes, p, bytes); p = endp - bytes; while (p < endp) { @@ -801,10 +724,8 @@ str_to_multibyte (str, len, bytes) actually converts characters in the range 0x80..0xFF to unibyte. */ -int -str_as_unibyte (str, bytes) - unsigned char *str; - int bytes; +EMACS_INT +str_as_unibyte (unsigned char *str, EMACS_INT bytes) { const unsigned char *p = str, *endp = str + bytes; unsigned char *to; @@ -836,15 +757,43 @@ str_as_unibyte (str, bytes) return (to - str); } -int -string_count_byte8 (string) - Lisp_Object string; +/* Convert eight-bit chars in SRC (in multibyte form) to the + corresponding byte and store in DST. CHARS is the number of + characters in SRC. The value is the number of bytes stored in DST. + Usually, the value is the same as CHARS, but is less than it if SRC + contains a non-ASCII, non-eight-bit characater. If ACCEPT_LATIN_1 + is nonzero, a Latin-1 character is accepted and converted to a byte + of that character code. + Note: Currently the arg ACCEPT_LATIN_1 is not used. */ + +EMACS_INT +str_to_unibyte (const unsigned char *src, unsigned char *dst, EMACS_INT chars, int accept_latin_1) +{ + EMACS_INT i; + + for (i = 0; i < chars; i++) + { + int c = STRING_CHAR_ADVANCE (src); + + if (CHAR_BYTE8_P (c)) + c = CHAR_TO_BYTE8 (c); + else if (! ASCII_CHAR_P (c) + && (! accept_latin_1 || c >= 0x100)) + return i; + *dst++ = c; + } + return i; +} + + +EMACS_INT +string_count_byte8 (Lisp_Object string) { int multibyte = STRING_MULTIBYTE (string); - int nbytes = SBYTES (string); + EMACS_INT nbytes = SBYTES (string); unsigned char *p = SDATA (string); unsigned char *pend = p + nbytes; - int count = 0; + EMACS_INT count = 0; int c, len; if (multibyte) @@ -868,13 +817,12 @@ string_count_byte8 (string) Lisp_Object -string_escape_byte8 (string) - Lisp_Object string; +string_escape_byte8 (Lisp_Object string) { - int nchars = SCHARS (string); - int nbytes = SBYTES (string); + EMACS_INT nchars = SCHARS (string); + EMACS_INT nbytes = SBYTES (string); int multibyte = STRING_MULTIBYTE (string); - int byte8_count; + EMACS_INT byte8_count; const unsigned char *src, *src_end; unsigned char *dst; Lisp_Object val; @@ -889,12 +837,22 @@ string_escape_byte8 (string) return string; if (multibyte) - /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */ - val = make_uninit_multibyte_string (nchars + byte8_count * 3, - nbytes + byte8_count * 2); + { + if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count + || (MOST_POSITIVE_FIXNUM - nbytes) / 2 < byte8_count) + error ("Maximum string size exceeded"); + + /* Convert 2-byte sequence of byte8 chars to 4-byte octal. */ + val = make_uninit_multibyte_string (nchars + byte8_count * 3, + nbytes + byte8_count * 2); + } else - /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */ - val = make_uninit_string (nbytes + byte8_count * 3); + { + if ((MOST_POSITIVE_FIXNUM - nchars) / 3 < byte8_count) + error ("Maximum string size exceeded"); + /* Convert 1-byte sequence of byte8 chars to 4-byte octal. */ + val = make_uninit_string (nbytes + byte8_count * 3); + } src = SDATA (string); src_end = src + nbytes; @@ -935,14 +893,15 @@ DEFUN ("string", Fstring, Sstring, 0, MANY, 0, doc: /* Concatenate all the argument characters and make the result a string. usage: (string &rest CHARACTERS) */) - (n, args) - int n; - Lisp_Object *args; + (int n, Lisp_Object *args) { - int i; - unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n); - unsigned char *p = buf; - int c; + int i, c; + unsigned char *buf, *p; + Lisp_Object str; + USE_SAFE_ALLOCA; + + SAFE_ALLOCA (buf, unsigned char *, MAX_MULTIBYTE_LENGTH * n); + p = buf; for (i = 0; i < n; i++) { @@ -951,20 +910,23 @@ usage: (string &rest CHARACTERS) */) p += CHAR_STRING (c, p); } - return make_string_from_bytes ((char *) buf, n, p - buf); + str = make_string_from_bytes ((char *) buf, n, p - buf); + SAFE_FREE (); + return str; } DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0, doc: /* Concatenate all the argument bytes and make the result a unibyte string. usage: (unibyte-string &rest BYTES) */) - (n, args) - int n; - Lisp_Object *args; + (int n, Lisp_Object *args) { - int i; - unsigned char *buf = (unsigned char *) alloca (n); - unsigned char *p = buf; - unsigned c; + int i, c; + unsigned char *buf, *p; + Lisp_Object str; + USE_SAFE_ALLOCA; + + SAFE_ALLOCA (buf, unsigned char *, n); + p = buf; for (i = 0; i < n; i++) { @@ -975,17 +937,18 @@ usage: (unibyte-string &rest BYTES) */) *p++ = c; } - return make_string_from_bytes ((char *) buf, n, p - buf); + str = make_string_from_bytes ((char *) buf, n, p - buf); + SAFE_FREE (); + return str; } -DEFUN ("char-resolve-modifers", Fchar_resolve_modifiers, +DEFUN ("char-resolve-modifiers", Fchar_resolve_modifiers, Schar_resolve_modifiers, 1, 1, 0, doc: /* Resolve modifiers in the character CHAR. The value is a character with modifiers resolved into the character code. Unresolved modifiers are kept in the value. -usage: (char-resolve-modifers CHAR) */) - (character) - Lisp_Object character; +usage: (char-resolve-modifiers CHAR) */) + (Lisp_Object character) { int c; @@ -994,15 +957,75 @@ usage: (char-resolve-modifers CHAR) */) return make_number (char_resolve_modifier_mask (c)); } +DEFUN ("get-byte", Fget_byte, Sget_byte, 0, 2, 0, + doc: /* Return a byte value of a character at point. +Optional 1st arg POSITION, if non-nil, is a position of a character to get +a byte value. +Optional 2nd arg STRING, if non-nil, is a string of which first +character is a target to get a byte value. In this case, POSITION, if +non-nil, is an index of a target character in the string. + +If the current buffer (or STRING) is multibyte, and the target +character is not ASCII nor 8-bit character, an error is signalled. */) + (Lisp_Object position, Lisp_Object string) +{ + int c; + EMACS_INT pos; + unsigned char *p; + + if (NILP (string)) + { + if (NILP (position)) + { + p = PT_ADDR; + } + else + { + CHECK_NUMBER_COERCE_MARKER (position); + if (XINT (position) < BEGV || XINT (position) >= ZV) + args_out_of_range_3 (position, make_number (BEGV), make_number (ZV)); + pos = XFASTINT (position); + p = CHAR_POS_ADDR (pos); + } + if (NILP (current_buffer->enable_multibyte_characters)) + return make_number (*p); + } + else + { + CHECK_STRING (string); + if (NILP (position)) + { + p = SDATA (string); + } + else + { + CHECK_NATNUM (position); + if (XINT (position) >= SCHARS (string)) + args_out_of_range (string, position); + pos = XFASTINT (position); + p = SDATA (string) + string_char_to_byte (string, pos); + } + if (! STRING_MULTIBYTE (string)) + return make_number (*p); + } + c = STRING_CHAR (p); + if (CHAR_BYTE8_P (c)) + c = CHAR_TO_BYTE8 (c); + else if (! ASCII_CHAR_P (c)) + error ("Not an ASCII nor an 8-bit character: %d", c); + return make_number (c); +} + + void -init_character_once () +init_character_once (void) { } #ifdef emacs void -syms_of_character () +syms_of_character (void) { DEFSYM (Qcharacterp, "characterp"); DEFSYM (Qauto_fill_chars, "auto-fill-chars"); @@ -1014,22 +1037,22 @@ syms_of_character () defsubr (&Scharacterp); defsubr (&Sunibyte_char_to_multibyte); defsubr (&Smultibyte_char_to_unibyte); - defsubr (&Schar_bytes); defsubr (&Schar_width); defsubr (&Sstring_width); defsubr (&Schar_direction); defsubr (&Sstring); defsubr (&Sunibyte_string); defsubr (&Schar_resolve_modifiers); + defsubr (&Sget_byte); - DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector, + DEFVAR_LISP ("translation-table-vector", Vtranslation_table_vector, doc: /* Vector recording all translation tables ever defined. Each element is a pair (SYMBOL . TABLE) relating the table to the symbol naming it. The ID of a translation table is an index into this vector. */); Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil); - DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars, + DEFVAR_LISP ("auto-fill-chars", Vauto_fill_chars, doc: /* A char-table for characters which invoke auto-filling. Such characters have value t in this table. */); @@ -1037,7 +1060,7 @@ Such characters have value t in this table. */); CHAR_TABLE_SET (Vauto_fill_chars, ' ', Qt); CHAR_TABLE_SET (Vauto_fill_chars, '\n', Qt); - DEFVAR_LISP ("char-width-table", &Vchar_width_table, + DEFVAR_LISP ("char-width-table", Vchar_width_table, doc: /* A char-table for width (columns) of each character. */); Vchar_width_table = Fmake_char_table (Qnil, make_number (1)); @@ -1045,11 +1068,11 @@ A char-table for width (columns) of each character. */); char_table_set_range (Vchar_width_table, MAX_5_BYTE_CHAR + 1, MAX_CHAR, make_number (4)); - DEFVAR_LISP ("char-direction-table", &Vchar_direction_table, + DEFVAR_LISP ("char-direction-table", Vchar_direction_table, doc: /* A char-table for direction of each character. */); Vchar_direction_table = Fmake_char_table (Qnil, make_number (1)); - DEFVAR_LISP ("printable-chars", &Vprintable_chars, + DEFVAR_LISP ("printable-chars", Vprintable_chars, doc: /* A char-table for each printable character. */); Vprintable_chars = Fmake_char_table (Qnil, Qnil); Fset_char_table_range (Vprintable_chars, @@ -1058,24 +1081,36 @@ A char-table for width (columns) of each character. */); Fcons (make_number (160), make_number (MAX_5_BYTE_CHAR)), Qt); - DEFVAR_LISP ("char-script-table", &Vchar_script_table, + DEFVAR_LISP ("char-script-table", Vchar_script_table, doc: /* Char table of script symbols. It has one extra slot whose value is a list of script symbols. */); /* Intern this now in case it isn't already done. Setting this variable twice is harmless. But don't staticpro it here--that is done in alloc.c. */ - Qchar_table_extra_slots = intern ("char-table-extra-slots"); + Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots"); DEFSYM (Qchar_script_table, "char-script-table"); Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1)); Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil); - DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars, - doc: /* Alist of scripts vs the representative characters. */); + DEFVAR_LISP ("script-representative-chars", Vscript_representative_chars, + doc: /* Alist of scripts vs the representative characters. +Each element is a cons (SCRIPT . CHARS). +SCRIPT is a symbol representing a script or a subgroup of a script. +CHARS is a list or a vector of characters. +If it is a list, all characters in the list are necessary for supporting SCRIPT. +If it is a vector, one of the characters in the vector is necessary. +This variable is used to find a font for a specific script. */); Vscript_representative_chars = Qnil; + + DEFVAR_LISP ("unicode-category-table", Vunicode_category_table, + doc: /* Char table of Unicode's "General Category". +All Unicode characters have one of the following values (symbol): + Lu, Ll, Lt, Lm, Lo, Mn, Mc, Me, Nd, Nl, No, Pc, Pd, Ps, Pe, Pi, Pf, Po, + Sm, Sc, Sk, So, Zs, Zl, Zp, Cc, Cf, Cs, Co, Cn +See The Unicode Standard for the meaning of those values. */); + /* The correct char-table is setup in characters.el. */ + Vunicode_category_table = Qnil; } #endif /* emacs */ - -/* arch-tag: b6665960-3c3d-4184-85cd-af4318197999 - (do not change this comment) */