/* Basic character support.
Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
- Copyright (C) 2001, 2005 Free Software Foundation, Inc.
- Copyright (C) 2003
+ Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ Free Software Foundation, Inc.
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H13PRO009
GNU Emacs is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
-the Free Software Foundation; either version 2, or (at your option)
+the Free Software Foundation; either version 3, or (at your option)
any later version.
GNU Emacs is distributed in the hope that it will be useful,
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
-along with GNU Emacs; see the file COPYING. If not, write to
-the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
-Boston, MA 02111-1307, USA. */
+along with GNU Emacs; see the file COPYING. If not, write to the
+Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+Boston, MA 02110-1301, USA. */
/* At first, see the document in `character.h' to understand the code
in this file. */
/* Char table of scripts. */
Lisp_Object Vchar_script_table;
+/* Alist of scripts vs representative characters. */
+Lisp_Object Vscript_representative_chars;
+
static Lisp_Object Qchar_script_table;
/* Mapping table from unibyte chars to multibyte chars. */
\f
+/* If character code C has modifier masks, reflect them to the
+ character code if possible. Return the resulting code.
+
+ C is changed only when its base code (C with CHAR_MODIFIER_MASK
+ cleared) satisfies ASCII_CHAR_P; any other C is returned as is.
+ The Shift, Control and Meta bits are examined in that order. A
+ Shift or Control bit that none of the cases below can fold into
+ the code is left set in the return value, and modifier bits other
+ than these three are never touched here. */
+
+int
+char_resolve_modifier_mask (c)
+ int c;
+{
+ /* A non-ASCII character can't reflect modifier bits to the code,
+ so hand C back with all of its modifier bits intact. */
+ if (! ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
+ return c;
+
+ /* For Meta, Shift, and Control modifiers, we need special care. */
+ if (c & CHAR_SHIFT)
+ {
+ /* Shift modifier is valid only with [A-Za-z]. An upper-case
+ letter simply drops the Shift bit; a lower-case letter drops
+ it and is converted to the corresponding upper-case letter. */
+ if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
+ c &= ~CHAR_SHIFT;
+ else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
+ c = (c & ~CHAR_SHIFT) - ('a' - 'A');
+ /* Shift modifier for control characters and SPC is ignored. */
+ else if ((c & ~CHAR_MODIFIER_MASK) <= 0x20)
+ c &= ~CHAR_SHIFT;
+ }
+ if (c & CHAR_CTL)
+ {
+ /* Simulate the code in lread.c. */
+ /* Allow `\C- ' and `\C-?'. Control-SPC clears the low seven
+ bits of the code and Control-? sets them to 0177; both drop
+ the Control bit. */
+ if ((c & 0377) == ' ')
+ c &= ~0177 & ~ CHAR_CTL;
+ else if ((c & 0377) == '?')
+ c = 0177 | (c & ~0177 & ~CHAR_CTL);
+ /* ASCII control chars are made from letters (both cases),
+ as well as the non-letters within 0100...0137. In both
+ cases only the low five bits of the seven-bit code are kept
+ and the Control bit is dropped. */
+ else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
+ c &= (037 | (~0177 & ~CHAR_CTL));
+ else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
+ c &= (037 | (~0177 & ~CHAR_CTL));
+ }
+ if (c & CHAR_META)
+ {
+ /* Move the meta bit to the right place for a string, i.e.
+ replace CHAR_META by the 0x80 bit of the code. */
+ c = (c & ~CHAR_META) | 0x80;
+ }
+
+ return c;
+}
+
+
/* Store multibyte form of character C at P. If C has modifier bits,
handle them appropriately. */
int
char_string (c, p)
- int c;
+ unsigned c;
unsigned char *p;
{
int bytes;
if (c & CHAR_MODIFIER_MASK)
{
- /* As an non-ASCII character can't have modifier bits, we just
- ignore the bits. */
- if (ASCII_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
- {
- /* For Meta, Shift, and Control modifiers, we need special care. */
- if (c & CHAR_META)
- {
- /* Move the meta bit to the right place for a string. */
- c = (c & ~CHAR_META) | 0x80;
- }
- if (c & CHAR_SHIFT)
- {
- /* Shift modifier is valid only with [A-Za-z]. */
- if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
- c &= ~CHAR_SHIFT;
- else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
- c = (c & ~CHAR_SHIFT) - ('a' - 'A');
- }
- if (c & CHAR_CTL)
- {
- /* Simulate the code in lread.c. */
- /* Allow `\C- ' and `\C-?'. */
- if (c == (CHAR_CTL | ' '))
- c = 0;
- else if (c == (CHAR_CTL | '?'))
- c = 127;
- /* ASCII control chars are made from letters (both cases),
- as well as the non-letters within 0100...0137. */
- else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
- c &= (037 | (~0177 & ~CHAR_CTL));
- else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
- c &= (037 | (~0177 & ~CHAR_CTL));
- }
- }
-
+ c = (unsigned) char_resolve_modifier_mask ((int) c);
/* If C still has any modifier bits, just ignore it. */
c &= ~CHAR_MODIFIER_MASK;
}
p[4] = (0x80 | (c & 0x3F));
bytes = 5;
}
- else
+ else if (c <= MAX_CHAR)
{
c = CHAR_TO_BYTE8 (c);
bytes = BYTE8_STRING (c, p);
}
+ else
+ error ("Invalid character: %d", c);
return bytes;
}
return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
}
+/* Strict counterpart of multibyte_char_to_unibyte: convert character
+   C to a unibyte code, but return -1 when charset_unibyte has no
+   code for C.  A character satisfying CHAR_BYTE8_P is converted with
+   CHAR_TO_BYTE8 without consulting the charset.  */
+
+int
+multibyte_char_to_unibyte_safe (c)
+     int c;
+{
+  struct charset *unibyte_charset;
+  unsigned code;
+
+  /* Eight-bit byte characters bypass the charset lookup.  */
+  if (CHAR_BYTE8_P (c))
+    return CHAR_TO_BYTE8 (c);
+
+  unibyte_charset = CHARSET_FROM_ID (charset_unibyte);
+  code = ENCODE_CHAR (unibyte_charset, c);
+
+  /* ENCODE_CHAR reports failure with the charset's invalid code.  */
+  if (code == CHARSET_INVALID_CODE (unibyte_charset))
+    return -1;
+  return code;
+}
DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
doc: /* Return non-nil if OBJECT is a character. */)
However, if the current buffer has enable-multibyte-characters =
nil, we treat each byte as a character. */
-int
+EMACS_INT
chars_in_text (ptr, nbytes)
const unsigned char *ptr;
- int nbytes;
+ EMACS_INT nbytes;
{
/* current_buffer is null at early stages of Emacs initialization. */
if (current_buffer == 0
sequences while assuming that there's no invalid sequence. It
ignores enable-multibyte-characters. */
-int
+EMACS_INT
multibyte_chars_in_text (ptr, nbytes)
const unsigned char *ptr;
- int nbytes;
+ EMACS_INT nbytes;
{
const unsigned char *endp = ptr + nbytes;
int chars = 0;
return make_string_from_bytes ((char *) buf, n, p - buf);
}
+DEFUN ("unibyte-string", Funibyte_string, Sunibyte_string, 0, MANY, 0,
+ doc: /* Concatenate all the argument bytes and make the result a unibyte string.
+usage: (unibyte-string &rest BYTES) */)
+     (n, args)
+     int n;
+     Lisp_Object *args;
+{
+  int i;
+  /* One output byte per argument.  */
+  unsigned char *buf = (unsigned char *) alloca (n);
+  unsigned char *p = buf;
+  /* Hold the argument in the full Lisp integer width.  Storing it in
+     a plain `unsigned' would truncate it on hosts where EMACS_INT is
+     wider, so that e.g. 2**32 would wrap to 0 and slip through the
+     range check below instead of signaling an error.  */
+  EMACS_INT c;
+
+  for (i = 0; i < n; i++)
+    {
+      /* Each argument must be a non-negative integer below 256.  */
+      CHECK_NATNUM (args[i]);
+      c = XFASTINT (args[i]);
+      if (c >= 256)
+	args_out_of_range_3 (args[i], make_number (0), make_number (255));
+      *p++ = (unsigned char) c;
+    }
+
+  /* Character count and byte count are both N.  */
+  return make_string_from_bytes ((char *) buf, n, p - buf);
+}
+
+/* Lisp entry point for char_resolve_modifier_mask.  The Lisp name is
+   spelled `char-resolve-modifiers', matching the C symbols
+   Fchar_resolve_modifiers and Schar_resolve_modifiers.  */
+DEFUN ("char-resolve-modifiers", Fchar_resolve_modifiers,
+ Schar_resolve_modifiers, 1, 1, 0,
+ doc: /* Resolve modifiers in the character CHAR.
+The value is a character with modifiers resolved into the character
+code. Unresolved modifiers are kept in the value.
+usage: (char-resolve-modifiers CHAR) */)
+     (character)
+     Lisp_Object character;
+{
+  int c;
+
+  /* Signal an error unless CHARACTER is an integer.  */
+  CHECK_NUMBER (character);
+  c = XINT (character);
+  return make_number (char_resolve_modifier_mask (c));
+}
+
void
init_character_once ()
{
defsubr (&Sstring_width);
defsubr (&Schar_direction);
defsubr (&Sstring);
+ defsubr (&Sunibyte_string);
+ defsubr (&Schar_resolve_modifiers);
DEFVAR_LISP ("translation-table-vector", &Vtranslation_table_vector,
doc: /*
DEFSYM (Qchar_script_table, "char-script-table");
Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
+
+ DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars,
+ doc: /* Alist of scripts vs the representative characters. */);
+ Vscript_representative_chars = Qnil;
}
#endif /* emacs */