/* Basic character support.
Copyright (C) 1995, 1997, 1998, 2001 Electrotechnical Laboratory, JAPAN.
Licensed to the Free Software Foundation.
- Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008
+ Copyright (C) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009
Free Software Foundation, Inc.
- Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008
+ Copyright (C) 2003, 2004, 2005, 2006, 2007, 2008, 2009
National Institute of Advanced Industrial Science and Technology (AIST)
Registration Number H13PRO009
#ifdef emacs
#include <sys/types.h>
+#include <setjmp.h>
#include "lisp.h"
#include "character.h"
#include "buffer.h"
static Lisp_Object Qchar_script_table;
Lisp_Object Vunicode_category_table;
-
-/* Mapping table from unibyte chars to multibyte chars. */
-int unibyte_to_multibyte_table[256];
-
-/* Nth element is 1 iff unibyte char N can be mapped to a multibyte
- char. */
-char unibyte_has_multibyte_table[256];
-
\f
/* If character code C has modifier masks, reflect them to the
else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
c &= (037 | (~0177 & ~CHAR_CTL));
}
+#if 0 /* This is outside the scope of this function. (bug#4751) */
if (c & CHAR_META)
{
/* Move the meta bit to the right place for a string. */
c = (c & ~CHAR_META) | 0x80;
}
+#endif
return c;
}
return c;
}
-/* Convert the multibyte character C to unibyte 8-bit character based
- on the current value of charset_unibyte. If dimension of
- charset_unibyte is more than one, return (C & 0xFF).
+/* Convert ASCII or 8-bit character C to unibyte. If C is none of
+ them, return (C & 0xFF).
The argument REV_TBL is now ignored. It will be removed in the
future. */
int c;
Lisp_Object rev_tbl;
{
- struct charset *charset;
- unsigned c1;
-
+ if (c < 0x80)
+ return c;
if (CHAR_BYTE8_P (c))
return CHAR_TO_BYTE8 (c);
- charset = CHARSET_FROM_ID (charset_unibyte);
- c1 = ENCODE_CHAR (charset, c);
- return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : c & 0xFF);
+ return (c & 0xFF);
}
/* Like multibyte_char_to_unibyte, but return -1 if C is not supported
multibyte_char_to_unibyte_safe (c)
int c;
{
- struct charset *charset;
- unsigned c1;
-
+ if (c < 0x80)
+ return c;
if (CHAR_BYTE8_P (c))
return CHAR_TO_BYTE8 (c);
- charset = CHARSET_FROM_ID (charset_unibyte);
- c1 = ENCODE_CHAR (charset, c);
- return ((c1 != CHARSET_INVALID_CODE (charset)) ? c1 : -1);
+ return -1;
}
DEFUN ("characterp", Fcharacterp, Scharacterp, 1, 2, 0,
Lisp_Object ch;
{
int c;
- struct charset *charset;
CHECK_CHARACTER (ch);
c = XFASTINT (ch);
- if (c >= 0400)
- error ("Invalid unibyte character: %d", c);
- charset = CHARSET_FROM_ID (charset_unibyte);
- c = DECODE_CHAR (charset, c);
- if (c < 0)
- c = BYTE8_TO_CHAR (XFASTINT (ch));
+ if (c >= 0x100)
+ error ("Not a unibyte character: %d", c);
+ MAKE_CHAR_MULTIBYTE (c);
return make_number (c);
}
respectively. */
int
-c_string_width (str, len, precision, nchars, nbytes)
- const unsigned char *str;
- int precision, *nchars, *nbytes;
+c_string_width (const unsigned char *str, int len, int precision, int *nchars, int *nbytes)
{
int i = 0, i_byte = 0;
int width = 0;
if (NILP (position))
{
p = PT_ADDR;
- }
+ }
else
{
CHECK_NUMBER_COERCE_MARKER (position);
/* Intern this now in case it isn't already done.
Setting this variable twice is harmless.
But don't staticpro it here--that is done in alloc.c. */
- Qchar_table_extra_slots = intern ("char-table-extra-slots");
+ Qchar_table_extra_slots = intern_c_string ("char-table-extra-slots");
DEFSYM (Qchar_script_table, "char-script-table");
Fput (Qchar_script_table, Qchar_table_extra_slots, make_number (1));
Vchar_script_table = Fmake_char_table (Qchar_script_table, Qnil);
DEFVAR_LISP ("script-representative-chars", &Vscript_representative_chars,
doc: /* Alist of scripts vs the representative characters.
-Each element is a cons (SCRIPT . CHARS), where SCRIPT is a script name symbol,
+Each element is a cons (SCRIPT . CHARS).
+SCRIPT is a symbol representing a script or a subgroup of a script.
CHARS is a list or a vector of characters.
If it is a list, all characters in the list are necessary for supporting SCRIPT.
If it is a vector, one of the characters in the vector is necessary.