/* Basic character set support.
- Copyright (C) 2001-2011 Free Software Foundation, Inc.
+ Copyright (C) 2001-2012 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011
National Institute of Advanced Industrial Science and Technology (AIST)
#include <config.h>
+#define CHARSET_INLINE EXTERN_INLINE
+
#include <stdio.h>
#include <unistd.h>
-#include <ctype.h>
#include <limits.h>
#include <sys/types.h>
-#include <setjmp.h>
+#include <c-ctype.h>
#include "lisp.h"
#include "character.h"
#include "charset.h"
#define CODE_POINT_TO_INDEX(charset, code) \
((charset)->code_linear_p \
- ? (code) - (charset)->min_code \
+ ? (int) ((code) - (charset)->min_code) \
: (((charset)->code_space_mask[(code) >> 24] & 0x8) \
&& ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
&& ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
&& ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
- ? (((((code) >> 24) - (charset)->code_space[12]) \
- * (charset)->code_space[11]) \
- + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
- * (charset)->code_space[7]) \
- + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
- * (charset)->code_space[3]) \
- + (((code) & 0xFF) - (charset)->code_space[0]) \
- - ((charset)->char_index_offset)) \
+ ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
+ * (charset)->code_space[11]) \
+ + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
+ * (charset)->code_space[7]) \
+ + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
+ * (charset)->code_space[3]) \
+ + (((code) & 0xFF) - (charset)->code_space[0]) \
+ - ((charset)->char_index_offset)) \
: -1)
-/* Convert the character index IDX to code-point CODE for CHARSET.
- It is assumed that IDX is in a valid range. */
+/* Return the code-point for the character index IDX in CHARSET.
+ IDX should be an unsigned int variable in a valid range (which is
+ always in nonnegative int range too). IDX contains garbage afterwards. */
#define INDEX_TO_CODE_POINT(charset, idx) \
((charset)->code_linear_p \
/* 1 iff the following table is used for encoder. */
short for_encoder;
- /* When the following table is used for encoding, mininum and
- maxinum character of the current charset. */
+ /* When the following table is used for encoding, minimum and
+ maximum character of the current charset. */
int min_char, max_char;
- /* A Unicode character correspoinding to the code indice 0 (i.e. the
+ /* A Unicode character corresponding to the code index 0 (i.e. the
minimum code-point) of the current charset, or -1 if the code
- indice 0 is not a Unicode character. This is checked when
+ index 0 is not a Unicode character. This is checked when
table.encoder[CHAR] is zero. */
int zero_index_char;
/* Set to 1 to warn that a charset map is loaded and thus a buffer
text and a string data may be relocated. */
-int charset_map_loaded;
+bool charset_map_loaded;
struct charset_map_entries
{
{
Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
unsigned max_code = CHARSET_MAX_CODE (charset);
- int ascii_compatible_p = charset->ascii_compatible_p;
+ bool ascii_compatible_p = charset->ascii_compatible_p;
int min_char, max_char, nonascii_min_char;
int i;
unsigned char *fast_map = charset->fast_map;
{
int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
- vec = CHARSET_DECODER (charset)
- = Fmake_vector (make_number (n), make_number (-1));
+ vec = Fmake_vector (make_number (n), make_number (-1));
+ set_charset_attr (charset, charset_decoder, vec);
}
else
{
else
{
table = Fmake_char_table (Qnil, Qnil);
- if (charset->method == CHARSET_METHOD_MAP)
- CHARSET_ENCODER (charset) = table;
- else
- CHARSET_DEUNIFIER (charset) = table;
+ set_charset_attr (charset,
+ (charset->method == CHARSET_METHOD_MAP
+ ? charset_encoder : charset_deunifier),
+ table);
}
}
else
{
if (! temp_charset_work)
- temp_charset_work = xmalloc (sizeof (*temp_charset_work));
+ temp_charset_work = xmalloc (sizeof *temp_charset_work);
if (control_flag == 1)
{
memset (temp_charset_work->table.decoder, -1,
&& CHARSET_COMPACT_CODES_P (charset))
for (; from_index < lim_index; from_index++, from_c++)
{
- unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
+ unsigned code = from_index;
+ code = INDEX_TO_CODE_POINT (charset, code);
if (NILP (CHAR_TABLE_REF (table, from_c)))
CHAR_TABLE_SET (table, from_c, make_number (code));
paying attention to comment character '#'. */
static inline unsigned
-read_hex (FILE *fp, int *eof, int *overflow)
+read_hex (FILE *fp, bool *eof, bool *overflow)
{
int c;
unsigned n;
return 0;
}
n = 0;
- while (isxdigit (c = getc (fp)))
+ while (c_isxdigit (c = getc (fp)))
{
if (UINT_MAX >> 4 < n)
*overflow = 1;
FILE *fp;
Lisp_Object suffixes;
struct charset_map_entries *head, *entries;
- int n_entries, count;
+ int n_entries;
+ ptrdiff_t count;
USE_SAFE_ALLOCA;
suffixes = Fcons (build_string (".map"),
/* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
large (larger than MAX_ALLOCA). */
- SAFE_ALLOCA (head, struct charset_map_entries *,
- sizeof (struct charset_map_entries));
+ head = SAFE_ALLOCA (sizeof *head);
entries = head;
memset (entries, 0, sizeof (struct charset_map_entries));
{
unsigned from, to, c;
int idx;
- int eof = 0, overflow = 0;
+ bool eof = 0, overflow = 0;
from = read_hex (fp, &eof, &overflow);
if (eof)
if (n_entries > 0 && (n_entries % 0x10000) == 0)
{
- SAFE_ALLOCA (entries->next, struct charset_map_entries *,
- sizeof (struct charset_map_entries));
+ entries->next = SAFE_ALLOCA (sizeof *entries->next);
entries = entries->next;
memset (entries, 0, sizeof (struct charset_map_entries));
+ n_entries = 0;
}
- idx = n_entries % 0x10000;
+ idx = n_entries;
entries->entry[idx].from = from;
entries->entry[idx].to = to;
entries->entry[idx].c = c;
/* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
large (larger than MAX_ALLOCA). */
- SAFE_ALLOCA (head, struct charset_map_entries *,
- sizeof (struct charset_map_entries));
+ head = SAFE_ALLOCA (sizeof *head);
entries = head;
memset (entries, 0, sizeof (struct charset_map_entries));
{
Lisp_Object val, val2;
unsigned from, to;
- int c;
+ EMACS_INT c;
int idx;
val = AREF (vec, i);
{
val2 = XCDR (val);
val = XCAR (val);
- CHECK_NATNUM (val);
- CHECK_NATNUM (val2);
from = XFASTINT (val);
to = XFASTINT (val2);
}
else
- {
- CHECK_NATNUM (val);
- from = to = XFASTINT (val);
- }
+ from = to = XFASTINT (val);
val = AREF (vec, i + 1);
CHECK_NATNUM (val);
c = XFASTINT (val);
if (n_entries > 0 && (n_entries % 0x10000) == 0)
{
- SAFE_ALLOCA (entries->next, struct charset_map_entries *,
- sizeof (struct charset_map_entries));
+ entries->next = SAFE_ALLOCA (sizeof *entries->next);
entries = entries->next;
memset (entries, 0, sizeof (struct charset_map_entries));
}
else
{
if (! CHARSET_UNIFIED_P (charset))
- abort ();
+ emacs_abort ();
map = CHARSET_UNIFY_MAP (charset);
}
if (STRINGP (map))
Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
{
Lisp_Object range;
- int partial;
-
- partial = (from > CHARSET_MIN_CODE (charset)
- || to < CHARSET_MAX_CODE (charset));
+ bool partial = (from > CHARSET_MIN_CODE (charset)
+ || to < CHARSET_MAX_CODE (charset));
if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
{
from = CHARSET_MIN_CODE (cs);
else
{
- CHECK_NATNUM (from_code);
from = XINT (from_code);
if (from < CHARSET_MIN_CODE (cs))
from = CHARSET_MIN_CODE (cs);
to = CHARSET_MAX_CODE (cs);
else
{
- CHECK_NATNUM (to_code);
to = XINT (to_code);
if (to > CHARSET_MAX_CODE (cs))
to = CHARSET_MAX_CODE (cs);
struct charset charset;
int id;
int dimension;
- int new_definition_p;
+ bool new_definition_p;
int nchars;
if (nargs != charset_arg_max)
val = args[charset_arg_code_space];
for (i = 0, dimension = 0, nchars = 1; ; i++)
{
+ Lisp_Object min_byte_obj, max_byte_obj;
int min_byte, max_byte;
- min_byte = XINT (Faref (val, make_number (i * 2)));
- max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
- if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
- error ("Invalid :code-space value");
+ min_byte_obj = Faref (val, make_number (i * 2));
+ max_byte_obj = Faref (val, make_number (i * 2 + 1));
+ CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
+ min_byte = XINT (min_byte_obj);
+ CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
+ max_byte = XINT (max_byte_obj);
charset.code_space[i * 4] = min_byte;
charset.code_space[i * 4 + 1] = max_byte;
charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
charset.dimension = dimension;
else
{
- CHECK_NATNUM (val);
+ CHECK_RANGED_INTEGER (val, 1, 4);
charset.dimension = XINT (val);
- if (charset.dimension < 1 || charset.dimension > 4)
- args_out_of_range_3 (val, make_number (1), make_number (4));
}
charset.code_linear_p
if (! charset.code_linear_p)
{
- charset.code_space_mask = (unsigned char *) xmalloc (256);
- memset (charset.code_space_mask, 0, 256);
+ charset.code_space_mask = xzalloc (256);
for (i = 0; i < 4; i++)
for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
j++)
charset.min_code = (charset.code_space[0]
| (charset.code_space[4] << 8)
| (charset.code_space[8] << 16)
- | (charset.code_space[12] << 24));
+ | ((unsigned) charset.code_space[12] << 24));
charset.max_code = (charset.code_space[1]
| (charset.code_space[5] << 8)
| (charset.code_space[9] << 16)
- | (charset.code_space[13] << 24));
+ | ((unsigned) charset.code_space[13] << 24));
charset.char_index_offset = 0;
val = args[charset_arg_min_code];
if (code < charset.min_code
|| code > charset.max_code)
- args_out_of_range_3 (make_number (charset.min_code),
- make_number (charset.max_code), val);
+ args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+ make_fixnum_or_float (charset.max_code), val);
charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
charset.min_code = code;
}
if (code < charset.min_code
|| code > charset.max_code)
- args_out_of_range_3 (make_number (charset.min_code),
- make_number (charset.max_code), val);
+ args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+ make_fixnum_or_float (charset.max_code), val);
charset.max_code = code;
}
charset.invalid_code = 0;
else
{
- XSETINT (val, charset.max_code + 1);
- if (XINT (val) == charset.max_code + 1)
+ if (charset.max_code < UINT_MAX)
charset.invalid_code = charset.max_code + 1;
else
error ("Attribute :invalid-code must be specified");
}
}
else
- {
- CHECK_NATNUM (val);
- charset.invalid_code = XFASTINT (val);
- }
+ charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
val = args[charset_arg_iso_final];
if (NILP (val))
charset.iso_revision = -1;
else
{
- CHECK_NUMBER (val);
- if (XINT (val) > 63)
- args_out_of_range (make_number (63), val);
+ CHECK_RANGED_INTEGER (val, -1, 63);
charset.iso_revision = XINT (val);
}
if (! NILP (args[charset_arg_code_offset]))
{
val = args[charset_arg_code_offset];
- CHECK_NUMBER (val);
+ CHECK_CHARACTER (val);
charset.method = CHARSET_METHOD_OFFSET;
charset.code_offset = XINT (val);
- i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
- charset.min_char = i + charset.code_offset;
i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
- charset.max_char = i + charset.code_offset;
- if (charset.max_char > MAX_CHAR)
+ if (MAX_CHAR - charset.code_offset < i)
error ("Unsupported max char: %d", charset.max_char);
+ charset.max_char = i + charset.code_offset;
+ i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
+ charset.min_char = i + charset.code_offset;
i = (charset.min_char >> 7) << 7;
for (; i < 0x10000 && i <= charset.max_char; i += 128)
car_part = XCAR (elt);
cdr_part = XCDR (elt);
CHECK_CHARSET_GET_ID (car_part, this_id);
- CHECK_NUMBER (cdr_part);
+ CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
offset = XINT (cdr_part);
}
else
{
new_definition_p = 0;
id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
- HASH_VALUE (hash_table, charset.hash_index) = attrs;
+ set_hash_value_slot (hash_table, charset.hash_index, attrs);
}
else
{
const char *code_space_chars,
unsigned min_code, unsigned max_code,
int iso_final, int iso_revision, int emacs_mule_id,
- int ascii_compatible, int supplementary,
+ bool ascii_compatible, bool supplementary,
int code_offset)
{
const unsigned char *code_space = (const unsigned char *) code_space_chars;
Lisp_Object args[charset_arg_max];
- Lisp_Object plist[14];
Lisp_Object val;
int i;
args[charset_arg_superset] = Qnil;
args[charset_arg_unify_map] = Qnil;
- plist[0] = intern_c_string (":name");
- plist[1] = args[charset_arg_name];
- plist[2] = intern_c_string (":dimension");
- plist[3] = args[charset_arg_dimension];
- plist[4] = intern_c_string (":code-space");
- plist[5] = args[charset_arg_code_space];
- plist[6] = intern_c_string (":iso-final-char");
- plist[7] = args[charset_arg_iso_final];
- plist[8] = intern_c_string (":emacs-mule-id");
- plist[9] = args[charset_arg_emacs_mule_id];
- plist[10] = intern_c_string (":ascii-compatible-p");
- plist[11] = args[charset_arg_ascii_compatible_p];
- plist[12] = intern_c_string (":code-offset");
- plist[13] = args[charset_arg_code_offset];
-
- args[charset_arg_plist] = Flist (14, plist);
+ args[charset_arg_plist] =
+ listn (CONSTYPE_HEAP, 14,
+ intern_c_string (":name"),
+ args[charset_arg_name],
+ intern_c_string (":dimension"),
+ args[charset_arg_dimension],
+ intern_c_string (":code-space"),
+ args[charset_arg_code_space],
+ intern_c_string (":iso-final-char"),
+ args[charset_arg_iso_final],
+ intern_c_string (":emacs-mule-id"),
+ args[charset_arg_emacs_mule_id],
+ intern_c_string (":ascii-compatible-p"),
+ args[charset_arg_ascii_compatible_p],
+ intern_c_string (":code-offset"),
+ args[charset_arg_code_offset]);
Fdefine_charset_internal (charset_arg_max, args);
return XINT (CHARSET_SYMBOL_ID (name));
Lisp_Object attrs;
CHECK_CHARSET_GET_ATTR (charset, attrs);
- CHARSET_ATTR_PLIST (attrs) = plist;
+ ASET (attrs, charset_plist, plist);
return plist;
}
{
if (! STRINGP (unify_map) && ! VECTORP (unify_map))
signal_error ("Bad unify-map", unify_map);
- CHARSET_UNIFY_MAP (cs) = unify_map;
+ set_charset_attr (cs, charset_unify_map, unify_map);
}
if (NILP (Vchar_unify_table))
Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
}
else if (CHAR_TABLE_P (Vchar_unify_table))
{
- int min_code = CHARSET_MIN_CODE (cs);
- int max_code = CHARSET_MAX_CODE (cs);
+ unsigned min_code = CHARSET_MIN_CODE (cs);
+ unsigned max_code = CHARSET_MAX_CODE (cs);
int min_char = DECODE_CHAR (cs, min_code);
int max_char = DECODE_CHAR (cs, max_code);
(Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
{
int id;
- int chars_flag;
+ bool chars_flag;
CHECK_CHARSET_GET_ID (charset, id);
check_iso_charset_parameter (dimension, chars, final_char);
It may lookup a translation table TABLE if supplied. */
static void
-find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
+find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
+ ptrdiff_t nbytes, Lisp_Object charsets,
+ Lisp_Object table, bool multibyte)
{
const unsigned char *pend = ptr + nbytes;
(Lisp_Object beg, Lisp_Object end, Lisp_Object table)
{
Lisp_Object charsets;
- EMACS_INT from, from_byte, to, stop, stop_byte;
+ ptrdiff_t from, from_byte, to, stop, stop_byte;
int i;
Lisp_Object val;
- int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
+ bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
validate_region (&beg, &end);
from = XFASTINT (beg);
return c;
CHECK_CHARSET_GET_CHARSET (val, charset);
+#ifdef REL_ALLOC
+ /* The call to load_charset below can allocate memory, which screws
+ callers of this function through STRING_CHAR_* macros that hold C
+ pointers to buffer text, if REL_ALLOC is used. */
+ r_alloc_inhibit_buffer_relocation (1);
+#endif
load_charset (charset, 1);
if (! inhibit_load_charset_map)
{
if (unified > 0)
c = unified;
}
+#ifdef REL_ALLOC
+ r_alloc_inhibit_buffer_relocation (0);
+#endif
return c;
}
-/* Return a character correponding to the code-point CODE of
+/* Return a character corresponding to the code-point CODE of
CHARSET. */
int
/* Variable used temporarily by the macro ENCODE_CHAR. */
Lisp_Object charset_work;
-/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
+/* Return a code-point of C in CHARSET. If C doesn't belong to
CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
use CHARSET's strict_max_char instead of max_char. */
}
else /* method == CHARSET_METHOD_OFFSET */
{
- int code_index = c - CHARSET_CODE_OFFSET (charset);
+ unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
code = INDEX_TO_CODE_POINT (charset, code_index);
}
struct charset *
char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
{
- int maybe_null = 0;
+ bool maybe_null = 0;
if (NILP (charset_list))
charset_list = Vcharset_ordered_list;
c = XFASTINT (ch);
charset = CHAR_CHARSET (c);
if (! charset)
- abort ();
+ emacs_abort ();
code = ENCODE_CHAR (charset, c);
if (code == CHARSET_INVALID_CODE (charset))
- abort ();
+ emacs_abort ();
dimension = CHARSET_DIMENSION (charset);
for (val = Qnil; dimension > 0; dimension--)
{
DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
doc: /*
Return charset of a character in the current buffer at position POS.
-If POS is nil, it defauls to the current point.
+If POS is nil, it defaults to the current point.
If POS is out of range, the value is nil. */)
(Lisp_Object pos)
{
(Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
{
int id;
- int chars_flag;
+ bool chars_flag;
check_iso_charset_parameter (dimension, chars, final_char);
chars_flag = XFASTINT (chars) == 96;
tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
if (access (SSDATA (tempdir), 0) < 0)
{
- dir_warning ("Error: charsets directory (%s) does not exist.\n\
+ /* This used to be non-fatal (dir_warning), but it should not
+ happen, and if it does sooner or later it will cause some
+ obscure problem (eg bug#6401), so better abort. */
+ fprintf (stderr, "Error: charsets directory not found:\n\
+%s\n\
Emacs will not function correctly without the character map files.\n\
Please check your installation!\n",
- tempdir);
- /* TODO should this be a fatal error? (Bug#909) */
+ SDATA (tempdir));
+ exit (1);
}
Vcharset_map_path = Fcons (tempdir, Qnil);
defsubr (&Ssort_charsets);
DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
- doc: /* *List of directories to search for charset map files. */);
+ doc: /* List of directories to search for charset map files. */);
Vcharset_map_path = Qnil;
DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,