/* Basic character set support.
- Copyright (C) 2001-2011 Free Software Foundation, Inc.
+ Copyright (C) 2001-2012 Free Software Foundation, Inc.
Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
2005, 2006, 2007, 2008, 2009, 2010, 2011
National Institute of Advanced Industrial Science and Technology (AIST)
#include <config.h>
+#define CHARSET_INLINE EXTERN_INLINE
+
#include <stdio.h>
#include <unistd.h>
-#include <ctype.h>
#include <limits.h>
#include <sys/types.h>
-#include <setjmp.h>
+#include <c-ctype.h>
#include "lisp.h"
#include "character.h"
#include "charset.h"
#define CODE_POINT_TO_INDEX(charset, code) \
((charset)->code_linear_p \
- ? (code) - (charset)->min_code \
+ ? (int) ((code) - (charset)->min_code) \
: (((charset)->code_space_mask[(code) >> 24] & 0x8) \
&& ((charset)->code_space_mask[((code) >> 16) & 0xFF] & 0x4) \
&& ((charset)->code_space_mask[((code) >> 8) & 0xFF] & 0x2) \
&& ((charset)->code_space_mask[(code) & 0xFF] & 0x1)) \
- ? (((((code) >> 24) - (charset)->code_space[12]) \
- * (charset)->code_space[11]) \
- + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
- * (charset)->code_space[7]) \
- + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
- * (charset)->code_space[3]) \
- + (((code) & 0xFF) - (charset)->code_space[0]) \
- - ((charset)->char_index_offset)) \
+ ? (int) (((((code) >> 24) - (charset)->code_space[12]) \
+ * (charset)->code_space[11]) \
+ + (((((code) >> 16) & 0xFF) - (charset)->code_space[8]) \
+ * (charset)->code_space[7]) \
+ + (((((code) >> 8) & 0xFF) - (charset)->code_space[4]) \
+ * (charset)->code_space[3]) \
+ + (((code) & 0xFF) - (charset)->code_space[0]) \
+ - ((charset)->char_index_offset)) \
: -1)
-/* Convert the character index IDX to code-point CODE for CHARSET.
- It is assumed that IDX is in a valid range. */
+/* Return the code-point for the character index IDX in CHARSET.
+ IDX should be an unsigned int variable in a valid range (which is
+ always in nonnegative int range too). IDX contains garbage afterwards. */
#define INDEX_TO_CODE_POINT(charset, idx) \
((charset)->code_linear_p \
/* Set to 1 to warn that a charset map has been loaded, and thus that
buffer text and string data may have been relocated. */
-int charset_map_loaded;
+bool charset_map_loaded;
struct charset_map_entries
{
{
Lisp_Object vec IF_LINT (= Qnil), table IF_LINT (= Qnil);
unsigned max_code = CHARSET_MAX_CODE (charset);
- int ascii_compatible_p = charset->ascii_compatible_p;
+ bool ascii_compatible_p = charset->ascii_compatible_p;
int min_char, max_char, nonascii_min_char;
int i;
unsigned char *fast_map = charset->fast_map;
{
int n = CODE_POINT_TO_INDEX (charset, max_code) + 1;
- vec = CHARSET_DECODER (charset)
- = Fmake_vector (make_number (n), make_number (-1));
+ vec = Fmake_vector (make_number (n), make_number (-1));
+ set_charset_attr (charset, charset_decoder, vec);
}
else
{
else
{
table = Fmake_char_table (Qnil, Qnil);
- if (charset->method == CHARSET_METHOD_MAP)
- CHARSET_ENCODER (charset) = table;
- else
- CHARSET_DEUNIFIER (charset) = table;
+ set_charset_attr (charset,
+ (charset->method == CHARSET_METHOD_MAP
+ ? charset_encoder : charset_deunifier),
+ table);
}
}
else
{
if (! temp_charset_work)
- temp_charset_work = xmalloc (sizeof (*temp_charset_work));
+ temp_charset_work = xmalloc (sizeof *temp_charset_work);
if (control_flag == 1)
{
memset (temp_charset_work->table.decoder, -1,
&& CHARSET_COMPACT_CODES_P (charset))
for (; from_index < lim_index; from_index++, from_c++)
{
- unsigned code = INDEX_TO_CODE_POINT (charset, from_index);
+ unsigned code = from_index;
+ code = INDEX_TO_CODE_POINT (charset, code);
if (NILP (CHAR_TABLE_REF (table, from_c)))
CHAR_TABLE_SET (table, from_c, make_number (code));
paying attention to comment character '#'. */
static inline unsigned
-read_hex (FILE *fp, int *eof, int *overflow)
+read_hex (FILE *fp, bool *eof, bool *overflow)
{
int c;
unsigned n;
return 0;
}
n = 0;
- while (isxdigit (c = getc (fp)))
+ while (c_isxdigit (c = getc (fp)))
{
if (UINT_MAX >> 4 < n)
*overflow = 1;
FILE *fp;
Lisp_Object suffixes;
struct charset_map_entries *head, *entries;
- int n_entries, count;
+ int n_entries;
+ ptrdiff_t count;
USE_SAFE_ALLOCA;
suffixes = Fcons (build_string (".map"),
/* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
large (larger than MAX_ALLOCA). */
- SAFE_ALLOCA (head, struct charset_map_entries *,
- sizeof (struct charset_map_entries));
+ head = SAFE_ALLOCA (sizeof *head);
entries = head;
memset (entries, 0, sizeof (struct charset_map_entries));
{
unsigned from, to, c;
int idx;
- int eof = 0, overflow = 0;
+ bool eof = 0, overflow = 0;
from = read_hex (fp, &eof, &overflow);
if (eof)
if (n_entries > 0 && (n_entries % 0x10000) == 0)
{
- SAFE_ALLOCA (entries->next, struct charset_map_entries *,
- sizeof (struct charset_map_entries));
+ entries->next = SAFE_ALLOCA (sizeof *entries->next);
entries = entries->next;
memset (entries, 0, sizeof (struct charset_map_entries));
+ n_entries = 0;
}
- idx = n_entries % 0x10000;
+ idx = n_entries;
entries->entry[idx].from = from;
entries->entry[idx].to = to;
entries->entry[idx].c = c;
/* Use SAFE_ALLOCA instead of alloca, as `charset_map_entries' is
large (larger than MAX_ALLOCA). */
- SAFE_ALLOCA (head, struct charset_map_entries *,
- sizeof (struct charset_map_entries));
+ head = SAFE_ALLOCA (sizeof *head);
entries = head;
memset (entries, 0, sizeof (struct charset_map_entries));
{
Lisp_Object val, val2;
unsigned from, to;
- int c;
+ EMACS_INT c;
int idx;
val = AREF (vec, i);
{
val2 = XCDR (val);
val = XCAR (val);
- CHECK_NATNUM (val);
- CHECK_NATNUM (val2);
from = XFASTINT (val);
to = XFASTINT (val2);
}
else
- {
- CHECK_NATNUM (val);
- from = to = XFASTINT (val);
- }
+ from = to = XFASTINT (val);
val = AREF (vec, i + 1);
CHECK_NATNUM (val);
c = XFASTINT (val);
if (n_entries > 0 && (n_entries % 0x10000) == 0)
{
- SAFE_ALLOCA (entries->next, struct charset_map_entries *,
- sizeof (struct charset_map_entries));
+ entries->next = SAFE_ALLOCA (sizeof *entries->next);
entries = entries->next;
memset (entries, 0, sizeof (struct charset_map_entries));
}
else
{
if (! CHARSET_UNIFIED_P (charset))
- abort ();
+ emacs_abort ();
map = CHARSET_UNIFY_MAP (charset);
}
if (STRINGP (map))
Lisp_Object arg, struct charset *charset, unsigned from, unsigned to)
{
Lisp_Object range;
- int partial;
-
- partial = (from > CHARSET_MIN_CODE (charset)
- || to < CHARSET_MAX_CODE (charset));
+ bool partial = (from > CHARSET_MIN_CODE (charset)
+ || to < CHARSET_MAX_CODE (charset));
if (CHARSET_METHOD (charset) == CHARSET_METHOD_OFFSET)
{
from = CHARSET_MIN_CODE (cs);
else
{
- CHECK_NATNUM (from_code);
from = XINT (from_code);
if (from < CHARSET_MIN_CODE (cs))
from = CHARSET_MIN_CODE (cs);
to = CHARSET_MAX_CODE (cs);
else
{
- CHECK_NATNUM (to_code);
to = XINT (to_code);
if (to > CHARSET_MAX_CODE (cs))
to = CHARSET_MAX_CODE (cs);
struct charset charset;
int id;
int dimension;
- int new_definition_p;
+ bool new_definition_p;
int nchars;
if (nargs != charset_arg_max)
val = args[charset_arg_code_space];
for (i = 0, dimension = 0, nchars = 1; ; i++)
{
+ Lisp_Object min_byte_obj, max_byte_obj;
int min_byte, max_byte;
- min_byte = XINT (Faref (val, make_number (i * 2)));
- max_byte = XINT (Faref (val, make_number (i * 2 + 1)));
- if (min_byte < 0 || min_byte > max_byte || max_byte >= 256)
- error ("Invalid :code-space value");
+ min_byte_obj = Faref (val, make_number (i * 2));
+ max_byte_obj = Faref (val, make_number (i * 2 + 1));
+ CHECK_RANGED_INTEGER (min_byte_obj, 0, 255);
+ min_byte = XINT (min_byte_obj);
+ CHECK_RANGED_INTEGER (max_byte_obj, min_byte, 255);
+ max_byte = XINT (max_byte_obj);
charset.code_space[i * 4] = min_byte;
charset.code_space[i * 4 + 1] = max_byte;
charset.code_space[i * 4 + 2] = max_byte - min_byte + 1;
charset.dimension = dimension;
else
{
- CHECK_NATNUM (val);
+ CHECK_RANGED_INTEGER (val, 1, 4);
charset.dimension = XINT (val);
- if (charset.dimension < 1 || charset.dimension > 4)
- args_out_of_range_3 (val, make_number (1), make_number (4));
}
charset.code_linear_p
if (! charset.code_linear_p)
{
- charset.code_space_mask = (unsigned char *) xmalloc (256);
- memset (charset.code_space_mask, 0, 256);
+ charset.code_space_mask = xzalloc (256);
for (i = 0; i < 4; i++)
for (j = charset.code_space[i * 4]; j <= charset.code_space[i * 4 + 1];
j++)
charset.min_code = (charset.code_space[0]
| (charset.code_space[4] << 8)
| (charset.code_space[8] << 16)
- | (charset.code_space[12] << 24));
+ | ((unsigned) charset.code_space[12] << 24));
charset.max_code = (charset.code_space[1]
| (charset.code_space[5] << 8)
| (charset.code_space[9] << 16)
- | (charset.code_space[13] << 24));
+ | ((unsigned) charset.code_space[13] << 24));
charset.char_index_offset = 0;
val = args[charset_arg_min_code];
if (code < charset.min_code
|| code > charset.max_code)
- args_out_of_range_3 (make_number (charset.min_code),
- make_number (charset.max_code), val);
+ args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+ make_fixnum_or_float (charset.max_code), val);
charset.char_index_offset = CODE_POINT_TO_INDEX (&charset, code);
charset.min_code = code;
}
if (code < charset.min_code
|| code > charset.max_code)
- args_out_of_range_3 (make_number (charset.min_code),
- make_number (charset.max_code), val);
+ args_out_of_range_3 (make_fixnum_or_float (charset.min_code),
+ make_fixnum_or_float (charset.max_code), val);
charset.max_code = code;
}
charset.invalid_code = 0;
else
{
- XSETINT (val, charset.max_code + 1);
- if (XINT (val) == charset.max_code + 1)
+ if (charset.max_code < UINT_MAX)
charset.invalid_code = charset.max_code + 1;
else
error ("Attribute :invalid-code must be specified");
}
}
else
- {
- CHECK_NATNUM (val);
- charset.invalid_code = XFASTINT (val);
- }
+ charset.invalid_code = cons_to_unsigned (val, UINT_MAX);
val = args[charset_arg_iso_final];
if (NILP (val))
charset.iso_revision = -1;
else
{
- CHECK_NUMBER (val);
- if (XINT (val) > 63)
- args_out_of_range (make_number (63), val);
+ CHECK_RANGED_INTEGER (val, -1, 63);
charset.iso_revision = XINT (val);
}
if (! NILP (args[charset_arg_code_offset]))
{
val = args[charset_arg_code_offset];
- CHECK_NUMBER (val);
+ CHECK_CHARACTER (val);
charset.method = CHARSET_METHOD_OFFSET;
charset.code_offset = XINT (val);
- i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
- charset.min_char = i + charset.code_offset;
i = CODE_POINT_TO_INDEX (&charset, charset.max_code);
- charset.max_char = i + charset.code_offset;
- if (charset.max_char > MAX_CHAR)
+ if (MAX_CHAR - charset.code_offset < i)
error ("Unsupported max char: %d", charset.max_char);
+ charset.max_char = i + charset.code_offset;
+ i = CODE_POINT_TO_INDEX (&charset, charset.min_code);
+ charset.min_char = i + charset.code_offset;
i = (charset.min_char >> 7) << 7;
for (; i < 0x10000 && i <= charset.max_char; i += 128)
car_part = XCAR (elt);
cdr_part = XCDR (elt);
CHECK_CHARSET_GET_ID (car_part, this_id);
- CHECK_NUMBER (cdr_part);
+ CHECK_TYPE_RANGED_INTEGER (int, cdr_part);
offset = XINT (cdr_part);
}
else
{
new_definition_p = 0;
id = XFASTINT (CHARSET_SYMBOL_ID (args[charset_arg_name]));
- HASH_VALUE (hash_table, charset.hash_index) = attrs;
+ set_hash_value_slot (hash_table, charset.hash_index, attrs);
}
else
{
const char *code_space_chars,
unsigned min_code, unsigned max_code,
int iso_final, int iso_revision, int emacs_mule_id,
- int ascii_compatible, int supplementary,
+ bool ascii_compatible, bool supplementary,
int code_offset)
{
const unsigned char *code_space = (const unsigned char *) code_space_chars;
Lisp_Object args[charset_arg_max];
- Lisp_Object plist[14];
Lisp_Object val;
int i;
args[charset_arg_superset] = Qnil;
args[charset_arg_unify_map] = Qnil;
- plist[0] = intern_c_string (":name");
- plist[1] = args[charset_arg_name];
- plist[2] = intern_c_string (":dimension");
- plist[3] = args[charset_arg_dimension];
- plist[4] = intern_c_string (":code-space");
- plist[5] = args[charset_arg_code_space];
- plist[6] = intern_c_string (":iso-final-char");
- plist[7] = args[charset_arg_iso_final];
- plist[8] = intern_c_string (":emacs-mule-id");
- plist[9] = args[charset_arg_emacs_mule_id];
- plist[10] = intern_c_string (":ascii-compatible-p");
- plist[11] = args[charset_arg_ascii_compatible_p];
- plist[12] = intern_c_string (":code-offset");
- plist[13] = args[charset_arg_code_offset];
-
- args[charset_arg_plist] = Flist (14, plist);
+ args[charset_arg_plist] =
+ listn (CONSTYPE_HEAP, 14,
+ intern_c_string (":name"),
+ args[charset_arg_name],
+ intern_c_string (":dimension"),
+ args[charset_arg_dimension],
+ intern_c_string (":code-space"),
+ args[charset_arg_code_space],
+ intern_c_string (":iso-final-char"),
+ args[charset_arg_iso_final],
+ intern_c_string (":emacs-mule-id"),
+ args[charset_arg_emacs_mule_id],
+ intern_c_string (":ascii-compatible-p"),
+ args[charset_arg_ascii_compatible_p],
+ intern_c_string (":code-offset"),
+ args[charset_arg_code_offset]);
Fdefine_charset_internal (charset_arg_max, args);
return XINT (CHARSET_SYMBOL_ID (name));
Lisp_Object attrs;
CHECK_CHARSET_GET_ATTR (charset, attrs);
- CHARSET_ATTR_PLIST (attrs) = plist;
+ ASET (attrs, charset_plist, plist);
return plist;
}
{
if (! STRINGP (unify_map) && ! VECTORP (unify_map))
signal_error ("Bad unify-map", unify_map);
- CHARSET_UNIFY_MAP (cs) = unify_map;
+ set_charset_attr (cs, charset_unify_map, unify_map);
}
if (NILP (Vchar_unify_table))
Vchar_unify_table = Fmake_char_table (Qnil, Qnil);
}
else if (CHAR_TABLE_P (Vchar_unify_table))
{
- int min_code = CHARSET_MIN_CODE (cs);
- int max_code = CHARSET_MAX_CODE (cs);
+ unsigned min_code = CHARSET_MIN_CODE (cs);
+ unsigned max_code = CHARSET_MAX_CODE (cs);
int min_char = DECODE_CHAR (cs, min_code);
int max_char = DECODE_CHAR (cs, max_code);
(Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char, Lisp_Object charset)
{
int id;
- int chars_flag;
+ bool chars_flag;
CHECK_CHARSET_GET_ID (charset, id);
check_iso_charset_parameter (dimension, chars, final_char);
It may lookup a translation table TABLE if supplied. */
static void
-find_charsets_in_text (const unsigned char *ptr, EMACS_INT nchars, EMACS_INT nbytes, Lisp_Object charsets, Lisp_Object table, int multibyte)
+find_charsets_in_text (const unsigned char *ptr, ptrdiff_t nchars,
+ ptrdiff_t nbytes, Lisp_Object charsets,
+ Lisp_Object table, bool multibyte)
{
const unsigned char *pend = ptr + nbytes;
(Lisp_Object beg, Lisp_Object end, Lisp_Object table)
{
Lisp_Object charsets;
- EMACS_INT from, from_byte, to, stop, stop_byte;
+ ptrdiff_t from, from_byte, to, stop, stop_byte;
int i;
Lisp_Object val;
- int multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
+ bool multibyte = ! NILP (BVAR (current_buffer, enable_multibyte_characters));
validate_region (&beg, &end);
from = XFASTINT (beg);
return c;
CHECK_CHARSET_GET_CHARSET (val, charset);
+#ifdef REL_ALLOC
+ /* The call to load_charset below can allocate memory.  If REL_ALLOC
+ is used, that breaks callers that reach this function through the
+ STRING_CHAR_* macros while holding C pointers to buffer text. */
+ r_alloc_inhibit_buffer_relocation (1);
+#endif
load_charset (charset, 1);
if (! inhibit_load_charset_map)
{
if (unified > 0)
c = unified;
}
+#ifdef REL_ALLOC
+ r_alloc_inhibit_buffer_relocation (0);
+#endif
return c;
}
/* Variable used temporarily by the macro ENCODE_CHAR. */
Lisp_Object charset_work;
-/* Return a code-point of CHAR in CHARSET. If CHAR doesn't belong to
+/* Return a code-point of C in CHARSET. If C doesn't belong to
CHARSET, return CHARSET_INVALID_CODE (CHARSET). If STRICT is true,
use CHARSET's strict_max_char instead of max_char. */
}
else /* method == CHARSET_METHOD_OFFSET */
{
- int code_index = c - CHARSET_CODE_OFFSET (charset);
+ unsigned code_index = c - CHARSET_CODE_OFFSET (charset);
code = INDEX_TO_CODE_POINT (charset, code_index);
}
struct charset *
char_charset (int c, Lisp_Object charset_list, unsigned int *code_return)
{
- int maybe_null = 0;
+ bool maybe_null = 0;
if (NILP (charset_list))
charset_list = Vcharset_ordered_list;
c = XFASTINT (ch);
charset = CHAR_CHARSET (c);
if (! charset)
- abort ();
+ emacs_abort ();
code = ENCODE_CHAR (charset, c);
if (code == CHARSET_INVALID_CODE (charset))
- abort ();
+ emacs_abort ();
dimension = CHARSET_DIMENSION (charset);
for (val = Qnil; dimension > 0; dimension--)
{
(Lisp_Object dimension, Lisp_Object chars, Lisp_Object final_char)
{
int id;
- int chars_flag;
+ bool chars_flag;
check_iso_charset_parameter (dimension, chars, final_char);
chars_flag = XFASTINT (chars) == 96;
tempdir = Fexpand_file_name (build_string ("charsets"), Vdata_directory);
if (access (SSDATA (tempdir), 0) < 0)
{
- dir_warning ("Error: charsets directory (%s) does not exist.\n\
+ /* This used to be non-fatal (dir_warning), but it should not
+ happen, and if it does, sooner or later it will cause some
+ obscure problem (e.g. bug#6401), so it is better to exit now. */
+ fprintf (stderr, "Error: charsets directory not found:\n\
+%s\n\
Emacs will not function correctly without the character map files.\n\
Please check your installation!\n",
- tempdir);
- /* TODO should this be a fatal error? (Bug#909) */
+ SDATA (tempdir));
+ exit (1);
}
Vcharset_map_path = Fcons (tempdir, Qnil);
defsubr (&Ssort_charsets);
DEFVAR_LISP ("charset-map-path", Vcharset_map_path,
- doc: /* *List of directories to search for charset map files. */);
+ doc: /* List of directories to search for charset map files. */);
Vcharset_map_path = Qnil;
DEFVAR_BOOL ("inhibit-load-charset-map", inhibit_load_charset_map,