X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/333d54dade1e7005d5a97612907158fe5ec3d310..542f2c78acd26112754474223c85311d6c9cc2eb:/src/charset.c diff --git a/src/charset.c b/src/charset.c index b1b4993d27..6967b9df61 100644 --- a/src/charset.c +++ b/src/charset.c @@ -61,7 +61,7 @@ Lisp_Object Vcharset_hash_table; /* Table of struct charset. */ struct charset *charset_table; -static int charset_table_size; +static ptrdiff_t charset_table_size; static int charset_table_used; Lisp_Object Qcharsetp; @@ -419,7 +419,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries, paying attention to comment character '#'. */ static inline unsigned -read_hex (FILE *fp, int *eof) +read_hex (FILE *fp, int *eof, int *overflow) { int c; unsigned n; @@ -441,15 +441,16 @@ read_hex (FILE *fp, int *eof) *eof = 1; return 0; } - *eof = 0; n = 0; - if (c == 'x') - while ((c = getc (fp)) != EOF && isxdigit (c)) + while (isxdigit (c = getc (fp))) + { + if (UINT_MAX >> 4 < n) + *overflow = 1; n = ((n << 4) - | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10)); - else - while ((c = getc (fp)) != EOF && isdigit (c)) - n = (n * 10) + c - '0'; + | (c - ('0' <= c && c <= '9' ? '0' + : 'A' <= c && c <= 'F' ? 'A' - 10 + : 'a' - 10))); + } if (c != EOF) ungetc (c, fp); return n; @@ -479,7 +480,6 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co unsigned max_code = CHARSET_MAX_CODE (charset); int fd; FILE *fp; - int eof; Lisp_Object suffixes; struct charset_map_entries *head, *entries; int n_entries, count; @@ -504,22 +504,27 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co memset (entries, 0, sizeof (struct charset_map_entries)); n_entries = 0; - eof = 0; while (1) { - unsigned from, to; - int c; + unsigned from, to, c; int idx; + int eof = 0, overflow = 0; - from = read_hex (fp, &eof); + from = read_hex (fp, &eof, &overflow); if (eof) break; if (getc (fp) == '-') - to = read_hex (fp, &eof); + to = read_hex (fp, &eof, &overflow); else to = from; - c = (int) read_hex (fp, &eof); + if (eof) + break; + c = read_hex (fp, &eof, &overflow); + if (eof) + break; + if (overflow) + continue; if (from < min_code || to > max_code || from > to || c > MAX_CHAR) continue; @@ -844,12 +849,12 @@ DEFUN ("define-charset-internal", Fdefine_charset_internal, Sdefine_charset_internal, charset_arg_max, MANY, 0, doc: /* For internal use only. usage: (define-charset-internal ...) */) - (size_t nargs, Lisp_Object *args) + (ptrdiff_t nargs, Lisp_Object *args) { /* Charset attr vector. */ Lisp_Object attrs; Lisp_Object val; - unsigned hash_code; + EMACS_UINT hash_code; struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table); int i, j; struct charset charset; @@ -932,17 +937,8 @@ usage: (define-charset-internal ...) */) val = args[charset_arg_min_code]; if (! NILP (val)) { - unsigned code; + unsigned code = cons_to_unsigned (val, UINT_MAX); - if (INTEGERP (val)) - code = XINT (val); - else - { - CHECK_CONS (val); - CHECK_NUMBER_CAR (val); - CHECK_NUMBER_CDR (val); - code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); - } if (code < charset.min_code || code > charset.max_code) args_out_of_range_3 (make_number (charset.min_code), @@ -954,17 +950,8 @@ usage: (define-charset-internal ...) */) val = args[charset_arg_max_code]; if (! NILP (val)) { - unsigned code; + unsigned code = cons_to_unsigned (val, UINT_MAX); - if (INTEGERP (val)) - code = XINT (val); - else - { - CHECK_CONS (val); - CHECK_NUMBER_CAR (val); - CHECK_NUMBER_CDR (val); - code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val))); - } if (code < charset.min_code || code > charset.max_code) args_out_of_range_3 (make_number (charset.min_code), @@ -1163,13 +1150,25 @@ usage: (define-charset-internal ...) */) hash_code); if (charset_table_used == charset_table_size) { - struct charset *new_table - = (struct charset *) xmalloc (sizeof (struct charset) - * (charset_table_size + 16)); - memcpy (new_table, charset_table, - sizeof (struct charset) * charset_table_size); - charset_table_size += 16; + /* Ensure that charset IDs fit into 'int' as well as into the + restriction imposed by fixnums. Although the 'int' restriction + could be removed, too much other code would need altering; for + example, the IDs are stuffed into struct + coding_system.charbuf[i] entries, which are 'int'. */ + int old_size = charset_table_size; + struct charset *new_table = + xpalloc (0, &charset_table_size, 1, + min (INT_MAX, MOST_POSITIVE_FIXNUM), + sizeof *charset_table); + memcpy (new_table, charset_table, old_size * sizeof *new_table); charset_table = new_table; + /* FIXME: Doesn't this leak memory? The old charset_table becomes + unreachable. It could be that this is intentional, because the + old charset table may be in a dumped emacs, and reallocating such + a table may not work. If the memory leak is intentional, a + comment should be added to explain this. If not, the old + charset_table should be freed, by passing it as the 1st argument + to xpalloc and removing the memcpy. */ } id = charset_table_used++; new_definition_p = 1; @@ -1637,7 +1636,7 @@ maybe_unify_char (int c, Lisp_Object val) struct charset *charset; if (INTEGERP (val)) - return XINT (val); + return XFASTINT (val); if (NILP (val)) return c; @@ -1647,7 +1646,7 @@ maybe_unify_char (int c, Lisp_Object val) { val = CHAR_TABLE_REF (Vchar_unify_table, c); if (! NILP (val)) - c = XINT (val); + c = XFASTINT (val); } else { @@ -1865,17 +1864,7 @@ and CODE-POINT to a character. Currently not supported and just ignored. */) struct charset *charsetp; CHECK_CHARSET_GET_ID (charset, id); - if (CONSP (code_point)) - { - CHECK_NATNUM_CAR (code_point); - CHECK_NATNUM_CDR (code_point); - code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point))); - } - else - { - CHECK_NATNUM (code_point); - code = XINT (code_point); - } + code = cons_to_unsigned (code_point, UINT_MAX); charsetp = CHARSET_FROM_ID (id); c = DECODE_CHAR (charsetp, code); return (c >= 0 ? make_number (c) : Qnil); @@ -1890,19 +1879,18 @@ Optional argument RESTRICTION specifies a way to map CH to a code-point in CCS. Currently not supported and just ignored. */) (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction) { - int id; + int c, id; unsigned code; struct charset *charsetp; CHECK_CHARSET_GET_ID (charset, id); - CHECK_NATNUM (ch); + CHECK_CHARACTER (ch); + c = XFASTINT (ch); charsetp = CHARSET_FROM_ID (id); - code = ENCODE_CHAR (charsetp, XINT (ch)); + code = ENCODE_CHAR (charsetp, c); if (code == CHARSET_INVALID_CODE (charsetp)) return Qnil; - if (code > 0x7FFFFFF) - return Fcons (make_number (code >> 16), make_number (code & 0xFFFF)); - return make_number (code); + return INTEGER_TO_CONS (code); } @@ -2174,11 +2162,11 @@ DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority, 1, MANY, 0, doc: /* Assign higher priority to the charsets given as arguments. usage: (set-charset-priority &rest charsets) */) - (size_t nargs, Lisp_Object *args) + (ptrdiff_t nargs, Lisp_Object *args) { Lisp_Object new_head, old_list, arglist[2]; Lisp_Object list_2022, list_emacs_mule; - size_t i; + ptrdiff_t i; int id; old_list = Fcopy_sequence (Vcharset_ordered_list); @@ -2239,14 +2227,16 @@ struct charset_sort_data { Lisp_Object charset; int id; - int priority; + ptrdiff_t priority; }; static int charset_compare (const void *d1, const void *d2) { const struct charset_sort_data *data1 = d1, *data2 = d2; - return (data1->priority - data2->priority); + if (data1->priority != data2->priority) + return data1->priority < data2->priority ? -1 : 1; + return 0; } DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0, @@ -2256,7 +2246,8 @@ See also `charset-priority-list' and `set-charset-priority'. */) (Lisp_Object charsets) { Lisp_Object len = Flength (charsets); - int n = XFASTINT (len), i, j, done; + ptrdiff_t n = XFASTINT (len), i, j; + int done; Lisp_Object tail, elt, attrs; struct charset_sort_data *sort_data; int id, min_id = INT_MAX, max_id = INT_MIN; @@ -2264,7 +2255,7 @@ See also `charset-priority-list' and `set-charset-priority'. */) if (n == 0) return Qnil; - SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n); + SAFE_NALLOCA (sort_data, 1, n); for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++) { elt = XCAR (tail); @@ -2339,6 +2330,17 @@ init_charset_once (void) void syms_of_charset (void) { + /* Allocate an initial charset table that is just under 64 KiB in size. + This should be large enough so that the charset table need not be + reallocated during an initial bootstrap. Allocating anything larger than + 64 KiB in an initial run may not work, because glibc malloc might use + mmap for larger allocations, and these don't work well across dumped + systems. */ + enum { + initial_malloc_max = (1 << 16) - 1, + charset_table_size_init = initial_malloc_max / sizeof (struct charset) + }; + DEFSYM (Qcharsetp, "charsetp"); DEFSYM (Qascii, "ascii"); @@ -2371,9 +2373,9 @@ syms_of_charset (void) Vcharset_hash_table = Fmake_hash_table (2, args); } - charset_table_size = 128; - charset_table = ((struct charset *) - xmalloc (sizeof (struct charset) * charset_table_size)); + charset_table = (struct charset *) xmalloc (sizeof (struct charset) + * charset_table_size_init); + charset_table_size = charset_table_size_init; charset_table_used = 0; defsubr (&Scharsetp);