X-Git-Url: https://code.delx.au/gnu-emacs/blobdiff_plain/333d54dade1e7005d5a97612907158fe5ec3d310..542f2c78acd26112754474223c85311d6c9cc2eb:/src/charset.c

diff --git a/src/charset.c b/src/charset.c
index b1b4993d27..6967b9df61 100644
--- a/src/charset.c
+++ b/src/charset.c
@@ -61,7 +61,7 @@ Lisp_Object Vcharset_hash_table;
 /* Table of struct charset.  */
 struct charset *charset_table;
 
-static int charset_table_size;
+static ptrdiff_t charset_table_size;
 static int charset_table_used;
 
 Lisp_Object Qcharsetp;
@@ -419,7 +419,7 @@ load_charset_map (struct charset *charset, struct charset_map_entries *entries,
    paying attention to comment character '#'.  */
 
 static inline unsigned
-read_hex (FILE *fp, int *eof)
+read_hex (FILE *fp, int *eof, int *overflow)
 {
   int c;
   unsigned n;
@@ -441,15 +441,16 @@ read_hex (FILE *fp, int *eof)
       *eof = 1;
       return 0;
     }
-  *eof = 0;
   n = 0;
-  if (c == 'x')
-    while ((c = getc (fp)) != EOF && isxdigit (c))
+  while (isxdigit (c = getc (fp)))
+    {
+      if (UINT_MAX >> 4 < n)
+	*overflow = 1;
       n = ((n << 4)
-	   | (c <= '9' ? c - '0' : c <= 'F' ? c - 'A' + 10 : c - 'a' + 10));
-  else
-    while ((c = getc (fp)) != EOF && isdigit (c))
-      n = (n * 10) + c - '0';
+	   | (c - ('0' <= c && c <= '9' ? '0'
+		   : 'A' <= c && c <= 'F' ? 'A' - 10
+		   : 'a' - 10)));
+    }
   if (c != EOF)
     ungetc (c, fp);
   return n;
@@ -479,7 +480,6 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co
   unsigned max_code = CHARSET_MAX_CODE (charset);
   int fd;
   FILE *fp;
-  int eof;
   Lisp_Object suffixes;
   struct charset_map_entries *head, *entries;
   int n_entries, count;
@@ -504,22 +504,27 @@ load_charset_map_from_file (struct charset *charset, Lisp_Object mapfile, int co
   memset (entries, 0, sizeof (struct charset_map_entries));
 
   n_entries = 0;
-  eof = 0;
   while (1)
     {
-      unsigned from, to;
-      int c;
+      unsigned from, to, c;
       int idx;
+      int eof = 0, overflow = 0;
 
-      from = read_hex (fp, &eof);
+      from = read_hex (fp, &eof, &overflow);
       if (eof)
 	break;
       if (getc (fp) == '-')
-	to = read_hex (fp, &eof);
+	to = read_hex (fp, &eof, &overflow);
       else
 	to = from;
-      c = (int) read_hex (fp, &eof);
+      if (eof)
+	break;
+      c = read_hex (fp, &eof, &overflow);
+      if (eof)
+	break;
 
+      if (overflow)
+	continue;
       if (from < min_code || to > max_code || from > to || c > MAX_CHAR)
 	continue;
 
@@ -844,12 +849,12 @@ DEFUN ("define-charset-internal", Fdefine_charset_internal,
        Sdefine_charset_internal, charset_arg_max, MANY, 0,
        doc: /* For internal use only.
 usage: (define-charset-internal ...)  */)
-  (size_t nargs, Lisp_Object *args)
+  (ptrdiff_t nargs, Lisp_Object *args)
 {
   /* Charset attr vector.  */
   Lisp_Object attrs;
   Lisp_Object val;
-  unsigned hash_code;
+  EMACS_UINT hash_code;
   struct Lisp_Hash_Table *hash_table = XHASH_TABLE (Vcharset_hash_table);
   int i, j;
   struct charset charset;
@@ -932,17 +937,8 @@ usage: (define-charset-internal ...)  */)
   val = args[charset_arg_min_code];
   if (! NILP (val))
     {
-      unsigned code;
+      unsigned code = cons_to_unsigned (val, UINT_MAX);
 
-      if (INTEGERP (val))
-	code = XINT (val);
-      else
-	{
-	  CHECK_CONS (val);
-	  CHECK_NUMBER_CAR (val);
-	  CHECK_NUMBER_CDR (val);
-	  code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
-	}
       if (code < charset.min_code
 	  || code > charset.max_code)
 	args_out_of_range_3 (make_number (charset.min_code),
@@ -954,17 +950,8 @@ usage: (define-charset-internal ...)  */)
   val = args[charset_arg_max_code];
   if (! NILP (val))
     {
-      unsigned code;
+      unsigned code = cons_to_unsigned (val, UINT_MAX);
 
-      if (INTEGERP (val))
-	code = XINT (val);
-      else
-	{
-	  CHECK_CONS (val);
-	  CHECK_NUMBER_CAR (val);
-	  CHECK_NUMBER_CDR (val);
-	  code = (XINT (XCAR (val)) << 16) | (XINT (XCDR (val)));
-	}
       if (code < charset.min_code
 	  || code > charset.max_code)
 	args_out_of_range_3 (make_number (charset.min_code),
@@ -1163,13 +1150,25 @@ usage: (define-charset-internal ...)  */)
 				     hash_code);
       if (charset_table_used == charset_table_size)
 	{
-	  struct charset *new_table
-	    = (struct charset *) xmalloc (sizeof (struct charset)
-					  * (charset_table_size + 16));
-	  memcpy (new_table, charset_table,
-		  sizeof (struct charset) * charset_table_size);
-	  charset_table_size += 16;
+	  /* Ensure that charset IDs fit into 'int' as well as into the
+	     restriction imposed by fixnums.  Although the 'int' restriction
+	     could be removed, too much other code would need altering; for
+	     example, the IDs are stuffed into struct
+	     coding_system.charbuf[i] entries, which are 'int'.  */
+	  int old_size = charset_table_size;
+	  struct charset *new_table =
+	    xpalloc (0, &charset_table_size, 1,
+		     min (INT_MAX, MOST_POSITIVE_FIXNUM),
+		     sizeof *charset_table);
+	  memcpy (new_table, charset_table, old_size * sizeof *new_table);
 	  charset_table = new_table;
+	  /* FIXME: Doesn't this leak memory?  The old charset_table becomes
+	     unreachable.  It could be that this is intentional, because the
+	     old charset table may be in a dumped emacs, and reallocating such
+	     a table may not work.  If the memory leak is intentional, a
+	     comment should be added to explain this.  If not, the old
+	     charset_table should be freed, by passing it as the 1st argument
+	     to xpalloc and removing the memcpy.  */
 	}
       id = charset_table_used++;
       new_definition_p = 1;
@@ -1637,7 +1636,7 @@ maybe_unify_char (int c, Lisp_Object val)
   struct charset *charset;
 
   if (INTEGERP (val))
-    return XINT (val);
+    return XFASTINT (val);
   if (NILP (val))
     return c;
 
@@ -1647,7 +1646,7 @@ maybe_unify_char (int c, Lisp_Object val)
     {
       val = CHAR_TABLE_REF (Vchar_unify_table, c);
       if (! NILP (val))
-	c = XINT (val);
+	c = XFASTINT (val);
     }
   else
     {
@@ -1865,17 +1864,7 @@ and CODE-POINT to a character.  Currently not supported and just ignored.  */)
   struct charset *charsetp;
 
   CHECK_CHARSET_GET_ID (charset, id);
-  if (CONSP (code_point))
-    {
-      CHECK_NATNUM_CAR (code_point);
-      CHECK_NATNUM_CDR (code_point);
-      code = (XINT (XCAR (code_point)) << 16) | (XINT (XCDR (code_point)));
-    }
-  else
-    {
-      CHECK_NATNUM (code_point);
-      code = XINT (code_point);
-    }
+  code = cons_to_unsigned (code_point, UINT_MAX);
   charsetp = CHARSET_FROM_ID (id);
   c = DECODE_CHAR (charsetp, code);
   return (c >= 0 ? make_number (c) : Qnil);
@@ -1890,19 +1879,18 @@ Optional argument RESTRICTION specifies a way to map CH to a
 code-point in CCS.  Currently not supported and just ignored.  */)
   (Lisp_Object ch, Lisp_Object charset, Lisp_Object restriction)
 {
-  int id;
+  int c, id;
   unsigned code;
   struct charset *charsetp;
 
   CHECK_CHARSET_GET_ID (charset, id);
-  CHECK_NATNUM (ch);
+  CHECK_CHARACTER (ch);
+  c = XFASTINT (ch);
   charsetp = CHARSET_FROM_ID (id);
-  code = ENCODE_CHAR (charsetp, XINT (ch));
+  code = ENCODE_CHAR (charsetp, c);
   if (code == CHARSET_INVALID_CODE (charsetp))
     return Qnil;
-  if (code > 0x7FFFFFF)
-    return Fcons (make_number (code >> 16), make_number (code & 0xFFFF));
-  return make_number (code);
+  return INTEGER_TO_CONS (code);
 }
 
 
@@ -2174,11 +2162,11 @@ DEFUN ("set-charset-priority", Fset_charset_priority, Sset_charset_priority,
        1, MANY, 0,
        doc: /* Assign higher priority to the charsets given as arguments.
 usage: (set-charset-priority &rest charsets)  */)
-  (size_t nargs, Lisp_Object *args)
+  (ptrdiff_t nargs, Lisp_Object *args)
 {
   Lisp_Object new_head, old_list, arglist[2];
   Lisp_Object list_2022, list_emacs_mule;
-  size_t i;
+  ptrdiff_t i;
   int id;
 
   old_list = Fcopy_sequence (Vcharset_ordered_list);
@@ -2239,14 +2227,16 @@ struct charset_sort_data
 {
   Lisp_Object charset;
   int id;
-  int priority;
+  ptrdiff_t priority;
 };
 
 static int
 charset_compare (const void *d1, const void *d2)
 {
   const struct charset_sort_data *data1 = d1, *data2 = d2;
-  return (data1->priority - data2->priority);
+  if (data1->priority != data2->priority)
+    return data1->priority < data2->priority ? -1 : 1;
+  return 0;
 }
 
 DEFUN ("sort-charsets", Fsort_charsets, Ssort_charsets, 1, 1, 0,
@@ -2256,7 +2246,8 @@ See also `charset-priority-list' and `set-charset-priority'.  */)
      (Lisp_Object charsets)
 {
   Lisp_Object len = Flength (charsets);
-  int n = XFASTINT (len), i, j, done;
+  ptrdiff_t n = XFASTINT (len), i, j;
+  int done;
   Lisp_Object tail, elt, attrs;
   struct charset_sort_data *sort_data;
   int id, min_id = INT_MAX, max_id = INT_MIN;
@@ -2264,7 +2255,7 @@ See also `charset-priority-list' and `set-charset-priority'.  */)
 
   if (n == 0)
     return Qnil;
-  SAFE_ALLOCA (sort_data, struct charset_sort_data *, sizeof (*sort_data) * n);
+  SAFE_NALLOCA (sort_data, 1, n);
   for (tail = charsets, i = 0; CONSP (tail); tail = XCDR (tail), i++)
     {
       elt = XCAR (tail);
@@ -2339,6 +2330,17 @@ init_charset_once (void)
 void
 syms_of_charset (void)
 {
+  /* Allocate an initial charset table that is just under 64 KiB in size.
+     This should be large enough so that the charset table need not be
+     reallocated during an initial bootstrap.  Allocating anything larger than
+     64 KiB in an initial run may not work, because glibc malloc might use
+     mmap for larger allocations, and these don't work well across dumped
+     systems.  */
+  enum {
+    initial_malloc_max = (1 << 16) - 1,
+    charset_table_size_init = initial_malloc_max / sizeof (struct charset)
+  };
+
   DEFSYM (Qcharsetp, "charsetp");
 
   DEFSYM (Qascii, "ascii");
@@ -2371,9 +2373,9 @@ syms_of_charset (void)
     Vcharset_hash_table = Fmake_hash_table (2, args);
   }
 
-  charset_table_size = 128;
-  charset_table = ((struct charset *)
-		   xmalloc (sizeof (struct charset) * charset_table_size));
+  charset_table = (struct charset *) xmalloc (sizeof (struct charset)
+					      * charset_table_size_init);
+  charset_table_size = charset_table_size_init;
   charset_table_used = 0;
 
   defsubr (&Scharsetp);