code.delx.au - gnu-emacs/blob - src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4    Copyright (C) 2001 Free Software Foundation, Inc.
   5
   6 This file is part of GNU Emacs.
   7
   8 GNU Emacs is free software; you can redistribute it and/or modify
   9 it under the terms of the GNU General Public License as published by
  10 the Free Software Foundation; either version 2, or (at your option)
  11 any later version.
  12
  13 GNU Emacs is distributed in the hope that it will be useful,
  14 but WITHOUT ANY WARRANTY; without even the implied warranty of
  15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  16 GNU General Public License for more details.
  17
  18 You should have received a copy of the GNU General Public License
  19 along with GNU Emacs; see the file COPYING.  If not, write to
  20 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  21 Boston, MA 02111-1307, USA.  */
  22
/* Read the documentation in `charset.h' first; the code in this file
   relies on the concepts explained there.  */
  25
  26 #ifdef emacs
  27 #include <config.h>
  28 #endif
  29
  30 #include <stdio.h>
  31
  32 #ifdef emacs
  33
  34 #include <sys/types.h>
  35 #include "lisp.h"
  36 #include "buffer.h"
  37 #include "charset.h"
  38 #include "composite.h"
  39 #include "coding.h"
  40 #include "disptab.h"
  41
  42 #else  /* not emacs */
  43
  44 #include "mulelib.h"
  45
  46 #endif /* emacs */
  47
  48 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  49 Lisp_Object Qunknown;
  50
  51 /* Declaration of special leading-codes.  */
  52 int leading_code_private_11;    /* for private DIMENSION1 of 1-column */
  53 int leading_code_private_12;    /* for private DIMENSION1 of 2-column */
  54 int leading_code_private_21;    /* for private DIMENSION2 of 1-column */
  55 int leading_code_private_22;    /* for private DIMENSION2 of 2-column */
  56
  57 /* Declaration of special charsets.  The values are set by
  58    Fsetup_special_charsets.  */
  59 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  60 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  61 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  62 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  63 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  64 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  65 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  66
  67 Lisp_Object Qcharset_table;
  68
  69 /* A char-table containing information of each character set.  */
  70 Lisp_Object Vcharset_table;
  71
  72 /* A vector of charset symbol indexed by charset-id.  This is used
  73    only for returning charset symbol from C functions.  */
  74 Lisp_Object Vcharset_symbol_table;
  75
  76 /* A list of charset symbols ever defined.  */
  77 Lisp_Object Vcharset_list;
  78
  79 /* Vector of translation table ever defined.
  80    ID of a translation table is used to index this vector.  */
  81 Lisp_Object Vtranslation_table_vector;
  82
  83 /* A char-table for characters which may invoke auto-filling.  */
  84 Lisp_Object Vauto_fill_chars;
  85
  86 Lisp_Object Qauto_fill_chars;
  87
  88 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  89 int bytes_by_char_head[256];
  90 int width_by_char_head[256];
  91
  92 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  93    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  94 int iso_charset_table[2][2][128];
  95
  96 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  97 unsigned char *_fetch_multibyte_char_p;
  98 int _fetch_multibyte_char_len;
  99
 100 /* Offset to add to a non-ASCII value when inserting it.  */
 101 int nonascii_insert_offset;
 102
 103 /* Translation table for converting non-ASCII unibyte characters
 104    to multibyte codes, or nil.  */
 105 Lisp_Object Vnonascii_translation_table;
 106
 107 /* List of all possible generic characters.  */
 108 Lisp_Object Vgeneric_character_list;
 109
 110 \f
 111 void
 112 invalid_character (c)
 113      int c;
 114 {
 115   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 116 }
 117
 118 /* Parse string STR of length LENGTH and fetch information of a
 119    character at STR.  Set BYTES to the byte length the character
 120    occupies, CHARSET, C1, C2 to proper values of the character. */
 121
 122 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 123   do {                                                                       \
 124     (c1) = *(str);                                                           \
 125     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 126     if ((bytes) == 1)                                                        \
 127       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 128     else if ((bytes) == 2)                                                   \
 129       {                                                                      \
 130         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 131           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 132         else                                                                 \
 133           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 134       }                                                                      \
 135     else if ((bytes) == 3)                                                   \
 136       {                                                                      \
 137         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 138           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 139         else                                                                 \
 140           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 141       }                                                                      \
 142     else                                                                     \
 143       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 144   } while (0)
 145
 146 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.  */
 147 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 148   ((charset) == CHARSET_ASCII                           \
 149    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 150    : ((charset) == CHARSET_8_BIT_CONTROL                \
 151       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 152       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 153          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 154          : (CHARSET_DIMENSION (charset) == 1            \
 155             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 156             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 157                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 158
 159 /* Store multi-byte form of the character C in STR.  The caller should
 160    allocate at least 4-byte area at STR in advance.  Returns the
 161    length of the multi-byte form.  If C is an invalid character code,
 162    return -1.  */
 163
 164 int
 165 char_to_string_1 (c, str)
 166      int c;
 167      unsigned char *str;
 168 {
 169   unsigned char *p = str;
 170
 171   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 172     {
 173       /* Multibyte character can't have a modifier bit.  */
 174       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 175         return -1;
 176
 177       /* For Meta, Shift, and Control modifiers, we need special care.  */
 178       if (c & CHAR_META)
 179         {
 180           /* Move the meta bit to the right place for a string.  */
 181           c = (c & ~CHAR_META) | 0x80;
 182         }
 183       if (c & CHAR_SHIFT)
 184         {
 185           /* Shift modifier is valid only with [A-Za-z].  */
 186           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 187             c &= ~CHAR_SHIFT;
 188           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 189             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 190         }
 191       if (c & CHAR_CTL)
 192         {
 193           /* Simulate the code in lread.c.  */
 194           /* Allow `\C- ' and `\C-?'.  */
 195           if (c == (CHAR_CTL | ' '))
 196             c = 0;
 197           else if (c == (CHAR_CTL | '?'))
 198             c = 127;
 199           /* ASCII control chars are made from letters (both cases),
 200              as well as the non-letters within 0100...0137.  */
 201           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 202             c &= (037 | (~0177 & ~CHAR_CTL));
 203           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 204             c &= (037 | (~0177 & ~CHAR_CTL));
 205         }
 206
 207       /* If C still has any modifier bits, just ignore it.  */
 208       c &= ~CHAR_MODIFIER_MASK;
 209     }
 210
 211   if (SINGLE_BYTE_CHAR_P (c))
 212     {
 213       if (ASCII_BYTE_P (c) || c >= 0xA0)
 214         *p++ = c;
 215       else
 216         {
 217           *p++ = LEADING_CODE_8_BIT_CONTROL;
 218           *p++ = c + 0x20;
 219         }
 220     }
 221   else if (CHAR_VALID_P (c, 0))
 222     {
 223       int charset, c1, c2;
 224
 225       SPLIT_CHAR (c, charset, c1, c2);
 226
 227       if (charset >= LEADING_CODE_EXT_11)
 228         *p++ = (charset < LEADING_CODE_EXT_12
 229                 ? LEADING_CODE_PRIVATE_11
 230                 : (charset < LEADING_CODE_EXT_21
 231                    ? LEADING_CODE_PRIVATE_12
 232                    : (charset < LEADING_CODE_EXT_22
 233                       ? LEADING_CODE_PRIVATE_21
 234                       : LEADING_CODE_PRIVATE_22)));
 235       *p++ = charset;
 236       if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
 237         return -1;
 238       if (c1)
 239         {
 240           *p++ = c1 | 0x80;
 241           if (c2 > 0)
 242             *p++ = c2 | 0x80;
 243         }
 244     }
 245   else
 246     return -1;
 247
 248   return (p - str);
 249 }
 250
 251
 252 /* Store multi-byte form of the character C in STR.  The caller should
 253    allocate at least 4-byte area at STR in advance.  Returns the
 254    length of the multi-byte form.  If C is an invalid character code,
 255    signal an error.
 256
 257    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 258    directly if C can be an ASCII character.  */
 259
 260 int
 261 char_to_string (c, str)
 262      int c;
 263      unsigned char *str;
 264 {
 265   int len;
 266   len = char_to_string_1 (c, str);
 267   if (len == -1)
 268     invalid_character (c);
 269   return len;
 270 }
 271
 272
 273 /* Return the non-ASCII character corresponding to multi-byte form at
 274    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 275    length of the multibyte form in *ACTUAL_LEN.
 276
 277    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 278    this function directly if you want ot handle ASCII characters as
 279    well.  */
 280
 281 int
 282 string_to_char (str, len, actual_len)
 283      const unsigned char *str;
 284      int len, *actual_len;
 285 {
 286   int c, bytes, charset, c1, c2;
 287
 288   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 289   c = MAKE_CHAR (charset, c1, c2);
 290   if (actual_len)
 291     *actual_len = bytes;
 292   return c;
 293 }
 294
 295 /* Return the length of the multi-byte form at string STR of length LEN.
 296    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 297 int
 298 multibyte_form_length (str, len)
 299      const unsigned char *str;
 300      int len;
 301 {
 302   int bytes;
 303
 304   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 305   return bytes;
 306 }
 307
 308 /* Check multibyte form at string STR of length LEN and set variables
 309    pointed by CHARSET, C1, and C2 to charset and position codes of the
 310    character at STR, and return 0.  If there's no multibyte character,
 311    return -1.  This should be used only in the macro SPLIT_STRING
 312    which checks range of STR in advance.  */
 313
 314 int
 315 split_string (str, len, charset, c1, c2)
 316      const unsigned char *str;
 317      unsigned char *c1, *c2;
 318      int len, *charset;
 319 {
 320   register int bytes, cs, code1, code2 = -1;
 321
 322   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 323   if (cs == CHARSET_ASCII)
 324     return -1;
 325   *charset = cs;
 326   *c1 = code1;
 327   *c2 = code2;
 328   return 0;
 329 }
 330
 331 /* Return 1 iff character C has valid printable glyph.
 332    Use the macro CHAR_PRINTABLE_P instead.  */
 333 int
 334 char_printable_p (c)
 335      int c;
 336 {
 337   int charset, c1, c2;
 338
 339   if (ASCII_BYTE_P (c))
 340     return 1;
 341   else if (SINGLE_BYTE_CHAR_P (c))
 342     return 0;
 343   else if (c >= MAX_CHAR)
 344     return 0;
 345
 346   SPLIT_CHAR (c, charset, c1, c2);
 347   if (! CHARSET_DEFINED_P (charset))
 348     return 0;
 349   if (CHARSET_CHARS (charset) == 94
 350       ? c1 <= 32 || c1 >= 127
 351       : c1 < 32)
 352     return 0;
 353   if (CHARSET_DIMENSION (charset) == 2
 354       && (CHARSET_CHARS (charset) == 94
 355           ? c2 <= 32 || c2 >= 127
 356           : c2 < 32))
 357     return 0;
 358   return 1;
 359 }
 360
 361 /* Translate character C by translation table TABLE.  If C
 362    is negative, translate a character specified by CHARSET, C1, and C2
 363    (C1 and C2 are code points of the character).  If no translation is
 364    found in TABLE, return C.  */
 365 int
 366 translate_char (table, c, charset, c1, c2)
 367      Lisp_Object table;
 368      int c, charset, c1, c2;
 369 {
 370   Lisp_Object ch;
 371   int alt_charset, alt_c1, alt_c2, dimension;
 372
 373   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 374   if (!CHAR_TABLE_P (table)
 375       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 376     return c;
 377
 378   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 379   dimension = CHARSET_DIMENSION (alt_charset);
 380   if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
 381     /* CH is not a generic character, just return it.  */
 382     return XFASTINT (ch);
 383
 384   /* Since CH is a generic character, we must return a specific
 385      charater which has the same position codes as C from CH.  */
 386   if (charset < 0)
 387     SPLIT_CHAR (c, charset, c1, c2);
 388   if (dimension != CHARSET_DIMENSION (charset))
 389     /* We can't make such a character because of dimension mismatch.  */
 390     return c;
 391   return MAKE_CHAR (alt_charset, c1, c2);
 392 }
 393
 394 /* Convert the unibyte character C to multibyte based on
 395    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 396    convert C to a valid multibyte character, convert it based on
 397    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 398
 399 int
 400 unibyte_char_to_multibyte (c)
 401      int c;
 402 {
 403   if (c < 0400 && c >= 0200)
 404     {
 405       int c_save = c;
 406
 407       if (! NILP (Vnonascii_translation_table))
 408         {
 409           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 410           if (c >= 0400 && ! char_valid_p (c, 0))
 411             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 412         }
 413       else if (c >= 0240 && nonascii_insert_offset > 0)
 414         {
 415           c += nonascii_insert_offset;
 416           if (c < 0400 || ! char_valid_p (c, 0))
 417             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 418         }
 419       else if (c >= 0240)
 420         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 421     }
 422   return c;
 423 }
 424
 425
 426 /* Convert the multibyte character C to unibyte 8-bit character based
 427    on Vnonascii_translation_table or nonascii_insert_offset.  If
 428    REV_TBL is non-nil, it should be a reverse table of
 429    Vnonascii_translation_table, i.e. what given by:
 430      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 431
 432 int
 433 multibyte_char_to_unibyte (c, rev_tbl)
 434      int c;
 435      Lisp_Object rev_tbl;
 436 {
 437   if (!SINGLE_BYTE_CHAR_P (c))
 438     {
 439       int c_save = c;
 440
 441       if (! CHAR_TABLE_P (rev_tbl)
 442           && CHAR_TABLE_P (Vnonascii_translation_table))
 443         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 444                                           make_number (0));
 445       if (CHAR_TABLE_P (rev_tbl))
 446         {
 447           Lisp_Object temp;
 448           temp = Faref (rev_tbl, make_number (c));
 449           if (INTEGERP (temp))
 450             c = XINT (temp);
 451           if (c >= 256)
 452             c = (c_save & 0177) + 0200;
 453         }
 454       else
 455         {
 456           if (nonascii_insert_offset > 0)
 457             c -= nonascii_insert_offset;
 458           if (c < 128 || c >= 256)
 459             c = (c_save & 0177) + 0200;
 460         }
 461     }
 462
 463   return c;
 464 }
 465
 466 \f
 467 /* Update the table Vcharset_table with the given arguments (see the
 468    document of `define-charset' for the meaning of each argument).
 469    Several other table contents are also updated.  The caller should
 470    check the validity of CHARSET-ID and the remaining arguments in
 471    advance.  */
 472
 473 void
 474 update_charset_table (charset_id, dimension, chars, width, direction,
 475                       iso_final_char, iso_graphic_plane,
 476                       short_name, long_name, description)
 477      Lisp_Object charset_id, dimension, chars, width, direction;
 478      Lisp_Object iso_final_char, iso_graphic_plane;
 479      Lisp_Object short_name, long_name, description;
 480 {
 481   int charset = XINT (charset_id);
 482   int bytes;
 483   unsigned char leading_code_base, leading_code_ext;
 484
 485   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 486     CHARSET_TABLE_ENTRY (charset)
 487       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 488
 489   if (NILP (long_name))
 490     long_name = short_name;
 491   if (NILP (description))
 492     description = long_name;
 493
 494   /* Get byte length of multibyte form, base leading-code, and
 495      extended leading-code of the charset.  See the comment under the
 496      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 497   bytes = XINT (dimension);
 498   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 499     {
 500       /* Official charset, it doesn't have an extended leading-code.  */
 501       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 502         bytes += 1; /* For a base leading-code.  */
 503       leading_code_base = charset;
 504       leading_code_ext = 0;
 505     }
 506   else
 507     {
 508       /* Private charset.  */
 509       bytes += 2; /* For base and extended leading-codes.  */
 510       leading_code_base
 511         = (charset < LEADING_CODE_EXT_12
 512            ? LEADING_CODE_PRIVATE_11
 513            : (charset < LEADING_CODE_EXT_21
 514               ? LEADING_CODE_PRIVATE_12
 515               : (charset < LEADING_CODE_EXT_22
 516                  ? LEADING_CODE_PRIVATE_21
 517                  : LEADING_CODE_PRIVATE_22)));
 518       leading_code_ext = charset;
 519       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 520         error ("Invalid dimension for the charset-ID %d", charset);
 521     }
 522
 523   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 524   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 525   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 526   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 527   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 528   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 529   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 530     = make_number (leading_code_base);
 531   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 532     = make_number (leading_code_ext);
 533   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 534   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 535     = iso_graphic_plane;
 536   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 537   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 538   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 539   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 540
 541   {
 542     /* If we have already defined a charset which has the same
 543        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 544        DIRECTION, we must update the entry REVERSE-CHARSET of both
 545        charsets.  If there's no such charset, the value of the entry
 546        is set to nil.  */
 547     int i;
 548
 549     for (i = 0; i <= MAX_CHARSET; i++)
 550       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 551         {
 552           if (CHARSET_DIMENSION (i) == XINT (dimension)
 553               && CHARSET_CHARS (i) == XINT (chars)
 554               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 555               && CHARSET_DIRECTION (i) != XINT (direction))
 556             {
 557               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 558                 = make_number (i);
 559               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 560               break;
 561             }
 562         }
 563     if (i > MAX_CHARSET)
 564       /* No such a charset.  */
 565       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 566         = make_number (-1);
 567   }
 568
 569   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 570       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 571     {
 572       bytes_by_char_head[leading_code_base] = bytes;
 573       width_by_char_head[leading_code_base] = XINT (width);
 574
 575       /* Update table emacs_code_class.  */
 576       emacs_code_class[charset] = (bytes == 2
 577                                    ? EMACS_leading_code_2
 578                                    : (bytes == 3
 579                                       ? EMACS_leading_code_3
 580                                       : EMACS_leading_code_4));
 581     }
 582
 583   /* Update table iso_charset_table.  */
 584   if (XINT (iso_final_char) >= 0
 585       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 586     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 587 }
 588
 589 #ifdef emacs
 590
 591 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 592    is invalid.  */
 593 int
 594 get_charset_id (charset_symbol)
 595      Lisp_Object charset_symbol;
 596 {
 597   Lisp_Object val;
 598   int charset;
 599
 600   return ((SYMBOLP (charset_symbol)
 601            && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
 602            && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
 603                CHARSET_VALID_P (charset)))
 604           ? charset : -1);
 605 }
 606
 607 /* Return an identification number for a new private charset of
 608    DIMENSION and WIDTH.  If there's no more room for the new charset,
 609    return 0.  */
 610 Lisp_Object
 611 get_new_private_charset_id (dimension, width)
 612      int dimension, width;
 613 {
 614   int charset, from, to;
 615
 616   if (dimension == 1)
 617     {
 618       from = LEADING_CODE_EXT_11;
 619       to = LEADING_CODE_EXT_21;
 620     }
 621   else
 622     {
 623       from = LEADING_CODE_EXT_21;
 624       to = LEADING_CODE_EXT_MAX + 1;
 625     }
 626
 627   for (charset = from; charset < to; charset++)
 628     if (!CHARSET_DEFINED_P (charset)) break;
 629
 630   return make_number (charset < to ? charset : 0);
 631 }
 632
 633 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 634   "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
 635 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
 636  treated as a private charset.\n\
 637 INFO-VECTOR is a vector of the format:\n\
 638    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
 639     SHORT-NAME LONG-NAME DESCRIPTION]\n\
 640 The meanings of each elements is as follows:\n\
 641 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
 642 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
 643 WIDTH (integer) is the number of columns a character in the charset\n\
 644 occupies on the screen: one of 0, 1, and 2.\n\
 645 \n\
 646 DIRECTION (integer) is the rendering direction of characters in the\n\
 647 charset when rendering.  If 0, render from left to right, else\n\
 648 render from right to left.\n\
 649 \n\
 650 ISO-FINAL-CHAR (character) is the final character of the\n\
 651 corresponding ISO 2022 charset.\n\
 652 It may be -1 if the charset is internal use only.\n\
 653 \n\
 654 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
 655 while encoding to variants of ISO 2022 coding system, one of the\n\
 656 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
 657 It may be -1 if the charset is internal use only.\n\
 658 \n\
 659 SHORT-NAME (string) is the short name to refer to the charset.\n\
 660 \n\
 661 LONG-NAME (string) is the long name to refer to the charset.\n\
 662 \n\
 663 DESCRIPTION (string) is the description string of the charset.")
 664   (charset_id, charset_symbol, info_vector)
 665      Lisp_Object charset_id, charset_symbol, info_vector;
 666 {
 667   Lisp_Object *vec;
 668
 669   if (!NILP (charset_id))
 670     CHECK_NUMBER (charset_id, 0);
 671   CHECK_SYMBOL (charset_symbol, 1);
 672   CHECK_VECTOR (info_vector, 2);
 673
 674   if (! NILP (charset_id))
 675     {
 676       if (! CHARSET_VALID_P (XINT (charset_id)))
 677         error ("Invalid CHARSET: %d", XINT (charset_id));
 678       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 679         error ("Already defined charset: %d", XINT (charset_id));
 680     }
 681
 682   vec = XVECTOR (info_vector)->contents;
 683   if (XVECTOR (info_vector)->size != 9
 684       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 685       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 686       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 687       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 688       || !INTEGERP (vec[4])
 689       || !(XINT (vec[4]) == -1 || XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
 690       || !INTEGERP (vec[5])
 691       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 692       || !STRINGP (vec[6])
 693       || !STRINGP (vec[7])
 694       || !STRINGP (vec[8]))
 695     error ("Invalid info-vector argument for defining charset %s",
 696            XSYMBOL (charset_symbol)->name->data);
 697
 698   if (NILP (charset_id))
 699     {
 700       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 701       if (XINT (charset_id) == 0)
 702         error ("There's no room for a new private charset %s",
 703                XSYMBOL (charset_symbol)->name->data);
 704     }
 705
 706   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 707                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 708   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 709   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 710   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 711   return Qnil;
 712 }
 713
 714 DEFUN ("generic-character-list", Fgeneric_character_list,
 715        Sgeneric_character_list, 0, 0, 0,
 716   "Return a list of all possible generic characters.\n\
 717 It includes a generic character for a charset not yet defined.")
 718   ()
 719 {
 720   return Vgeneric_character_list;
 721 }
 722
 723 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 724        Sget_unused_iso_final_char, 2, 2, 0,
 725   "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
 726 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
 727 CHARS is the number of characters in a dimension: 94 or 96.\n\
 728 \n\
 729 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
 730 If there's no unused final char for the specified kind of charset,\n\
 731 return nil.")
 732   (dimension, chars)
 733      Lisp_Object dimension, chars;
 734 {
 735   int final_char;
 736
 737   CHECK_NUMBER (dimension, 0);
 738   CHECK_NUMBER (chars, 1);
 739   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 740     error ("Invalid charset dimension %d, it should be 1 or 2",
 741            XINT (dimension));
 742   if (XINT (chars) != 94 && XINT (chars) != 96)
 743     error ("Invalid charset chars %d, it should be 94 or 96",
 744            XINT (chars));
 745   for (final_char = '0'; final_char <= '?'; final_char++)
 746     {
 747       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 748         break;
 749     }
 750   return (final_char <= '?' ? make_number (final_char) : Qnil);
 751 }
 752
 753 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 754        4, 4, 0,
 755   "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
 756 CHARSET should be defined by `defined-charset' in advance.")
 757   (dimension, chars, final_char, charset_symbol)
 758      Lisp_Object dimension, chars, final_char, charset_symbol;
 759 {
 760   int charset;
 761
 762   CHECK_NUMBER (dimension, 0);
 763   CHECK_NUMBER (chars, 1);
 764   CHECK_NUMBER (final_char, 2);
 765   CHECK_SYMBOL (charset_symbol, 3);
 766
 767   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 768     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 769   if (XINT (chars) != 94 && XINT (chars) != 96)
 770     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 771   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 772     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 773   if ((charset = get_charset_id (charset_symbol)) < 0)
 774     error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
 775
 776   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 777   return Qnil;
 778 }
 779
 780 /* Return information about charsets in the text at PTR of NBYTES
 781    bytes, which are NCHARS characters.  The value is:
 782
 783         0: Each character is represented by one byte.  This is always
 784            true for unibyte text.
 785         1: No charsets other than ascii eight-bit-control,
 786            eight-bit-graphic, and latin-1 are found.
 787         2: Otherwise.
 788
 789    In addition, if CHARSETS is nonzero, for each found charset N, set
 790    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 791    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 792    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 793    1 (note that there's no charset whose ID is 1).  */
 794
 795 int
 796 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 797      unsigned char *ptr;
 798      int nchars, nbytes, *charsets;
 799      Lisp_Object table;
 800 {
 801   if (nchars == nbytes)
 802     {
 803       if (charsets && nbytes > 0)
 804         {
 805           unsigned char *endp = ptr + nbytes;
 806           int maskbits = 0;
 807
 808           while (ptr < endp && maskbits != 7)
 809             {
 810               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 811               ptr++;
 812             }
 813
 814           if (maskbits & 1)
 815             charsets[CHARSET_ASCII] = 1;
 816           if (maskbits & 2)
 817             charsets[CHARSET_8_BIT_CONTROL] = 1;
 818           if (maskbits & 4)
 819             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 820         }
 821       return 0;
 822     }
 823   else
 824     {
 825       int return_val = 1;
 826       int bytes, charset, c1, c2;
 827
 828       if (! CHAR_TABLE_P (table))
 829         table = Qnil;
 830
 831       while (nchars-- > 0)
 832         {
 833           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 834           ptr += bytes;
 835
 836           if (!CHARSET_DEFINED_P (charset))
 837             charset = 1;
 838           else if (! NILP (table))
 839             {
 840               int c = translate_char (table, -1, charset, c1, c2);
 841               if (c >= 0)
 842                 charset = CHAR_CHARSET (c);
 843             }
 844
 845           if (return_val == 1
 846               && charset != CHARSET_ASCII
 847               && charset != CHARSET_8_BIT_CONTROL
 848               && charset != CHARSET_8_BIT_GRAPHIC
 849               && charset != charset_latin_iso8859_1)
 850             return_val = 2;
 851
 852           if (charsets)
 853             charsets[charset] = 1;
 854           else if (return_val == 2)
 855             break;
 856         }
 857       return return_val;
 858     }
 859 }
 860
 861 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 862        2, 3, 0,
 863   "Return a list of charsets in the region between BEG and END.\n\
 864 BEG and END are buffer positions.\n\
 865 Optional arg TABLE if non-nil is a translation table to look up.\n\
 866 \n\
 867 If the region contains invalid multibyte characters,\n\
 868 `unknown' is included in the returned list.\n\
 869 \n\
 870 If the current buffer is unibyte, the returned list may contain\n\
 871 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 872   (beg, end, table)
 873      Lisp_Object beg, end, table;
 874 {
 875   int charsets[MAX_CHARSET + 1];
 876   int from, from_byte, to, stop, stop_byte, i;
 877   Lisp_Object val;
 878
 879   validate_region (&beg, &end);
 880   from = XFASTINT (beg);
 881   stop = to = XFASTINT (end);
 882
 883   if (from < GPT && GPT < to)
 884     {
 885       stop = GPT;
 886       stop_byte = GPT_BYTE;
 887     }
 888   else
 889     stop_byte = CHAR_TO_BYTE (stop);
 890
 891   from_byte = CHAR_TO_BYTE (from);
 892
 893   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 894   while (1)
 895     {
 896       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 897                             stop_byte - from_byte, charsets, table);
 898       if (stop < to)
 899         {
 900           from = stop, from_byte = stop_byte;
 901           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 902         }
 903       else
 904         break;
 905     }
 906
 907   val = Qnil;
 908   if (charsets[1])
 909     val = Fcons (Qunknown, val);
 910   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 911     if (charsets[i])
 912       val = Fcons (CHARSET_SYMBOL (i), val);
 913   if (charsets[0])
 914     val = Fcons (Qascii, val);
 915   return val;
 916 }
 917
 918 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 919        1, 2, 0,
 920   "Return a list of charsets in STR.\n\
 921 Optional arg TABLE if non-nil is a translation table to look up.\n\
 922 \n\
 923 If the string contains invalid multibyte characters,\n\
 924 `unknown' is included in the returned list.\n\
 925 \n\
 926 If STR is unibyte, the returned list may contain\n\
 927 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 928   (str, table)
 929      Lisp_Object str, table;
 930 {
 931   int charsets[MAX_CHARSET + 1];
 932   int i;
 933   Lisp_Object val;
 934
 935   CHECK_STRING (str, 0);
 936
 937   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 938   find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
 939                         STRING_BYTES (XSTRING (str)), charsets, table);
 940
 941   val = Qnil;
 942   if (charsets[1])
 943     val = Fcons (Qunknown, val);
 944   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 945     if (charsets[i])
 946       val = Fcons (CHARSET_SYMBOL (i), val);
 947   if (charsets[0])
 948     val = Fcons (Qascii, val);
 949   return val;
 950 }
 951
 952 \f
 953 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 954   "")
 955   (charset, code1, code2)
 956      Lisp_Object charset, code1, code2;
 957 {
 958   int charset_id, c1, c2;
 959
 960   CHECK_NUMBER (charset, 0);
 961   charset_id = XINT (charset);
 962   if (!CHARSET_DEFINED_P (charset_id))
 963     error ("Invalid charset ID: %d", XINT (charset));
 964
 965   if (NILP (code1))
 966     c1 = 0;
 967   else
 968     {
 969       CHECK_NUMBER (code1, 1);
 970       c1 = XINT (code1);
 971     }
 972   if (NILP (code2))
 973     c2 = 0;
 974   else
 975     {
 976       CHECK_NUMBER (code2, 2);
 977       c2 = XINT (code2);
 978     }
 979
 980   if (charset_id == CHARSET_ASCII)
 981     {
 982       if (c1 < 0 || c1 > 0x7F)
 983         goto invalid_code_posints;
 984       return make_number (c1);
 985     }
 986   else if (charset_id == CHARSET_8_BIT_CONTROL)
 987     {
 988       if (NILP (code1))
 989         c1 = 0x80;
 990       else if (c1 < 0x80 || c1 > 0x9F)
 991         goto invalid_code_posints;
 992       return make_number (c1);
 993     }
 994   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
 995     {
 996       if (NILP (code1))
 997         c1 = 0xA0;
 998       else if (c1 < 0xA0 || c1 > 0xFF)
 999         goto invalid_code_posints;
1000       return make_number (c1);
1001     }
1002   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1003     goto invalid_code_posints;
1004   c1 &= 0x7F;
1005   c2 &= 0x7F;
1006   if (c1 == 0
1007       ? c2 != 0
1008       : (c2 == 0
1009          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1010          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1011     goto invalid_code_posints;
1012   return make_number (MAKE_CHAR (charset_id, c1, c2));
1013
1014  invalid_code_posints:
1015   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1016 }
1017
1018 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1019   "Return list of charset and one or two position-codes of CHAR.\n\
1020 If CHAR is invalid as a character code,\n\
1021 return a list of symbol `unknown' and CHAR.")
1022   (ch)
1023      Lisp_Object ch;
1024 {
1025   int c, charset, c1, c2;
1026
1027   CHECK_NUMBER (ch, 0);
1028   c = XFASTINT (ch);
1029   if (!CHAR_VALID_P (c, 1))
1030     return Fcons (Qunknown, Fcons (ch, Qnil));
1031   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1032   return (c2 >= 0
1033           ? Fcons (CHARSET_SYMBOL (charset),
1034                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1035           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1036 }
1037
1038 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1039   "Return charset of CHAR.")
1040   (ch)
1041      Lisp_Object ch;
1042 {
1043   CHECK_NUMBER (ch, 0);
1044
1045   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1046 }
1047
1048 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1049   "Return charset of a character in the current buffer at position POS.\n\
1050 If POS is nil, it defauls to the current point.\n\
1051 If POS is out of range, the value is nil.")
1052   (pos)
1053      Lisp_Object pos;
1054 {
1055   Lisp_Object ch;
1056   int charset;
1057
1058   ch = Fchar_after (pos);
1059   if (! INTEGERP (ch))
1060     return ch;
1061   charset = CHAR_CHARSET (XINT (ch));
1062   return CHARSET_SYMBOL (charset);
1063 }
1064
1065 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1066   "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1067 \n\
1068 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1069 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1070 where as Emacs distinguishes them by charset symbol.\n\
1071 See the documentation of the function `charset-info' for the meanings of\n\
1072 DIMENSION, CHARS, and FINAL-CHAR.")
1073   (dimension, chars, final_char)
1074      Lisp_Object dimension, chars, final_char;
1075 {
1076   int charset;
1077
1078   CHECK_NUMBER (dimension, 0);
1079   CHECK_NUMBER (chars, 1);
1080   CHECK_NUMBER (final_char, 2);
1081
1082   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1083     return Qnil;
1084   return CHARSET_SYMBOL (charset);
1085 }
1086
1087 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1088    generic character.  If GENERICP is zero, return nonzero iff C is a
1089    valid normal character.  Do not call this function directly,
1090    instead use macro CHAR_VALID_P.  */
1091 int
1092 char_valid_p (c, genericp)
1093      int c, genericp;
1094 {
1095   int charset, c1, c2;
1096
1097   if (c < 0 || c >= MAX_CHAR)
1098     return 0;
1099   if (SINGLE_BYTE_CHAR_P (c))
1100     return 1;
1101   SPLIT_CHAR (c, charset, c1, c2);
1102   if (genericp)
1103     {
1104       if (c1)
1105         {
1106           if (c2 <= 0) c2 = 0x20;
1107         }
1108       else
1109         {
1110           if (c2 <= 0) c1 = c2 = 0x20;
1111         }
1112     }
1113   return (CHARSET_DEFINED_P (charset)
1114           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1115 }
1116
1117 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1118   "Return t if OBJECT is a valid normal character.\n\
1119 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1120 a valid generic character.")
1121   (object, genericp)
1122      Lisp_Object object, genericp;
1123 {
1124   if (! NATNUMP (object))
1125     return Qnil;
1126   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1127 }
1128
1129 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1130        Sunibyte_char_to_multibyte, 1, 1, 0,
1131   "Convert the unibyte character CH to multibyte character.\n\
1132 The conversion is done based on `nonascii-translation-table' (which see)\n\
1133  or `nonascii-insert-offset' (which see).")
1134   (ch)
1135      Lisp_Object ch;
1136 {
1137   int c;
1138
1139   CHECK_NUMBER (ch, 0);
1140   c = XINT (ch);
1141   if (c < 0 || c >= 0400)
1142     error ("Invalid unibyte character: %d", c);
1143   c = unibyte_char_to_multibyte (c);
1144   if (c < 0)
1145     error ("Can't convert to multibyte character: %d", XINT (ch));
1146   return make_number (c);
1147 }
1148
1149 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1150        Smultibyte_char_to_unibyte, 1, 1, 0,
1151   "Convert the multibyte character CH to unibyte character.\n\
1152 The conversion is done based on `nonascii-translation-table' (which see)\n\
1153  or `nonascii-insert-offset' (which see).")
1154   (ch)
1155      Lisp_Object ch;
1156 {
1157   int c;
1158
1159   CHECK_NUMBER (ch, 0);
1160   c = XINT (ch);
1161   if (! CHAR_VALID_P (c, 0))
1162     error ("Invalid multibyte character: %d", c);
1163   c = multibyte_char_to_unibyte (c, Qnil);
1164   if (c < 0)
1165     error ("Can't convert to unibyte character: %d", XINT (ch));
1166   return make_number (c);
1167 }
1168
1169 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1170   "Return 1 regardless of the argument CHAR.\n\
1171 This is now an obsolete function.  We keep it just for backward compatibility.")
1172   (ch)
1173      Lisp_Object ch;
1174 {
1175   CHECK_NUMBER (ch, 0);
1176   return make_number (1);
1177 }
1178
1179 /* Return how many bytes C will occupy in a multibyte buffer.
1180    Don't call this function directly, instead use macro CHAR_BYTES.  */
1181 int
1182 char_bytes (c)
1183      int c;
1184 {
1185   int charset;
1186
1187   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1188     return 1;
1189   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1190     return 1;
1191
1192   charset = CHAR_CHARSET (c);
1193   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1194 }
1195
/* Return the width of character of which multi-byte form starts with
   C.  The width is measured by how many columns occupied on the
   screen when displayed in the current buffer.

   C is evaluated more than once, so it must be free of side
   effects.  Every use of C is parenthesized so that any expression
   may be passed.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1213
1214 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1215   "Return width of CHAR when displayed in the current buffer.\n\
1216 The width is measured by how many columns it occupies on the screen.\n\
1217 Tab is taken to occupy `tab-width' columns.")
1218   (ch)
1219        Lisp_Object ch;
1220 {
1221   Lisp_Object val, disp;
1222   int c;
1223   struct Lisp_Char_Table *dp = buffer_display_table ();
1224
1225   CHECK_NUMBER (ch, 0);
1226
1227   c = XINT (ch);
1228
1229   /* Get the way the display table would display it.  */
1230   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1231
1232   if (VECTORP (disp))
1233     XSETINT (val, XVECTOR (disp)->size);
1234   else if (SINGLE_BYTE_CHAR_P (c))
1235     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1236   else
1237     {
1238       int charset = CHAR_CHARSET (c);
1239
1240       XSETFASTINT (val, CHARSET_WIDTH (charset));
1241     }
1242   return val;
1243 }
1244
/* Return the number of screen columns that the LEN bytes at STR
   occupy when displayed in the current buffer.  This is
   c_string_width with no precision limit.  */

int
strwidth (str, len)
     unsigned char *str;
     int len;
{
  int columns;

  columns = c_string_width (str, len, -1, NULL, NULL);
  return columns;
}
1256
1257 /* Return width of string STR of length LEN when displayed in the
1258    current buffer.  The width is measured by how many columns it
1259    occupies on the screen.  If PRECISION > 0, return the width of
1260    longest substring that doesn't exceed PRECISION, and set number of
1261    characters and bytes of the substring in *NCHARS and *NBYTES
1262    respectively.  */
1263
1264 int
1265 c_string_width (str, len, precision, nchars, nbytes)
1266      unsigned char *str;
1267      int precision, *nchars, *nbytes;
1268 {
1269   int i = 0, i_byte = 0;
1270   int width = 0;
1271   int chars;
1272   struct Lisp_Char_Table *dp = buffer_display_table ();
1273
1274   while (i_byte < len)
1275     {
1276       int bytes, thiswidth;
1277       Lisp_Object val;
1278
1279       if (dp)
1280         {
1281           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1282
1283           chars = 1;
1284           val = DISP_CHAR_VECTOR (dp, c);
1285           if (VECTORP (val))
1286             thiswidth = XVECTOR (val)->size;
1287           else
1288             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1289         }
1290       else
1291         {
1292           chars = 1;
1293           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1294           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1295         }
1296
1297       if (precision > 0
1298           && (width + thiswidth > precision))
1299         {
1300           *nchars = i;
1301           *nbytes = i_byte;
1302           return width;
1303         }
1304       i++;
1305       i_byte += bytes;
1306       width += thiswidth;
1307   }
1308
1309   if (precision > 0)
1310     {
1311       *nchars = i;
1312       *nbytes = i_byte;
1313     }
1314
1315   return width;
1316 }
1317
1318 /* Return width of Lisp string STRING when displayed in the current
1319    buffer.  The width is measured by how many columns it occupies on
1320    the screen while paying attention to compositions.  If PRECISION >
1321    0, return the width of longest substring that doesn't exceed
1322    PRECISION, and set number of characters and bytes of the substring
1323    in *NCHARS and *NBYTES respectively.  */
1324
1325 int
1326 lisp_string_width (string, precision, nchars, nbytes)
1327      Lisp_Object string;
1328      int precision, *nchars, *nbytes;
1329 {
1330   int len = XSTRING (string)->size;
1331   int len_byte = STRING_BYTES (XSTRING (string));
1332   unsigned char *str = XSTRING (string)->data;
1333   int i = 0, i_byte = 0;
1334   int width = 0;
1335   struct Lisp_Char_Table *dp = buffer_display_table ();
1336
1337   while (i < len)
1338     {
1339       int chars, bytes, thiswidth;
1340       Lisp_Object val;
1341       int cmp_id;
1342       int ignore, end;
1343
1344       if (find_composition (i, -1, &ignore, &end, &val, string)
1345           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1346               >= 0))
1347         {
1348           thiswidth = composition_table[cmp_id]->width;
1349           chars = end - i;
1350           bytes = string_char_to_byte (string, end) - i_byte;
1351         }
1352       else if (dp)
1353         {
1354           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1355
1356           chars = 1;
1357           val = DISP_CHAR_VECTOR (dp, c);
1358           if (VECTORP (val))
1359             thiswidth = XVECTOR (val)->size;
1360           else
1361             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1362         }
1363       else
1364         {
1365           chars = 1;
1366           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1367           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1368         }
1369
1370       if (precision > 0
1371           && (width + thiswidth > precision))
1372         {
1373           *nchars = i;
1374           *nbytes = i_byte;
1375           return width;
1376         }
1377       i += chars;
1378       i_byte += bytes;
1379       width += thiswidth;
1380   }
1381
1382   if (precision > 0)
1383     {
1384       *nchars = i;
1385       *nbytes = i_byte;
1386     }
1387
1388   return width;
1389 }
1390
1391 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1392   "Return width of STRING when displayed in the current buffer.\n\
1393 Width is measured by how many columns it occupies on the screen.\n\
1394 When calculating width of a multibyte character in STRING,\n\
1395 only the base leading-code is considered; the validity of\n\
1396 the following bytes is not checked.  Tabs in STRING are always\n\
1397 taken to occupy `tab-width' columns.")
1398   (str)
1399      Lisp_Object str;
1400 {
1401   Lisp_Object val;
1402
1403   CHECK_STRING (str, 0);
1404   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
1405   return val;
1406 }
1407
1408 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1409   "Return the direction of CHAR.\n\
1410 The returned value is 0 for left-to-right and 1 for right-to-left.")
1411   (ch)
1412      Lisp_Object ch;
1413 {
1414   int charset;
1415
1416   CHECK_NUMBER (ch, 0);
1417   charset = CHAR_CHARSET (XFASTINT (ch));
1418   if (!CHARSET_DEFINED_P (charset))
1419     invalid_character (XINT (ch));
1420   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1421 }
1422
1423 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1424   "Return number of characters between BEG and END.")
1425   (beg, end)
1426      Lisp_Object beg, end;
1427 {
1428   int from, to;
1429
1430   CHECK_NUMBER_COERCE_MARKER (beg, 0);
1431   CHECK_NUMBER_COERCE_MARKER (end, 1);
1432
1433   from = min (XFASTINT (beg), XFASTINT (end));
1434   to = max (XFASTINT (beg), XFASTINT (end));
1435
1436   return make_number (to - from);
1437 }
1438
1439 /* Return the number of characters in the NBYTES bytes at PTR.
1440    This works by looking at the contents and checking for multibyte sequences.
1441    However, if the current buffer has enable-multibyte-characters = nil,
1442    we treat each byte as a character.  */
1443
1444 int
1445 chars_in_text (ptr, nbytes)
1446      unsigned char *ptr;
1447      int nbytes;
1448 {
1449   /* current_buffer is null at early stages of Emacs initialization.  */
1450   if (current_buffer == 0
1451       || NILP (current_buffer->enable_multibyte_characters))
1452     return nbytes;
1453
1454   return multibyte_chars_in_text (ptr, nbytes);
1455 }
1456
/* Return the number of characters in the NBYTES bytes at PTR,
   found by stepping through the text one multibyte sequence at a
   time.  The value of enable-multibyte-characters is not
   consulted.  */

int
multibyte_chars_in_text (ptr, nbytes)
     unsigned char *ptr;
     int nbytes;
{
  unsigned char *limit = ptr + nbytes;
  int count, seq_len;

  for (count = 0; ptr < limit; count++)
    {
      /* SEQ_LEN receives the byte length of the sequence at PTR.  */
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seq_len);
      ptr += seq_len;
    }

  return count;
}
1481
/* Scan the LEN bytes of unibyte text at STR as if they were
   multibyte text, and store the number of characters in *NCHARS and
   the number of bytes in *NBYTES.  A byte that does not start a
   multibyte sequence is counted as 2 bytes, because 8-bit characters
   in the range 0x80..0x9F are represented by 2 bytes in multibyte
   text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     unsigned char *str;
     int len, *nchars, *nbytes;
{
  unsigned char *limit = str + len;
  int seq_len, char_count = 0, byte_count = 0;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, seq_len))
        {
          /* A sequence taken over as is.  */
          str += seq_len;
          byte_count += seq_len;
        }
      else
        {
          /* A single byte that will need a 2-byte form.  */
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1506
1507 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1508    It actually converts only 8-bit characters in the range 0x80..0x9F
1509    that don't contruct multibyte characters to multibyte forms.  If
1510    NCHARS is nonzero, set *NCHARS to the number of characters in the
1511    text.  It is assured that we can use LEN bytes at STR as a work
1512    area and that is enough.  Return the number of bytes of the
1513    resulting text.  */
1514
1515 int
1516 str_as_multibyte (str, len, nbytes, nchars)
1517      unsigned char *str;
1518      int len, nbytes, *nchars;
1519 {
1520   unsigned char *p = str, *endp = str + nbytes;
1521   unsigned char *to;
1522   int chars = 0;
1523   int n;
1524
1525   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1526     p += n, chars++;
1527   if (nchars)
1528     *nchars = chars;
1529   if (p == endp)
1530     return nbytes;
1531
1532   to = p;
1533   nbytes = endp - p;
1534   endp = str + len;
1535   safe_bcopy (p, endp - nbytes, nbytes);
1536   p = endp - nbytes;
1537   while (p < endp)
1538     {
1539       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1540         {
1541           while (n--)
1542             *to++ = *p++;
1543         }
1544       else
1545         {
1546           *to++ = LEADING_CODE_8_BIT_CONTROL;
1547           *to++ = *p++ + 0x20;
1548         }
1549       chars++;
1550     }
1551   if (nchars)
1552     *nchars = chars;
1553   return (to - str);
1554 }
1555
/* Return the number of bytes that the LEN bytes of unibyte string at
   STR may occupy after conversion to a multibyte string by
   `str_to_multibyte'.  Bytes in the range 0x80..0x9F count as two
   bytes; every other byte counts as one.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      unsigned char byte = str[i];

      if (byte >= 0x80 && byte < 0xA0)
        total += 2;
      else
        total += 1;
    }
  return total;
}
1572
1573 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1574    that contains the same single-byte characters.  It actually
1575    converts all 8-bit characters to multibyte forms.  It is assured
1576    that we can use LEN bytes at STR as a work area and that is
1577    enough.  */
1578
1579 int
1580 str_to_multibyte (str, len, bytes)
1581      unsigned char *str;
1582      int len, bytes;
1583 {
1584   unsigned char *p = str, *endp = str + bytes;
1585   unsigned char *to;
1586
1587   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1588   if (p == endp)
1589     return bytes;
1590   to = p;
1591   bytes = endp - p;
1592   endp = str + len;
1593   safe_bcopy (p, endp - bytes, bytes);
1594   p = endp - bytes;
1595   while (p < endp)
1596     {
1597       if (*p < 0x80 || *p >= 0xA0)
1598         *to++ = *p++;
1599       else
1600         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1601     }
1602   return (to - str);
1603 }
1604
1605 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1606    actually converts only 8-bit characters in the range 0x80..0x9F to
1607    unibyte forms.  */
1608
1609 int
1610 str_as_unibyte (str, bytes)
1611      unsigned char *str;
1612      int bytes;
1613 {
1614   unsigned char *p = str, *endp = str + bytes;
1615   unsigned char *to = str;
1616
1617   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1618   to = p;
1619   while (p < endp)
1620     {
1621       if (*p == LEADING_CODE_8_BIT_CONTROL)
1622         *to++ = *(p + 1) - 0x20, p += 2;
1623       else
1624         *to++ = *p++;
1625     }
1626   return (to - str);
1627 }
1628
1629 \f
1630 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1631   "Concatenate all the argument characters and make the result a string.")
1632   (n, args)
1633      int n;
1634      Lisp_Object *args;
1635 {
1636   int i;
1637   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1638   unsigned char *p = buf;
1639   int c;
1640   int multibyte = 0;
1641
1642   for (i = 0; i < n; i++)
1643     {
1644       CHECK_NUMBER (args[i], 0);
1645       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1646         multibyte = 1;
1647     }
1648
1649   for (i = 0; i < n; i++)
1650     {
1651       c = XINT (args[i]);
1652       if (multibyte)
1653         p += CHAR_STRING (c, p);
1654       else
1655         *p++ = c;
1656     }
1657
1658   return make_string_from_bytes (buf, n, p - buf);
1659 }
1660
1661 #endif /* emacs */
1662 \f
1663 int
1664 charset_id_internal (charset_name)
1665      char *charset_name;
1666 {
1667   Lisp_Object val;
1668
1669   val= Fget (intern (charset_name), Qcharset);
1670   if (!VECTORP (val))
1671     error ("Charset %s is not defined", charset_name);
1672
1673   return (XINT (XVECTOR (val)->contents[0]));
1674 }
1675
1676 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1677        Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1678    ()
1679 {
1680   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1681   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1682   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1683   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1684   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1685   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1686   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1687   return Qnil;
1688 }
1689
1690 void
1691 init_charset_once ()
1692 {
1693   int i, j, k;
1694
1695   staticpro (&Vcharset_table);
1696   staticpro (&Vcharset_symbol_table);
1697   staticpro (&Vgeneric_character_list);
1698
1699   /* This has to be done here, before we call Fmake_char_table.  */
1700   Qcharset_table = intern ("charset-table");
1701   staticpro (&Qcharset_table);
1702
1703   /* Intern this now in case it isn't already done.
1704      Setting this variable twice is harmless.
1705      But don't staticpro it here--that is done in alloc.c.  */
1706   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1707
1708   /* Now we are ready to set up this property, so we can
1709      create the charset table.  */
1710   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1711   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1712
1713   Qunknown = intern ("unknown");
1714   staticpro (&Qunknown);
1715   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1716                                         Qunknown);
1717
1718   /* Setup tables.  */
1719   for (i = 0; i < 2; i++)
1720     for (j = 0; j < 2; j++)
1721       for (k = 0; k < 128; k++)
1722         iso_charset_table [i][j][k] = -1;
1723
1724   for (i = 0; i < 256; i++)
1725     bytes_by_char_head[i] = 1;
1726   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1727   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1728   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1729   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1730
1731   for (i = 0; i < 128; i++)
1732     width_by_char_head[i] = 1;
1733   for (; i < 256; i++)
1734     width_by_char_head[i] = 4;
1735   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1736   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1737   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1738   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1739
1740   {
1741     Lisp_Object val;
1742
1743     val = Qnil;
1744     for (i = 0x81; i < 0x90; i++)
1745       val = Fcons (make_number ((i - 0x70) << 7), val);
1746     for (; i < 0x9A; i++)
1747       val = Fcons (make_number ((i - 0x8F) << 14), val);
1748     for (i = 0xA0; i < 0xF0; i++)
1749       val = Fcons (make_number ((i - 0x70) << 7), val);
1750     for (; i < 0xFF; i++)
1751       val = Fcons (make_number ((i - 0xE0) << 14), val);
1752     Vgeneric_character_list = Fnreverse (val);
1753   }
1754
1755   nonascii_insert_offset = 0;
1756   Vnonascii_translation_table = Qnil;
1757 }
1758
1759 #ifdef emacs
1760
1761 void
1762 syms_of_charset ()
1763 {
       /* Overview: intern the symbols used by this file, fill in the
          charset table entries for the three special charsets, register
          the subrs, and declare the Lisp variables together with their
          initial values.  Takes no arguments and returns nothing.  */

       /* Intern each symbol and hand the address of the C variable that
          holds it to staticpro.  */
1764   Qcharset = intern ("charset");
1765   staticpro (&Qcharset);
1766
1767   Qascii = intern ("ascii");
1768   staticpro (&Qascii);
1769
1770   Qeight_bit_control = intern ("eight-bit-control");
1771   staticpro (&Qeight_bit_control);
1772
1773   Qeight_bit_graphic = intern ("eight-bit-graphic");
1774   staticpro (&Qeight_bit_graphic);
1775
1776   /* Define special charsets ascii, eight-bit-control, and
1777      eight-bit-graphic.  */
       /* NOTE(review): the ten arguments of each call below are
          interpreted by update_charset_table, which is defined outside
          this chunk; check its parameter list before changing a value.
          After each call the charset's symbol is recorded with
          CHARSET_SYMBOL and the table entry is stored on that symbol
          under the `charset' property.  */
1778   update_charset_table (make_number (CHARSET_ASCII),
1779                         make_number (1), make_number (94),
1780                         make_number (1),
1781                         make_number (0),
1782                         make_number ('B'),
1783                         make_number (0),
1784                         build_string ("ASCII"),
1785                         Qnil,   /* same as above */
1786                         build_string ("ASCII (ISO646 IRV)"));
1787   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1788   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1789
       /* eight-bit-control: same three steps as for ascii.  */
1790   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1791                         make_number (1), make_number (96),
1792                         make_number (4),
1793                         make_number (0),
1794                         make_number (-1),
1795                         make_number (-1),
1796                         build_string ("8-bit control code (0x80..0x9F)"),
1797                         Qnil,   /* same as above */
1798                         Qnil);  /* same as above */
1799   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1800   Fput (Qeight_bit_control, Qcharset,
1801         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1802
       /* eight-bit-graphic: same three steps again; apart from the
          charset id and the name string, the arguments equal those
          passed for eight-bit-control.  */
1803   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1804                         make_number (1), make_number (96),
1805                         make_number (4),
1806                         make_number (0),
1807                         make_number (-1),
1808                         make_number (-1),
1809                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1810                         Qnil,   /* same as above */
1811                         Qnil);  /* same as above */
1812   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1813   Fput (Qeight_bit_graphic, Qcharset,
1814         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1815
       /* This symbol is passed to Fmake_char_table at the end of this
          function, so it must be interned first.  Its
          Qchar_table_extra_slots property is set to 0 here.  */
1816   Qauto_fill_chars = intern ("auto-fill-chars");
1817   staticpro (&Qauto_fill_chars);
1818   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1819
       /* Register the subr objects.  NOTE(review): presumably these are
          the DEFUNs earlier in this file -- they are not visible in this
          chunk.  */
1820   defsubr (&Sdefine_charset);
1821   defsubr (&Sgeneric_character_list);
1822   defsubr (&Sget_unused_iso_final_char);
1823   defsubr (&Sdeclare_equiv_charset);
1824   defsubr (&Sfind_charset_region);
1825   defsubr (&Sfind_charset_string);
1826   defsubr (&Smake_char_internal);
1827   defsubr (&Ssplit_char);
1828   defsubr (&Schar_charset);
1829   defsubr (&Scharset_after);
1830   defsubr (&Siso_charset);
1831   defsubr (&Schar_valid_p);
1832   defsubr (&Sunibyte_char_to_multibyte);
1833   defsubr (&Smultibyte_char_to_unibyte);
1834   defsubr (&Schar_bytes);
1835   defsubr (&Schar_width);
1836   defsubr (&Sstring_width);
1837   defsubr (&Schar_direction);
1838   defsubr (&Schars_in_region);
1839   defsubr (&Sstring);
1840   defsubr (&Ssetup_special_charsets);
1841
       /* Lisp variables.  Each DEFVAR_* is followed by the assignment
          that gives the variable its initial value.
          NOTE(review): the doc strings are presumably extracted from
          this source text by the documentation tooling, so keep the
          layout of each form intact -- confirm before reformatting.  */

       /* Starts out as the list (ascii eight-bit-control
          eight-bit-graphic).  */
1842   DEFVAR_LISP ("charset-list", &Vcharset_list,
1843     "List of charsets ever defined.");
1844   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1845                                         Fcons (Qeight_bit_graphic, Qnil)));
1846
       /* Starts out as a vector of 16 elements, all nil.  */
1847   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1848     "Vector of cons cell of a symbol and translation table ever defined.\n\
1849 An ID of a translation table is an index of this vector.");
1850   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1851
       /* The four leading-code-private-* variables are initialized from
          the LEADING_CODE_PRIVATE_* constants of the same suffix.  */
1852   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1853     "Leading-code of private TYPE9N charset of column-width 1.");
1854   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1855
1856   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1857     "Leading-code of private TYPE9N charset of column-width 2.");
1858   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1859
1860   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1861     "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1862   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1863
1864   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1865     "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1866   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1867
       /* Initial value 0.  */
1868   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1869     "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1870 This is used for converting unibyte text to multibyte,\n\
1871 and for inserting character codes specified by number.\n\n\
1872 This serves to convert a Latin-1 or similar 8-bit character code\n\
1873 to the corresponding Emacs multibyte character code.\n\
1874 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1875 for your choice of character set.\n\
1876 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1877   nonascii_insert_offset = 0;
1878
       /* Initial value nil.  */
1879   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1880     "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1881 This is used for converting unibyte text to multibyte,\n\
1882 and for inserting character codes specified by number.\n\n\
1883 Conversion is performed only when multibyte characters are enabled,\n\
1884 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1885 to the corresponding Emacs character code.\n\n\
1886 If this is nil, `nonascii-insert-offset' is used instead.\n\
1887 See also the docstring of `make-translation-table'.");
1888   Vnonascii_translation_table = Qnil;
1889
       /* A fresh char-table created with Qauto_fill_chars and Qnil, in
          which the entries for space and newline are then set to t.  */
1890   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1891     "A char-table for characters which invoke auto-filling.\n\
1892 Such characters have value t in this table.");
1893   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1894   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1895   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1896 }
1897
1898 #endif /* emacs */