code.delx.au - gnu-emacs/blob - src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 2001, 2002, 2003, 2004, 2005,
   3                  2006, 2007, 2008 Free Software Foundation, Inc.
   4    Copyright (C) 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004,
   5      2005, 2006, 2007, 2008
   6      National Institute of Advanced Industrial Science and Technology (AIST)
   7      Registration Number H14PRO021
   8
   9 This file is part of GNU Emacs.
  10
  11 GNU Emacs is free software; you can redistribute it and/or modify
  12 it under the terms of the GNU General Public License as published by
  13 the Free Software Foundation; either version 3, or (at your option)
  14 any later version.
  15
  16 GNU Emacs is distributed in the hope that it will be useful,
  17 but WITHOUT ANY WARRANTY; without even the implied warranty of
  18 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  19 GNU General Public License for more details.
  20
  21 You should have received a copy of the GNU General Public License
  22 along with GNU Emacs; see the file COPYING.  If not, write to
  23 the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
  24 Boston, MA 02110-1301, USA.  */
  25
/* Before reading this file, see the documentation in `charset.h' to
   understand the code here.  */
  28
  29 #ifdef emacs
  30 #include <config.h>
  31 #endif
  32
  33 #include <stdio.h>
  34
  35 #ifdef emacs
  36
  37 #include <sys/types.h>
  38 #include "lisp.h"
  39 #include "buffer.h"
  40 #include "charset.h"
  41 #include "composite.h"
  42 #include "coding.h"
  43 #include "disptab.h"
  44
  45 #else  /* not emacs */
  46
  47 #include "mulelib.h"
  48
  49 #endif /* emacs */
  50
/* Lisp symbols referenced from C.  Qcharset is the property name
   under which a charset symbol stores its info vector (see
   Fdefine_charset and get_charset_id).  The other symbols are
   presumably interned during initialization outside this part of the
   file -- TODO confirm.  */
Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
Lisp_Object Qunknown;

/* Declaration of special leading-codes.  */
EMACS_INT leading_code_private_11; /* for private DIMENSION1 of 1-column */
EMACS_INT leading_code_private_12; /* for private DIMENSION1 of 2-column */
EMACS_INT leading_code_private_21; /* for private DIMENSION2 of 1-column */
EMACS_INT leading_code_private_22; /* for private DIMENSION2 of 2-column */

/* Declaration of special charsets.  The values are set by
   Fsetup_special_charsets.  */
int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
/* NOTE(review): judging by the names, the next three are the charsets
   covering the Unicode ranges U+0100..U+24FF, U+2500..U+33FF and
   U+E000..U+FFFF -- confirm against charset.h.  */
int charset_mule_unicode_0100_24ff;
int charset_mule_unicode_2500_33ff;
int charset_mule_unicode_e000_ffff;

Lisp_Object Qcharset_table;

/* A char-table containing information of each character set.  */
Lisp_Object Vcharset_table;

/* A vector of charset symbols indexed by charset-id.  This is used
   only for returning a charset symbol from C functions.  */
Lisp_Object Vcharset_symbol_table;

/* A list of every charset symbol defined so far.  Fdefine_charset
   pushes each newly defined symbol onto the front.  */
Lisp_Object Vcharset_list;

/* Vector of all translation tables defined so far.
   The ID of a translation table is used to index this vector.  */
Lisp_Object Vtranslation_table_vector;

/* A char-table for characters which may invoke auto-filling.  */
Lisp_Object Vauto_fill_chars;

Lisp_Object Qauto_fill_chars;

/* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.
   Both are indexed by the first byte of a multibyte sequence;
   update_charset_table fills in the entries for official charsets.  */
int bytes_by_char_head[256];
int width_by_char_head[256];

/* Mapping table from ISO2022's charset (specified by DIMENSION,
   CHARS, and FINAL-CHAR) to Emacs' charset.  */
int iso_charset_table[2][2][128];

/* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
unsigned char *_fetch_multibyte_char_p;
int _fetch_multibyte_char_len;

/* Offset to add to a non-ASCII value when inserting it.  */
EMACS_INT nonascii_insert_offset;

/* Translation table for converting non-ASCII unibyte characters
   to multibyte codes, or nil.  */
Lisp_Object Vnonascii_translation_table;

/* List of all possible generic characters.  */
Lisp_Object Vgeneric_character_list;
 114 Lisp_Object Vgeneric_character_list;
 115
 116 \f
/* Report C as an invalid character by calling `error' with a message
   that shows the code in decimal, octal, and hexadecimal.  */
void
invalid_character (c)
     int c;
{
  error ("Invalid character: %d, #o%o, #x%x", c, c, c);
}
 123
 124 /* Parse string STR of length LENGTH and fetch information of a
 125    character at STR.  Set BYTES to the byte length the character
 126    occupies, CHARSET, C1, C2 to proper values of the character. */
 127
 128 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 129   do {                                                                       \
 130     (c1) = *(str);                                                           \
 131     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 132     if ((bytes) == 1)                                                        \
 133       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 134     else if ((bytes) == 2)                                                   \
 135       {                                                                      \
 136         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 137           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 138         else                                                                 \
 139           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 140       }                                                                      \
 141     else if ((bytes) == 3)                                                   \
 142       {                                                                      \
 143         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 144           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 145         else                                                                 \
 146           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 147       }                                                                      \
 148     else                                                                     \
 149       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 150   } while (0)
 151
 152 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.
 153    Note that this intentionally allows invalid components, such
 154    as 0xA0 0xA0, because there exist many files that contain
 155    such invalid byte sequences, especially in EUC-GB. */
 156 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 157   ((charset) == CHARSET_ASCII                           \
 158    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 159    : ((charset) == CHARSET_8_BIT_CONTROL                \
 160       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 161       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 162          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 163          : (CHARSET_DIMENSION (charset) == 1            \
 164             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 165             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 166                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 167
 168 /* Store multi-byte form of the character C in STR.  The caller should
 169    allocate at least 4-byte area at STR in advance.  Returns the
 170    length of the multi-byte form.  If C is an invalid character code,
 171    return -1.  */
 172
 173 int
 174 char_to_string_1 (c, str)
 175      int c;
 176      unsigned char *str;
 177 {
 178   unsigned char *p = str;
 179
 180   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 181     {
 182       /* Multibyte character can't have a modifier bit.  */
 183       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 184         return -1;
 185
 186       /* For Meta, Shift, and Control modifiers, we need special care.  */
 187       if (c & CHAR_META)
 188         {
 189           /* Move the meta bit to the right place for a string.  */
 190           c = (c & ~CHAR_META) | 0x80;
 191         }
 192       if (c & CHAR_SHIFT)
 193         {
 194           /* Shift modifier is valid only with [A-Za-z].  */
 195           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 196             c &= ~CHAR_SHIFT;
 197           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 198             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 199         }
 200       if (c & CHAR_CTL)
 201         {
 202           /* Simulate the code in lread.c.  */
 203           /* Allow `\C- ' and `\C-?'.  */
 204           if (c == (CHAR_CTL | ' '))
 205             c = 0;
 206           else if (c == (CHAR_CTL | '?'))
 207             c = 127;
 208           /* ASCII control chars are made from letters (both cases),
 209              as well as the non-letters within 0100...0137.  */
 210           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 211             c &= (037 | (~0177 & ~CHAR_CTL));
 212           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 213             c &= (037 | (~0177 & ~CHAR_CTL));
 214         }
 215
 216       /* If C still has any modifier bits, just ignore it.  */
 217       c &= ~CHAR_MODIFIER_MASK;
 218     }
 219
 220   if (SINGLE_BYTE_CHAR_P (c))
 221     {
 222       if (ASCII_BYTE_P (c) || c >= 0xA0)
 223         *p++ = c;
 224       else
 225         {
 226           *p++ = LEADING_CODE_8_BIT_CONTROL;
 227           *p++ = c + 0x20;
 228         }
 229     }
 230   else if (CHAR_VALID_P (c, 0))
 231     {
 232       int charset, c1, c2;
 233
 234       SPLIT_CHAR (c, charset, c1, c2);
 235
 236       if (charset >= LEADING_CODE_EXT_11)
 237         *p++ = (charset < LEADING_CODE_EXT_12
 238                 ? LEADING_CODE_PRIVATE_11
 239                 : (charset < LEADING_CODE_EXT_21
 240                    ? LEADING_CODE_PRIVATE_12
 241                    : (charset < LEADING_CODE_EXT_22
 242                       ? LEADING_CODE_PRIVATE_21
 243                       : LEADING_CODE_PRIVATE_22)));
 244       *p++ = charset;
 245       if ((c1 > 0 && c1 < 32) || (c2 > 0 && c2 < 32))
 246         return -1;
 247       if (c1)
 248         {
 249           *p++ = c1 | 0x80;
 250           if (c2 > 0)
 251             *p++ = c2 | 0x80;
 252         }
 253     }
 254   else
 255     return -1;
 256
 257   return (p - str);
 258 }
 259
 260
 261 /* Store multi-byte form of the character C in STR.  The caller should
 262    allocate at least 4-byte area at STR in advance.  Returns the
 263    length of the multi-byte form.  If C is an invalid character code,
 264    signal an error.
 265
 266    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 267    directly if C can be an ASCII character.  */
 268
 269 int
 270 char_to_string (c, str)
 271      int c;
 272      unsigned char *str;
 273 {
 274   int len;
 275   len = char_to_string_1 (c, str);
 276   if (len == -1)
 277     invalid_character (c);
 278   return len;
 279 }
 280
 281
 282 /* Return the non-ASCII character corresponding to multi-byte form at
 283    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 284    length of the multibyte form in *ACTUAL_LEN.
 285
 286    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 287    this function directly if you want ot handle ASCII characters as
 288    well.  */
 289
 290 int
 291 string_to_char (str, len, actual_len)
 292      const unsigned char *str;
 293      int len, *actual_len;
 294 {
 295   int c, bytes, charset, c1, c2;
 296
 297   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 298   c = MAKE_CHAR (charset, c1, c2);
 299   if (actual_len)
 300     *actual_len = bytes;
 301   return c;
 302 }
 303
 304 /* Return the length of the multi-byte form at string STR of length LEN.
 305    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 306 int
 307 multibyte_form_length (str, len)
 308      const unsigned char *str;
 309      int len;
 310 {
 311   int bytes;
 312
 313   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 314   return bytes;
 315 }
 316
 317 /* Check multibyte form at string STR of length LEN and set variables
 318    pointed by CHARSET, C1, and C2 to charset and position codes of the
 319    character at STR, and return 0.  If there's no multibyte character,
 320    return -1.  This should be used only in the macro SPLIT_STRING
 321    which checks range of STR in advance.  */
 322
 323 int
 324 split_string (str, len, charset, c1, c2)
 325      const unsigned char *str;
 326      unsigned char *c1, *c2;
 327      int len, *charset;
 328 {
 329   register int bytes, cs, code1, code2 = -1;
 330
 331   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 332   if (cs == CHARSET_ASCII)
 333     return -1;
 334   *charset = cs;
 335   *c1 = code1;
 336   *c2 = code2;
 337   return 0;
 338 }
 339
 340 /* Return 1 if character C has valid printable glyph.
 341    Use the macro CHAR_PRINTABLE_P instead.  */
 342 int
 343 char_printable_p (c)
 344      int c;
 345 {
 346   int charset, c1, c2;
 347
 348   if (ASCII_BYTE_P (c))
 349     return 1;
 350   else if (SINGLE_BYTE_CHAR_P (c))
 351     return 0;
 352   else if (c >= MAX_CHAR)
 353     return 0;
 354
 355   SPLIT_CHAR (c, charset, c1, c2);
 356   if (! CHARSET_DEFINED_P (charset))
 357     return 0;
 358   if (CHARSET_CHARS (charset) == 94
 359       ? c1 <= 32 || c1 >= 127
 360       : c1 < 32)
 361     return 0;
 362   if (CHARSET_DIMENSION (charset) == 2
 363       && (CHARSET_CHARS (charset) == 94
 364           ? c2 <= 32 || c2 >= 127
 365           : c2 < 32))
 366     return 0;
 367   return 1;
 368 }
 369
 370 /* Translate character C by translation table TABLE.  If C
 371    is negative, translate a character specified by CHARSET, C1, and C2
 372    (C1 and C2 are code points of the character).  If no translation is
 373    found in TABLE, return C.  */
 374 int
 375 translate_char (table, c, charset, c1, c2)
 376      Lisp_Object table;
 377      int c, charset, c1, c2;
 378 {
 379   Lisp_Object ch;
 380   int alt_charset, alt_c1, alt_c2, dimension;
 381
 382   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 383   if (!CHAR_TABLE_P (table)
 384       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 385     return c;
 386
 387   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 388   dimension = CHARSET_DIMENSION (alt_charset);
 389   if ((dimension == 1 && alt_c1 > 0) || (dimension == 2 && alt_c2 > 0))
 390     /* CH is not a generic character, just return it.  */
 391     return XFASTINT (ch);
 392
 393   /* Since CH is a generic character, we must return a specific
 394      charater which has the same position codes as C from CH.  */
 395   if (charset < 0)
 396     SPLIT_CHAR (c, charset, c1, c2);
 397   if (dimension != CHARSET_DIMENSION (charset))
 398     /* We can't make such a character because of dimension mismatch.  */
 399     return c;
 400   return MAKE_CHAR (alt_charset, c1, c2);
 401 }
 402
 403 /* Convert the unibyte character C to multibyte based on
 404    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 405    convert C to a valid multibyte character, convert it based on
 406    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 407
 408 int
 409 unibyte_char_to_multibyte (c)
 410      int c;
 411 {
 412   if (c < 0400 && c >= 0200)
 413     {
 414       int c_save = c;
 415
 416       if (! NILP (Vnonascii_translation_table))
 417         {
 418           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 419           if (c >= 0400 && ! char_valid_p (c, 0))
 420             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 421         }
 422       else if (c >= 0240 && nonascii_insert_offset > 0)
 423         {
 424           c += nonascii_insert_offset;
 425           if (c < 0400 || ! char_valid_p (c, 0))
 426             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 427         }
 428       else if (c >= 0240)
 429         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 430     }
 431   return c;
 432 }
 433
 434
 435 /* Convert the multibyte character C to unibyte 8-bit character based
 436    on Vnonascii_translation_table or nonascii_insert_offset.  If
 437    REV_TBL is non-nil, it should be a reverse table of
 438    Vnonascii_translation_table, i.e. what given by:
 439      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 440
 441 int
 442 multibyte_char_to_unibyte (c, rev_tbl)
 443      int c;
 444      Lisp_Object rev_tbl;
 445 {
 446   if (!SINGLE_BYTE_CHAR_P (c))
 447     {
 448       int c_save = c;
 449
 450       if (! CHAR_TABLE_P (rev_tbl)
 451           && CHAR_TABLE_P (Vnonascii_translation_table))
 452         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 453                                           make_number (0));
 454       if (CHAR_TABLE_P (rev_tbl))
 455         {
 456           Lisp_Object temp;
 457           temp = Faref (rev_tbl, make_number (c));
 458           if (INTEGERP (temp))
 459             c = XINT (temp);
 460           if (c >= 256)
 461             c = (c_save & 0177) + 0200;
 462         }
 463       else
 464         {
 465           if (nonascii_insert_offset > 0)
 466             c -= nonascii_insert_offset;
 467           if (c < 128 || c >= 256)
 468             c = (c_save & 0177) + 0200;
 469         }
 470     }
 471
 472   return c;
 473 }
 474
 475 \f
 476 /* Update the table Vcharset_table with the given arguments (see the
 477    document of `define-charset' for the meaning of each argument).
 478    Several other table contents are also updated.  The caller should
 479    check the validity of CHARSET-ID and the remaining arguments in
 480    advance.  */
 481
 482 void
 483 update_charset_table (charset_id, dimension, chars, width, direction,
 484                       iso_final_char, iso_graphic_plane,
 485                       short_name, long_name, description)
 486      Lisp_Object charset_id, dimension, chars, width, direction;
 487      Lisp_Object iso_final_char, iso_graphic_plane;
 488      Lisp_Object short_name, long_name, description;
 489 {
 490   int charset = XINT (charset_id);
 491   int bytes;
 492   unsigned char leading_code_base, leading_code_ext;
 493
 494   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 495     CHARSET_TABLE_ENTRY (charset)
 496       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 497
 498   if (NILP (long_name))
 499     long_name = short_name;
 500   if (NILP (description))
 501     description = long_name;
 502
 503   /* Get byte length of multibyte form, base leading-code, and
 504      extended leading-code of the charset.  See the comment under the
 505      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 506   bytes = XINT (dimension);
 507   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 508     {
 509       /* Official charset, it doesn't have an extended leading-code.  */
 510       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 511         bytes += 1; /* For a base leading-code.  */
 512       leading_code_base = charset;
 513       leading_code_ext = 0;
 514     }
 515   else
 516     {
 517       /* Private charset.  */
 518       bytes += 2; /* For base and extended leading-codes.  */
 519       leading_code_base
 520         = (charset < LEADING_CODE_EXT_12
 521            ? LEADING_CODE_PRIVATE_11
 522            : (charset < LEADING_CODE_EXT_21
 523               ? LEADING_CODE_PRIVATE_12
 524               : (charset < LEADING_CODE_EXT_22
 525                  ? LEADING_CODE_PRIVATE_21
 526                  : LEADING_CODE_PRIVATE_22)));
 527       leading_code_ext = charset;
 528       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 529         error ("Invalid dimension for the charset-ID %d", charset);
 530     }
 531
 532   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 533   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 534   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 535   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 536   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 537   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 538   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 539     = make_number (leading_code_base);
 540   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 541     = make_number (leading_code_ext);
 542   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 543   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 544     = iso_graphic_plane;
 545   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 546   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 547   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 548   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 549
 550   {
 551     /* If we have already defined a charset which has the same
 552        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 553        DIRECTION, we must update the entry REVERSE-CHARSET of both
 554        charsets.  If there's no such charset, the value of the entry
 555        is set to nil.  */
 556     int i;
 557
 558     for (i = 0; i <= MAX_CHARSET; i++)
 559       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 560         {
 561           if (CHARSET_DIMENSION (i) == XINT (dimension)
 562               && CHARSET_CHARS (i) == XINT (chars)
 563               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 564               && CHARSET_DIRECTION (i) != XINT (direction))
 565             {
 566               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 567                 = make_number (i);
 568               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 569               break;
 570             }
 571         }
 572     if (i > MAX_CHARSET)
 573       /* No such a charset.  */
 574       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 575         = make_number (-1);
 576   }
 577
 578   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 579       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 580     {
 581       bytes_by_char_head[leading_code_base] = bytes;
 582       width_by_char_head[leading_code_base] = XINT (width);
 583
 584       /* Update table emacs_code_class.  */
 585       emacs_code_class[charset] = (bytes == 2
 586                                    ? EMACS_leading_code_2
 587                                    : (bytes == 3
 588                                       ? EMACS_leading_code_3
 589                                       : EMACS_leading_code_4));
 590     }
 591
 592   /* Update table iso_charset_table.  */
 593   if (XINT (iso_final_char) >= 0
 594       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 595     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 596 }
 597
 598 #ifdef emacs
 599
 600 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 601    is invalid.  */
 602 int
 603 get_charset_id (charset_symbol)
 604      Lisp_Object charset_symbol;
 605 {
 606   Lisp_Object val;
 607   int charset;
 608
 609   /* This originally used a ?: operator, but reportedly the HP-UX
 610      compiler version HP92453-01 A.10.32.22 miscompiles that.  */
 611   if (SYMBOLP (charset_symbol)
 612       && VECTORP (val = Fget (charset_symbol, Qcharset))
 613       && CHARSET_VALID_P (charset =
 614                           XINT (XVECTOR (val)->contents[CHARSET_ID_IDX])))
 615     return charset;
 616   else
 617     return -1;
 618 }
 619
 620 /* Return an identification number for a new private charset of
 621    DIMENSION and WIDTH.  If there's no more room for the new charset,
 622    return 0.  */
 623 Lisp_Object
 624 get_new_private_charset_id (dimension, width)
 625      int dimension, width;
 626 {
 627   int charset, from, to;
 628
 629   if (dimension == 1)
 630     {
 631       from = LEADING_CODE_EXT_11;
 632       to = LEADING_CODE_EXT_21;
 633     }
 634   else
 635     {
 636       from = LEADING_CODE_EXT_21;
 637       to = LEADING_CODE_EXT_MAX + 1;
 638     }
 639
 640   for (charset = from; charset < to; charset++)
 641     if (!CHARSET_DEFINED_P (charset)) break;
 642
 643   return make_number (charset < to ? charset : 0);
 644 }
 645
 646 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 647        doc: /* Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.
 648 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is
 649  treated as a private charset.
 650 INFO-VECTOR is a vector of the format:
 651    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE
 652     SHORT-NAME LONG-NAME DESCRIPTION]
 653 The meanings of each elements is as follows:
 654 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.
 655 CHARS (integer) is the number of characters in a dimension: 94 or 96.
 656 WIDTH (integer) is the number of columns a character in the charset
 657 occupies on the screen: one of 0, 1, and 2.
 658
 659 DIRECTION (integer) is the rendering direction of characters in the
 660 charset when rendering.  If 0, render from left to right, else
 661 render from right to left.
 662
 663 ISO-FINAL-CHAR (character) is the final character of the
 664 corresponding ISO 2022 charset.
 665 It may be -1 if the charset is internal use only.
 666
 667 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked
 668 while encoding to variants of ISO 2022 coding system, one of the
 669 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).
 670 It may be -1 if the charset is internal use only.
 671
 672 SHORT-NAME (string) is the short name to refer to the charset.
 673
 674 LONG-NAME (string) is the long name to refer to the charset.
 675
 676 DESCRIPTION (string) is the description string of the charset.  */)
 677        (charset_id, charset_symbol, info_vector)
 678      Lisp_Object charset_id, charset_symbol, info_vector;
 679 {
 680   Lisp_Object *vec;
 681
 682   if (!NILP (charset_id))
 683     CHECK_NUMBER (charset_id);
 684   CHECK_SYMBOL (charset_symbol);
 685   CHECK_VECTOR (info_vector);
 686
 687   if (! NILP (charset_id))
 688     {
 689       if (! CHARSET_VALID_P (XINT (charset_id)))
 690         error ("Invalid CHARSET: %d", XINT (charset_id));
 691       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 692         error ("Already defined charset: %d", XINT (charset_id));
 693     }
 694
 695   vec = XVECTOR (info_vector)->contents;
 696   if (XVECTOR (info_vector)->size != 9
 697       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 698       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 699       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 700       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 701       || !INTEGERP (vec[4])
 702       || !(XINT (vec[4]) == -1 || (XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~'))
 703       || !INTEGERP (vec[5])
 704       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 705       || !STRINGP (vec[6])
 706       || !STRINGP (vec[7])
 707       || !STRINGP (vec[8]))
 708     error ("Invalid info-vector argument for defining charset %s",
 709            SDATA (SYMBOL_NAME (charset_symbol)));
 710
 711   if (NILP (charset_id))
 712     {
 713       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 714       if (XINT (charset_id) == 0)
 715         error ("There's no room for a new private charset %s",
 716                SDATA (SYMBOL_NAME (charset_symbol)));
 717     }
 718
 719   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 720                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 721   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 722   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 723   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 724   Fupdate_coding_systems_internal ();
 725   return Qnil;
 726 }
 727
/* Lisp-visible accessor: hands back the value of
   Vgeneric_character_list as it is.  */
DEFUN ("generic-character-list", Fgeneric_character_list,
       Sgeneric_character_list, 0, 0, 0,
       doc: /* Return a list of all possible generic characters.
It includes a generic character for a charset not yet defined.  */)
     ()
{
  return Vgeneric_character_list;
}
 736
 737 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 738        Sget_unused_iso_final_char, 2, 2, 0,
 739        doc: /* Return an unused ISO's final char for a charset of DIMENSION and CHARS.
 740 DIMENSION is the number of bytes to represent a character: 1 or 2.
 741 CHARS is the number of characters in a dimension: 94 or 96.
 742
 743 This final char is for private use, thus the range is `0' (48) .. `?' (63).
 744 If there's no unused final char for the specified kind of charset,
 745 return nil.  */)
 746      (dimension, chars)
 747      Lisp_Object dimension, chars;
 748 {
 749   int final_char;
 750
 751   CHECK_NUMBER (dimension);
 752   CHECK_NUMBER (chars);
 753   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 754     error ("Invalid charset dimension %d, it should be 1 or 2",
 755            XINT (dimension));
 756   if (XINT (chars) != 94 && XINT (chars) != 96)
 757     error ("Invalid charset chars %d, it should be 94 or 96",
 758            XINT (chars));
 759   for (final_char = '0'; final_char <= '?'; final_char++)
 760     {
 761       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 762         break;
 763     }
 764   return (final_char <= '?' ? make_number (final_char) : Qnil);
 765 }
 766
 767 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 768        4, 4, 0,
 769        doc: /* Declare an equivalent charset for ISO-2022 decoding.
 770
 771 On decoding by an ISO-2022 base coding system, when a charset
 772 specified by DIMENSION, CHARS, and FINAL-CHAR is designated, behave as
 773 if CHARSET is designated instead.  */)
 774      (dimension, chars, final_char, charset)
 775      Lisp_Object dimension, chars, final_char, charset;
 776 {
 777   int charset_id;
 778
 779   CHECK_NUMBER (dimension);
 780   CHECK_NUMBER (chars);
 781   CHECK_NUMBER (final_char);
 782   CHECK_SYMBOL (charset);
 783
 784   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 785     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 786   if (XINT (chars) != 94 && XINT (chars) != 96)
 787     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 788   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 789     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 790   if ((charset_id = get_charset_id (charset)) < 0)
 791     error ("Invalid charset %s", SDATA (SYMBOL_NAME (charset)));
 792
 793   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset_id;
 794   return Qnil;
 795 }
 796
 797 /* Return information about charsets in the text at PTR of NBYTES
 798    bytes, which are NCHARS characters.  The value is:
 799
 800         0: Each character is represented by one byte.  This is always
 801            true for unibyte text.
 802         1: No charsets other than ascii eight-bit-control,
 803            eight-bit-graphic, and latin-1 are found.
 804         2: Otherwise.
 805
 806    In addition, if CHARSETS is nonzero, for each found charset N, set
 807    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 808    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 809    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 810    1 (note that there's no charset whose ID is 1).  */
 811
 812 int
 813 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 814      const unsigned char *ptr;
 815      int nchars, nbytes, *charsets;
 816      Lisp_Object table;
 817 {
 818   if (nchars == nbytes)
 819     {
 820       if (charsets && nbytes > 0)
 821         {
 822           const unsigned char *endp = ptr + nbytes;
 823           int maskbits = 0;
 824
 825           while (ptr < endp && maskbits != 7)
 826             {
 827               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 828               ptr++;
 829             }
 830
 831           if (maskbits & 1)
 832             charsets[CHARSET_ASCII] = 1;
 833           if (maskbits & 2)
 834             charsets[CHARSET_8_BIT_CONTROL] = 1;
 835           if (maskbits & 4)
 836             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 837         }
 838       return 0;
 839     }
 840   else
 841     {
 842       int return_val = 1;
 843       int bytes, charset, c1, c2;
 844
 845       if (! CHAR_TABLE_P (table))
 846         table = Qnil;
 847
 848       while (nchars-- > 0)
 849         {
 850           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 851           ptr += bytes;
 852
 853           if (!CHARSET_DEFINED_P (charset))
 854             charset = 1;
 855           else if (! NILP (table))
 856             {
 857               int c = translate_char (table, -1, charset, c1, c2);
 858               if (c >= 0)
 859                 charset = CHAR_CHARSET (c);
 860             }
 861
 862           if (return_val == 1
 863               && charset != CHARSET_ASCII
 864               && charset != CHARSET_8_BIT_CONTROL
 865               && charset != CHARSET_8_BIT_GRAPHIC
 866               && charset != charset_latin_iso8859_1)
 867             return_val = 2;
 868
 869           if (charsets)
 870             charsets[charset] = 1;
 871           else if (return_val == 2)
 872             break;
 873         }
 874       return return_val;
 875     }
 876 }
 877
 878 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 879        2, 3, 0,
 880        doc: /* Return a list of charsets in the region between BEG and END.
 881 BEG and END are buffer positions.
 882 Optional arg TABLE if non-nil is a translation table to look up.
 883
 884 If the region contains invalid multibyte characters,
 885 `unknown' is included in the returned list.
 886
 887 If the current buffer is unibyte, the returned list may contain
 888 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 889      (beg, end, table)
 890      Lisp_Object beg, end, table;
 891 {
 892   int charsets[MAX_CHARSET + 1];
 893   int from, from_byte, to, stop, stop_byte, i;
 894   Lisp_Object val;
 895
 896   validate_region (&beg, &end);
 897   from = XFASTINT (beg);
 898   stop = to = XFASTINT (end);
 899
 900   if (from < GPT && GPT < to)
 901     {
 902       stop = GPT;
 903       stop_byte = GPT_BYTE;
 904     }
 905   else
 906     stop_byte = CHAR_TO_BYTE (stop);
 907
 908   from_byte = CHAR_TO_BYTE (from);
 909
 910   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 911   while (1)
 912     {
 913       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 914                             stop_byte - from_byte, charsets, table);
 915       if (stop < to)
 916         {
 917           from = stop, from_byte = stop_byte;
 918           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 919         }
 920       else
 921         break;
 922     }
 923
 924   val = Qnil;
 925   if (charsets[1])
 926     val = Fcons (Qunknown, val);
 927   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 928     if (charsets[i])
 929       val = Fcons (CHARSET_SYMBOL (i), val);
 930   if (charsets[0])
 931     val = Fcons (Qascii, val);
 932   return val;
 933 }
 934
 935 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 936        1, 2, 0,
 937        doc: /* Return a list of charsets in STR.
 938 Optional arg TABLE if non-nil is a translation table to look up.
 939
 940 If the string contains invalid multibyte characters,
 941 `unknown' is included in the returned list.
 942
 943 If STR is unibyte, the returned list may contain
 944 only `ascii', `eight-bit-control', and `eight-bit-graphic'.  */)
 945      (str, table)
 946      Lisp_Object str, table;
 947 {
 948   int charsets[MAX_CHARSET + 1];
 949   int i;
 950   Lisp_Object val;
 951
 952   CHECK_STRING (str);
 953
 954   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 955   find_charset_in_text (SDATA (str), SCHARS (str),
 956                         SBYTES (str), charsets, table);
 957
 958   val = Qnil;
 959   if (charsets[1])
 960     val = Fcons (Qunknown, val);
 961   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 962     if (charsets[i])
 963       val = Fcons (CHARSET_SYMBOL (i), val);
 964   if (charsets[0])
 965     val = Fcons (Qascii, val);
 966   return val;
 967 }
 968
 969 \f
 970 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 971        doc: /* Return a character made from arguments.
 972 Internal use only.  */)
 973      (charset, code1, code2)
 974      Lisp_Object charset, code1, code2;
 975 {
 976   int charset_id, c1, c2;
 977
 978   CHECK_NUMBER (charset);
 979   charset_id = XINT (charset);
 980   if (!CHARSET_DEFINED_P (charset_id))
 981     error ("Invalid charset ID: %d", XINT (charset));
 982
 983   if (NILP (code1))
 984     c1 = 0;
 985   else
 986     {
 987       CHECK_NUMBER (code1);
 988       c1 = XINT (code1);
 989     }
 990   if (NILP (code2))
 991     c2 = 0;
 992   else
 993     {
 994       CHECK_NUMBER (code2);
 995       c2 = XINT (code2);
 996     }
 997
 998   if (charset_id == CHARSET_ASCII)
 999     {
1000       if (c1 < 0 || c1 > 0x7F)
1001         goto invalid_code_posints;
1002       return make_number (c1);
1003     }
1004   else if (charset_id == CHARSET_8_BIT_CONTROL)
1005     {
1006       if (NILP (code1))
1007         c1 = 0x80;
1008       else if (c1 < 0x80 || c1 > 0x9F)
1009         goto invalid_code_posints;
1010       return make_number (c1);
1011     }
1012   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
1013     {
1014       if (NILP (code1))
1015         c1 = 0xA0;
1016       else if (c1 < 0xA0 || c1 > 0xFF)
1017         goto invalid_code_posints;
1018       return make_number (c1);
1019     }
1020   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
1021     goto invalid_code_posints;
1022   c1 &= 0x7F;
1023   c2 &= 0x7F;
1024   if (c1 == 0
1025       ? c2 != 0
1026       : (c2 == 0
1027          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
1028          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
1029     goto invalid_code_posints;
1030   return make_number (MAKE_CHAR (charset_id, c1, c2));
1031
1032  invalid_code_posints:
1033   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
1034 }
1035
1036 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1037        doc: /* Return list of charset and one or two position-codes of CH.
1038 If CH is invalid as a character code,
1039 return a list of symbol `unknown' and CH.  */)
1040      (ch)
1041      Lisp_Object ch;
1042 {
1043   int c, charset, c1, c2;
1044
1045   CHECK_NUMBER (ch);
1046   c = XFASTINT (ch);
1047   if (!CHAR_VALID_P (c, 1))
1048     return Fcons (Qunknown, Fcons (ch, Qnil));
1049   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1050   return (c2 >= 0
1051           ? Fcons (CHARSET_SYMBOL (charset),
1052                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1053           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1054 }
1055
1056 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1057        doc: /* Return charset of CH.  */)
1058      (ch)
1059      Lisp_Object ch;
1060 {
1061   CHECK_NUMBER (ch);
1062
1063   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1064 }
1065
1066 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1067        doc: /* Return charset of a character in the current buffer at position POS.
1068 If POS is nil, it defauls to the current point.
1069 If POS is out of range, the value is nil.  */)
1070      (pos)
1071      Lisp_Object pos;
1072 {
1073   Lisp_Object ch;
1074   int charset;
1075
1076   ch = Fchar_after (pos);
1077   if (! INTEGERP (ch))
1078     return ch;
1079   charset = CHAR_CHARSET (XINT (ch));
1080   return CHARSET_SYMBOL (charset);
1081 }
1082
1083 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1084        doc: /* Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.
1085
1086 ISO 2022's designation sequence (escape sequence) distinguishes charsets
1087 by their DIMENSION, CHARS, and FINAL-CHAR,
1088 where as Emacs distinguishes them by charset symbol.
1089 See the documentation of the function `charset-info' for the meanings of
1090 DIMENSION, CHARS, and FINAL-CHAR.  */)
1091      (dimension, chars, final_char)
1092      Lisp_Object dimension, chars, final_char;
1093 {
1094   int charset;
1095
1096   CHECK_NUMBER (dimension);
1097   CHECK_NUMBER (chars);
1098   CHECK_NUMBER (final_char);
1099
1100   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1101     return Qnil;
1102   return CHARSET_SYMBOL (charset);
1103 }
1104
1105 /* If GENERICP is nonzero, return nonzero if C is a valid normal or
1106    generic character.  If GENERICP is zero, return nonzero if C is a
1107    valid normal character.  Do not call this function directly,
1108    instead use macro CHAR_VALID_P.  */
1109 int
1110 char_valid_p (c, genericp)
1111      int c, genericp;
1112 {
1113   int charset, c1, c2;
1114
1115   if (c < 0 || c >= MAX_CHAR)
1116     return 0;
1117   if (SINGLE_BYTE_CHAR_P (c))
1118     return 1;
1119   SPLIT_CHAR (c, charset, c1, c2);
1120   if (genericp)
1121     {
1122       if (c1)
1123         {
1124           if (c2 <= 0) c2 = 0x20;
1125         }
1126       else
1127         {
1128           if (c2 <= 0) c1 = c2 = 0x20;
1129         }
1130     }
1131   return (CHARSET_DEFINED_P (charset)
1132           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1133 }
1134
1135 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1136        doc: /* Return t if OBJECT is a valid normal character.
1137 If optional arg GENERICP is non-nil, also return t if OBJECT is
1138 a valid generic character.  */)
1139      (object, genericp)
1140      Lisp_Object object, genericp;
1141 {
1142   if (! NATNUMP (object))
1143     return Qnil;
1144   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1145 }
1146
1147 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1148        Sunibyte_char_to_multibyte, 1, 1, 0,
1149        doc: /* Convert the unibyte character CH to multibyte character.
1150 The conversion is done based on `nonascii-translation-table' (which see)
1151  or `nonascii-insert-offset' (which see).  */)
1152      (ch)
1153      Lisp_Object ch;
1154 {
1155   int c;
1156
1157   CHECK_NUMBER (ch);
1158   c = XINT (ch);
1159   if (c < 0 || c >= 0400)
1160     error ("Invalid unibyte character: %d", c);
1161   c = unibyte_char_to_multibyte (c);
1162   if (c < 0)
1163     error ("Can't convert to multibyte character: %d", XINT (ch));
1164   return make_number (c);
1165 }
1166
1167 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1168        Smultibyte_char_to_unibyte, 1, 1, 0,
1169        doc: /* Convert the multibyte character CH to unibyte character.
1170 The conversion is done based on `nonascii-translation-table' (which see)
1171  or `nonascii-insert-offset' (which see).  */)
1172      (ch)
1173      Lisp_Object ch;
1174 {
1175   int c;
1176
1177   CHECK_NUMBER (ch);
1178   c = XINT (ch);
1179   if (! CHAR_VALID_P (c, 0))
1180     error ("Invalid multibyte character: %d", c);
1181   c = multibyte_char_to_unibyte (c, Qnil);
1182   if (c < 0)
1183     error ("Can't convert to unibyte character: %d", XINT (ch));
1184   return make_number (c);
1185 }
1186
1187 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1188        doc: /* Return 1 regardless of the argument CH.  */)
1189      (ch)
1190      Lisp_Object ch;
1191 {
1192   CHECK_NUMBER (ch);
1193   return make_number (1);
1194 }
1195
1196 /* Return how many bytes C will occupy in a multibyte buffer.
1197    Don't call this function directly, instead use macro CHAR_BYTES.  */
1198 int
1199 char_bytes (c)
1200      int c;
1201 {
1202   int charset;
1203
1204   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1205     return 1;
1206   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1207     return 1;
1208
1209   charset = CHAR_CHARSET (c);
1210   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1211 }
1212
/* Return the width of the character whose multi-byte form starts
   with C.  The width is measured by how many columns it occupies on
   the screen when displayed in the current buffer:

     - C below 0x20: `tab_width' for a tab, 0 for a newline, and for
       any other control code 2 or 4 columns depending on whether
       `ctl_arrow' is non-nil or nil;
     - C from 0x20 up to 0x7E: 1;
     - C equal to 0x7F: 2 or 4, chosen by `ctl_arrow' as above;
     - anything else: WIDTH_BY_CHAR_HEAD (C) when the buffer is
       multibyte and C is a base leading code, otherwise 4.

   C is parenthesized in the expansion so that any expression may be
   passed, but it is evaluated more than once, so the argument must
   not have side effects.  */

#define ONE_BYTE_CHAR_WIDTH(c)                                          \
  ((c) < 0x20                                                           \
   ? ((c) == '\t'                                                       \
      ? XFASTINT (current_buffer->tab_width)                            \
      : ((c) == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2))) \
   : ((c) < 0x7f                                                        \
      ? 1                                                               \
      : ((c) == 0x7F                                                    \
         ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
         : ((! NILP (current_buffer->enable_multibyte_characters)       \
             && BASE_LEADING_CODE_P (c))                                \
            ? WIDTH_BY_CHAR_HEAD (c)                                    \
            : 4))))
1230
1231 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1232        doc: /* Return width of CH when displayed in the current buffer.
1233 The width is measured by how many columns it occupies on the screen.
1234 Tab is taken to occupy `tab-width' columns.  */)
1235      (ch)
1236      Lisp_Object ch;
1237 {
1238   Lisp_Object val, disp;
1239   int c;
1240   struct Lisp_Char_Table *dp = buffer_display_table ();
1241
1242   CHECK_NUMBER (ch);
1243
1244   c = XINT (ch);
1245
1246   /* Get the way the display table would display it.  */
1247   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1248
1249   if (VECTORP (disp))
1250     XSETINT (val, XVECTOR (disp)->size);
1251   else if (SINGLE_BYTE_CHAR_P (c))
1252     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1253   else
1254     {
1255       int charset = CHAR_CHARSET (c);
1256
1257       XSETFASTINT (val, CHARSET_WIDTH (charset));
1258     }
1259   return val;
1260 }
1261
1262 /* Return width of string STR of length LEN when displayed in the
1263    current buffer.  The width is measured by how many columns it
1264    occupies on the screen.  */
1265
1266 int
1267 strwidth (str, len)
1268      unsigned char *str;
1269      int len;
1270 {
1271   return c_string_width (str, len, -1, NULL, NULL);
1272 }
1273
1274 /* Return width of string STR of length LEN when displayed in the
1275    current buffer.  The width is measured by how many columns it
1276    occupies on the screen.  If PRECISION > 0, return the width of
1277    longest substring that doesn't exceed PRECISION, and set number of
1278    characters and bytes of the substring in *NCHARS and *NBYTES
1279    respectively.  */
1280
1281 int
1282 c_string_width (str, len, precision, nchars, nbytes)
1283      const unsigned char *str;
1284      int len, precision, *nchars, *nbytes;
1285 {
1286   int i = 0, i_byte = 0;
1287   int width = 0;
1288   int chars;
1289   struct Lisp_Char_Table *dp = buffer_display_table ();
1290
1291   while (i_byte < len)
1292     {
1293       int bytes, thiswidth;
1294       Lisp_Object val;
1295
1296       if (dp)
1297         {
1298           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1299
1300           chars = 1;
1301           val = DISP_CHAR_VECTOR (dp, c);
1302           if (VECTORP (val))
1303             thiswidth = XVECTOR (val)->size;
1304           else
1305             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1306         }
1307       else
1308         {
1309           chars = 1;
1310           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1311           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1312         }
1313
1314       if (precision > 0
1315           && (width + thiswidth > precision))
1316         {
1317           *nchars = i;
1318           *nbytes = i_byte;
1319           return width;
1320         }
1321       i++;
1322       i_byte += bytes;
1323       width += thiswidth;
1324   }
1325
1326   if (precision > 0)
1327     {
1328       *nchars = i;
1329       *nbytes = i_byte;
1330     }
1331
1332   return width;
1333 }
1334
1335 /* Return width of Lisp string STRING when displayed in the current
1336    buffer.  The width is measured by how many columns it occupies on
1337    the screen while paying attention to compositions.  If PRECISION >
1338    0, return the width of longest substring that doesn't exceed
1339    PRECISION, and set number of characters and bytes of the substring
1340    in *NCHARS and *NBYTES respectively.  */
1341
1342 int
1343 lisp_string_width (string, precision, nchars, nbytes)
1344      Lisp_Object string;
1345      int precision, *nchars, *nbytes;
1346 {
1347   int len = SCHARS (string);
1348   int len_byte = SBYTES (string);
1349   /* This set multibyte to 0 even if STRING is multibyte when it
1350      contains only ascii and eight-bit-graphic, but that's
1351      intentional.  */
1352   int multibyte = len < len_byte;
1353   const unsigned char *str = SDATA (string);
1354   int i = 0, i_byte = 0;
1355   int width = 0;
1356   struct Lisp_Char_Table *dp = buffer_display_table ();
1357
1358   while (i < len)
1359     {
1360       int chars, bytes, thiswidth;
1361       Lisp_Object val;
1362       int cmp_id;
1363       int ignore, end;
1364
1365       if (find_composition (i, -1, &ignore, &end, &val, string)
1366           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1367               >= 0))
1368         {
1369           thiswidth = composition_table[cmp_id]->width;
1370           chars = end - i;
1371           bytes = string_char_to_byte (string, end) - i_byte;
1372         }
1373       else if (dp)
1374         {
1375           int c;
1376
1377           if (multibyte)
1378             c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1379           else
1380             c = str[i_byte], bytes = 1;
1381           chars = 1;
1382           val = DISP_CHAR_VECTOR (dp, c);
1383           if (VECTORP (val))
1384             thiswidth = XVECTOR (val)->size;
1385           else
1386             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1387         }
1388       else
1389         {
1390           chars = 1;
1391           if (multibyte)
1392             PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1393           else
1394             bytes = 1;
1395           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1396         }
1397
1398       if (precision > 0
1399           && (width + thiswidth > precision))
1400         {
1401           *nchars = i;
1402           *nbytes = i_byte;
1403           return width;
1404         }
1405       i += chars;
1406       i_byte += bytes;
1407       width += thiswidth;
1408   }
1409
1410   if (precision > 0)
1411     {
1412       *nchars = i;
1413       *nbytes = i_byte;
1414     }
1415
1416   return width;
1417 }
1418
1419 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1420        doc: /* Return width of STRING when displayed in the current buffer.
1421 Width is measured by how many columns it occupies on the screen.
1422 When calculating width of a multibyte character in STRING,
1423 only the base leading-code is considered; the validity of
1424 the following bytes is not checked.  Tabs in STRING are always
1425 taken to occupy `tab-width' columns.  */)
1426      (string)
1427      Lisp_Object string;
1428 {
1429   Lisp_Object val;
1430
1431   CHECK_STRING (string);
1432   XSETFASTINT (val, lisp_string_width (string, -1, NULL, NULL));
1433   return val;
1434 }
1435
1436 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1437        doc: /* Return the direction of CH.
1438 The returned value is 0 for left-to-right and 1 for right-to-left.  */)
1439      (ch)
1440      Lisp_Object ch;
1441 {
1442   int charset;
1443
1444   CHECK_NUMBER (ch);
1445   charset = CHAR_CHARSET (XFASTINT (ch));
1446   if (!CHARSET_DEFINED_P (charset))
1447     invalid_character (XINT (ch));
1448   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1449 }
1450
1451 /* Return the number of characters in the NBYTES bytes at PTR.
1452    This works by looking at the contents and checking for multibyte sequences.
1453    However, if the current buffer has enable-multibyte-characters = nil,
1454    we treat each byte as a character.  */
1455
1456 int
1457 chars_in_text (ptr, nbytes)
1458      const unsigned char *ptr;
1459      int nbytes;
1460 {
1461   /* current_buffer is null at early stages of Emacs initialization.  */
1462   if (current_buffer == 0
1463       || NILP (current_buffer->enable_multibyte_characters))
1464     return nbytes;
1465
1466   return multibyte_chars_in_text (ptr, nbytes);
1467 }
1468
/* Return the number of characters in the NBYTES bytes at PTR,
   always parsing the bytes as multibyte sequences.  Unlike
   chars_in_text, this ignores enable-multibyte-characters.  */

int
multibyte_chars_in_text (ptr, nbytes)
     const unsigned char *ptr;
     int nbytes;
{
  const unsigned char *limit = ptr + nbytes;
  int count = 0;
  int seq_len;

  /* Step over one sequence per iteration; PARSE_MULTIBYTE_SEQ
     stores its length in SEQ_LEN.  */
  for (; ptr < limit; ptr += seq_len)
    {
      PARSE_MULTIBYTE_SEQ (ptr, limit - ptr, seq_len);
      count++;
    }

  return count;
}
1493
/* Parse the unibyte text at STR of LEN bytes as if it were multibyte
   text, and store the resulting numbers of characters and bytes in
   *NCHARS and *NBYTES.  A byte that does not start a sequence
   accepted by UNIBYTE_STR_AS_MULTIBYTE_P is counted as one character
   of 2 bytes, because 8-bit characters in the range 0x80..0x9F are
   represented by 2 bytes in multibyte text.  */
void
parse_str_as_multibyte (str, len, nchars, nbytes)
     const unsigned char *str;
     int len, *nchars, *nbytes;
{
  const unsigned char *limit = str + len;
  int seq_len;
  int char_count = 0, byte_count = 0;

  for (; str < limit; char_count++)
    {
      if (UNIBYTE_STR_AS_MULTIBYTE_P (str, limit - str, seq_len))
        {
          /* Already a valid sequence: it keeps its length.  */
          str += seq_len;
          byte_count += seq_len;
        }
      else
        {
          /* A lone byte grows to two bytes.  */
          str++;
          byte_count += 2;
        }
    }

  *nchars = char_count;
  *nbytes = byte_count;
}
1518
1519 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1520    It actually converts only 8-bit characters in the range 0x80..0x9F
1521    that don't contruct multibyte characters to multibyte forms.  If
1522    NCHARS is nonzero, set *NCHARS to the number of characters in the
1523    text.  It is assured that we can use LEN bytes at STR as a work
1524    area and that is enough.  Return the number of bytes of the
1525    resulting text.  */
1526
1527 int
1528 str_as_multibyte (str, len, nbytes, nchars)
1529      unsigned char *str;
1530      int len, nbytes, *nchars;
1531 {
1532   unsigned char *p = str, *endp = str + nbytes;
1533   unsigned char *to;
1534   int chars = 0;
1535   int n;
1536
1537   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1538     p += n, chars++;
1539   if (nchars)
1540     *nchars = chars;
1541   if (p == endp)
1542     return nbytes;
1543
1544   to = p;
1545   nbytes = endp - p;
1546   endp = str + len;
1547   safe_bcopy (p, endp - nbytes, nbytes);
1548   p = endp - nbytes;
1549   while (p < endp)
1550     {
1551       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1552         {
1553           while (n--)
1554             *to++ = *p++;
1555         }
1556       else
1557         {
1558           *to++ = LEADING_CODE_8_BIT_CONTROL;
1559           *to++ = *p++ + 0x20;
1560         }
1561       chars++;
1562     }
1563   if (nchars)
1564     *nchars = chars;
1565   return (to - str);
1566 }
1567
/* Parse the unibyte string at STR of LEN bytes, and return the
   number of bytes it may occupy when converted to a multibyte string
   by `str_to_multibyte'.  Bytes in the range 0x80..0x9F count as two
   bytes; every other byte counts as one.  */

int
parse_str_to_multibyte (str, len)
     unsigned char *str;
     int len;
{
  int total = 0;
  int i;

  for (i = 0; i < len; i++)
    {
      unsigned char byte = str[i];

      if (byte >= 0x80 && byte < 0xA0)
        total += 2;
      else
        total += 1;
    }
  return total;
}
1584
1585 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1586    that contains the same single-byte characters.  It actually
1587    converts all 8-bit characters to multibyte forms.  It is assured
1588    that we can use LEN bytes at STR as a work area and that is
1589    enough.  */
1590
1591 int
1592 str_to_multibyte (str, len, bytes)
1593      unsigned char *str;
1594      int len, bytes;
1595 {
1596   unsigned char *p = str, *endp = str + bytes;
1597   unsigned char *to;
1598
1599   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1600   if (p == endp)
1601     return bytes;
1602   to = p;
1603   bytes = endp - p;
1604   endp = str + len;
1605   safe_bcopy (p, endp - bytes, bytes);
1606   p = endp - bytes;
1607   while (p < endp)
1608     {
1609       if (*p < 0x80 || *p >= 0xA0)
1610         *to++ = *p++;
1611       else
1612         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1613     }
1614   return (to - str);
1615 }
1616
1617 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1618    actually converts only 8-bit characters in the range 0x80..0x9F to
1619    unibyte forms.  */
1620
1621 int
1622 str_as_unibyte (str, bytes)
1623      unsigned char *str;
1624      int bytes;
1625 {
1626   unsigned char *p = str, *endp = str + bytes;
1627   unsigned char *to = str;
1628
1629   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1630   to = p;
1631   while (p < endp)
1632     {
1633       if (*p == LEADING_CODE_8_BIT_CONTROL)
1634         *to++ = *(p + 1) - 0x20, p += 2;
1635       else
1636         *to++ = *p++;
1637     }
1638   return (to - str);
1639 }
1640
1641 \f
1642 DEFUN ("string", Fstring, Sstring, 0, MANY, 0,
1643   doc: /* Concatenate all the argument characters and make the result a string.
1644 usage: (string &rest CHARACTERS)  */)
1645      (n, args)
1646      int n;
1647      Lisp_Object *args;
1648 {
1649   int i, bufsize;
1650   unsigned char *buf, *p;
1651   int c;
1652   int multibyte = 0;
1653   Lisp_Object ret;
1654   USE_SAFE_ALLOCA;
1655
1656   bufsize = MAX_MULTIBYTE_LENGTH * n;
1657   SAFE_ALLOCA (buf, unsigned char *, bufsize);
1658   p = buf;
1659
1660   for (i = 0; i < n; i++)
1661     {
1662       CHECK_NUMBER (args[i]);
1663       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1664         multibyte = 1;
1665     }
1666
1667   for (i = 0; i < n; i++)
1668     {
1669       c = XINT (args[i]);
1670       if (multibyte)
1671         p += CHAR_STRING (c, p);
1672       else
1673         *p++ = c;
1674     }
1675
1676   ret = make_string_from_bytes (buf, n, p - buf);
1677   SAFE_FREE ();
1678
1679   return ret;
1680 }
1681
1682 #endif /* emacs */
1683 \f
1684 int
1685 charset_id_internal (charset_name)
1686      char *charset_name;
1687 {
1688   Lisp_Object val;
1689
1690   val= Fget (intern (charset_name), Qcharset);
1691   if (!VECTORP (val))
1692     error ("Charset %s is not defined", charset_name);
1693
1694   return (XINT (XVECTOR (val)->contents[0]));
1695 }
1696
1697 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1698        Ssetup_special_charsets, 0, 0, 0, doc: /* Internal use only.  */)
1699      ()
1700 {
1701   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1702   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1703   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1704   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1705   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1706   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1707   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1708   charset_mule_unicode_0100_24ff
1709     = charset_id_internal ("mule-unicode-0100-24ff");
1710   charset_mule_unicode_2500_33ff
1711     = charset_id_internal ("mule-unicode-2500-33ff");
1712   charset_mule_unicode_e000_ffff
1713     = charset_id_internal ("mule-unicode-e000-ffff");
1714   return Qnil;
1715 }
1716
1717 void
1718 init_charset_once ()
1719 {
1720   int i, j, k;
1721
1722   staticpro (&Vcharset_table);
1723   staticpro (&Vcharset_symbol_table);
1724   staticpro (&Vgeneric_character_list);
1725
1726   /* This has to be done here, before we call Fmake_char_table.  */
1727   Qcharset_table = intern ("charset-table");
1728   staticpro (&Qcharset_table);
1729
1730   /* Intern this now in case it isn't already done.
1731      Setting this variable twice is harmless.
1732      But don't staticpro it here--that is done in alloc.c.  */
1733   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1734
1735   /* Now we are ready to set up this property, so we can
1736      create the charset table.  */
1737   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1738   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1739
1740   Qunknown = intern ("unknown");
1741   staticpro (&Qunknown);
1742   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1743                                         Qunknown);
1744
1745   /* Setup tables.  */
1746   for (i = 0; i < 2; i++)
1747     for (j = 0; j < 2; j++)
1748       for (k = 0; k < 128; k++)
1749         iso_charset_table [i][j][k] = -1;
1750
1751   for (i = 0; i < 256; i++)
1752     bytes_by_char_head[i] = 1;
1753   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1754   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1755   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1756   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1757
1758   for (i = 0; i < 128; i++)
1759     width_by_char_head[i] = 1;
1760   for (; i < 256; i++)
1761     width_by_char_head[i] = 4;
1762   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1763   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1764   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1765   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1766
1767   {
1768     Lisp_Object val;
1769
1770     val = Qnil;
1771     for (i = 0x81; i < 0x90; i++)
1772       val = Fcons (make_number ((i - 0x70) << 7), val);
1773     for (; i < 0x9A; i++)
1774       val = Fcons (make_number ((i - 0x8F) << 14), val);
1775     for (i = 0xA0; i < 0xF0; i++)
1776       val = Fcons (make_number ((i - 0x70) << 7), val);
1777     for (; i < 0xFF; i++)
1778       val = Fcons (make_number ((i - 0xE0) << 14), val);
1779     Vgeneric_character_list = Fnreverse (val);
1780   }
1781
1782   nonascii_insert_offset = 0;
1783   Vnonascii_translation_table = Qnil;
1784 }
1785
1786 #ifdef emacs
1787
1788 void
1789 syms_of_charset ()
1790 {
1791   Qcharset = intern ("charset");
1792   staticpro (&Qcharset);
1793
1794   Qascii = intern ("ascii");
1795   staticpro (&Qascii);
1796
1797   Qeight_bit_control = intern ("eight-bit-control");
1798   staticpro (&Qeight_bit_control);
1799
1800   Qeight_bit_graphic = intern ("eight-bit-graphic");
1801   staticpro (&Qeight_bit_graphic);
1802
1803   /* Define special charsets ascii, eight-bit-control, and
1804      eight-bit-graphic.  */
1805   update_charset_table (make_number (CHARSET_ASCII),
1806                         make_number (1), make_number (94),
1807                         make_number (1),
1808                         make_number (0),
1809                         make_number ('B'),
1810                         make_number (0),
1811                         build_string ("ASCII"),
1812                         Qnil,   /* same as above */
1813                         build_string ("ASCII (ISO646 IRV)"));
1814   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1815   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1816
1817   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1818                         make_number (1), make_number (96),
1819                         make_number (4),
1820                         make_number (0),
1821                         make_number (-1),
1822                         make_number (-1),
1823                         build_string ("8-bit control code (0x80..0x9F)"),
1824                         Qnil,   /* same as above */
1825                         Qnil);  /* same as above */
1826   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1827   Fput (Qeight_bit_control, Qcharset,
1828         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1829
1830   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1831                         make_number (1), make_number (96),
1832                         make_number (4),
1833                         make_number (0),
1834                         make_number (-1),
1835                         make_number (-1),
1836                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1837                         Qnil,   /* same as above */
1838                         Qnil);  /* same as above */
1839   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1840   Fput (Qeight_bit_graphic, Qcharset,
1841         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1842
1843   Qauto_fill_chars = intern ("auto-fill-chars");
1844   staticpro (&Qauto_fill_chars);
1845   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1846
1847   defsubr (&Sdefine_charset);
1848   defsubr (&Sgeneric_character_list);
1849   defsubr (&Sget_unused_iso_final_char);
1850   defsubr (&Sdeclare_equiv_charset);
1851   defsubr (&Sfind_charset_region);
1852   defsubr (&Sfind_charset_string);
1853   defsubr (&Smake_char_internal);
1854   defsubr (&Ssplit_char);
1855   defsubr (&Schar_charset);
1856   defsubr (&Scharset_after);
1857   defsubr (&Siso_charset);
1858   defsubr (&Schar_valid_p);
1859   defsubr (&Sunibyte_char_to_multibyte);
1860   defsubr (&Smultibyte_char_to_unibyte);
1861   defsubr (&Schar_bytes);
1862   defsubr (&Schar_width);
1863   defsubr (&Sstring_width);
1864   defsubr (&Schar_direction);
1865   defsubr (&Sstring);
1866   defsubr (&Ssetup_special_charsets);
1867
1868   DEFVAR_LISP ("charset-list", &Vcharset_list,
1869                doc: /* List of charsets ever defined.  */);
1870   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1871                                         Fcons (Qeight_bit_graphic, Qnil)));
1872
1873   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1874                doc: /* Vector of cons cell of a symbol and translation table ever defined.
1875 An ID of a translation table is an index of this vector.  */);
1876   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1877
1878   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1879               doc: /* Leading-code of private TYPE9N charset of column-width 1.  */);
1880   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1881
1882   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1883               doc: /* Leading-code of private TYPE9N charset of column-width 2.  */);
1884   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1885
1886   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1887               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 1.  */);
1888   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1889
1890   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1891               doc: /* Leading-code of private TYPE9Nx9N charset of column-width 2.  */);
1892   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1893
1894   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1895               doc: /* Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.
1896 This is used for converting unibyte text to multibyte,
1897 and for inserting character codes specified by number.
1898
1899 This serves to convert a Latin-1 or similar 8-bit character code
1900 to the corresponding Emacs multibyte character code.
1901 Typically the value should be (- (make-char CHARSET 0) 128),
1902 for your choice of character set.
1903 If `nonascii-translation-table' is non-nil, it overrides this variable.  */);
1904   nonascii_insert_offset = 0;
1905
1906   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1907                doc: /* Translation table to convert non-ASCII unibyte codes to multibyte.
1908 This is used for converting unibyte text to multibyte,
1909 and for inserting character codes specified by number.
1910
1911 Conversion is performed only when multibyte characters are enabled,
1912 and it serves to convert a Latin-1 or similar 8-bit character code
1913 to the corresponding Emacs character code.
1914
1915 If this is nil, `nonascii-insert-offset' is used instead.
1916 See also the docstring of `make-translation-table'.  */);
1917   Vnonascii_translation_table = Qnil;
1918
1919   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1920                doc: /* A char-table for characters which invoke auto-filling.
1921 Such characters have value t in this table.  */);
1922   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1923   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1924   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1925 }
1926
1927 #endif /* emacs */
1928
1929 /* arch-tag: 66a89b8d-4c28-47d3-9ca1-56f78440d69f
1930    (do not change this comment) */