code.delx.au - gnu-emacs/blob - src/charset.c

   1 /* Basic multilingual character support.
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /* At first, see the document in `charset.h' to understand the code in
  23    this file.  */
  24
  25 #ifdef emacs
  26 #include <config.h>
  27 #endif
  28
  29 #include <stdio.h>
  30
  31 #ifdef emacs
  32
  33 #include <sys/types.h>
  34 #include "lisp.h"
  35 #include "buffer.h"
  36 #include "charset.h"
  37 #include "composite.h"
  38 #include "coding.h"
  39 #include "disptab.h"
  40
  41 #else  /* not emacs */
  42
  43 #include "mulelib.h"
  44
  45 #endif /* emacs */
  46
  47 Lisp_Object Qcharset, Qascii, Qeight_bit_control, Qeight_bit_graphic;
  48 Lisp_Object Qunknown;
  49
  50 /* Declaration of special leading-codes.  */
  51 int leading_code_private_11;    /* for private DIMENSION1 of 1-column */
  52 int leading_code_private_12;    /* for private DIMENSION1 of 2-column */
  53 int leading_code_private_21;    /* for private DIMENSION2 of 1-column */
  54 int leading_code_private_22;    /* for private DIMENSION2 of 2-column */
  55
  56 /* Declaration of special charsets.  The values are set by
  57    Fsetup_special_charsets.  */
  58 int charset_latin_iso8859_1;    /* ISO8859-1 (Latin-1) */
  59 int charset_jisx0208_1978;      /* JISX0208.1978 (Japanese Kanji old set) */
  60 int charset_jisx0208;           /* JISX0208.1983 (Japanese Kanji) */
  61 int charset_katakana_jisx0201;  /* JISX0201.Kana (Japanese Katakana) */
  62 int charset_latin_jisx0201;     /* JISX0201.Roman (Japanese Roman) */
  63 int charset_big5_1;             /* Big5 Level 1 (Chinese Traditional) */
  64 int charset_big5_2;             /* Big5 Level 2 (Chinese Traditional) */
  65
  66 Lisp_Object Qcharset_table;
  67
  68 /* A char-table containing information of each character set.  */
  69 Lisp_Object Vcharset_table;
  70
  71 /* A vector of charset symbol indexed by charset-id.  This is used
  72    only for returning charset symbol from C functions.  */
  73 Lisp_Object Vcharset_symbol_table;
  74
  75 /* A list of charset symbols ever defined.  */
  76 Lisp_Object Vcharset_list;
  77
  78 /* Vector of translation table ever defined.
  79    ID of a translation table is used to index this vector.  */
  80 Lisp_Object Vtranslation_table_vector;
  81
  82 /* A char-table for characters which may invoke auto-filling.  */
  83 Lisp_Object Vauto_fill_chars;
  84
  85 Lisp_Object Qauto_fill_chars;
  86
  87 /* Tables used by macros BYTES_BY_CHAR_HEAD and WIDTH_BY_CHAR_HEAD.  */
  88 int bytes_by_char_head[256];
  89 int width_by_char_head[256];
  90
  91 /* Mapping table from ISO2022's charset (specified by DIMENSION,
  92    CHARS, and FINAL-CHAR) to Emacs' charset.  */
  93 int iso_charset_table[2][2][128];
  94
  95 /* Variables used locally in the macro FETCH_MULTIBYTE_CHAR.  */
  96 unsigned char *_fetch_multibyte_char_p;
  97 int _fetch_multibyte_char_len;
  98
  99 /* Offset to add to a non-ASCII value when inserting it.  */
 100 int nonascii_insert_offset;
 101
 102 /* Translation table for converting non-ASCII unibyte characters
 103    to multibyte codes, or nil.  */
 104 Lisp_Object Vnonascii_translation_table;
 105
 106 /* List of all possible generic characters.  */
 107 Lisp_Object Vgeneric_character_list;
 108
 109 #define min(X, Y) ((X) < (Y) ? (X) : (Y))
 110 #define max(X, Y) ((X) > (Y) ? (X) : (Y))
 111 \f
 112 void
 113 invalid_character (c)
 114      int c;
 115 {
 116   error ("Invalid character: 0%o, %d, 0x%x", c, c, c);
 117 }
 118
 119 /* Parse string STR of length LENGTH and fetch information of a
 120    character at STR.  Set BYTES to the byte length the character
 121    occupies, CHARSET, C1, C2 to proper values of the character. */
 122
 123 #define SPLIT_MULTIBYTE_SEQ(str, length, bytes, charset, c1, c2)             \
 124   do {                                                                       \
 125     (c1) = *(str);                                                           \
 126     (bytes) = BYTES_BY_CHAR_HEAD (c1);                                       \
 127     if ((bytes) == 1)                                                        \
 128       (charset) = ASCII_BYTE_P (c1) ? CHARSET_ASCII : CHARSET_8_BIT_GRAPHIC; \
 129     else if ((bytes) == 2)                                                   \
 130       {                                                                      \
 131         if ((c1) == LEADING_CODE_8_BIT_CONTROL)                              \
 132           (charset) = CHARSET_8_BIT_CONTROL, (c1) = (str)[1] - 0x20;         \
 133         else                                                                 \
 134           (charset) = (c1), (c1) = (str)[1] & 0x7F;                          \
 135       }                                                                      \
 136     else if ((bytes) == 3)                                                   \
 137       {                                                                      \
 138         if ((c1) < LEADING_CODE_PRIVATE_11)                                  \
 139           (charset) = (c1), (c1) = (str)[1] & 0x7F, (c2) = (str)[2] & 0x7F;  \
 140         else                                                                 \
 141           (charset) = (str)[1], (c1) = (str)[2] & 0x7F;                      \
 142       }                                                                      \
 143     else                                                                     \
 144       (charset) = (str)[1], (c1) = (str)[2] & 0x7F, (c2) = (str)[3] & 0x7F;  \
 145   } while (0)
 146
 147 /* 1 if CHARSET, C1, and C2 compose a valid character, else 0.  */
 148 #define CHAR_COMPONENTS_VALID_P(charset, c1, c2)        \
 149   ((charset) == CHARSET_ASCII                           \
 150    ? ((c1) >= 0 && (c1) <= 0x7F)                        \
 151    : ((charset) == CHARSET_8_BIT_CONTROL                \
 152       ? ((c1) >= 0x80 && (c1) <= 0x9F)                  \
 153       : ((charset) == CHARSET_8_BIT_GRAPHIC             \
 154          ? ((c1) >= 0x80 && (c1) <= 0xFF)               \
 155          : (CHARSET_DIMENSION (charset) == 1            \
 156             ? ((c1) >= 0x20 && (c1) <= 0x7F)            \
 157             : ((c1) >= 0x20 && (c1) <= 0x7F             \
 158                && (c2) >= 0x20 && (c2) <= 0x7F)))))
 159
 160 /* Store multi-byte form of the character C in STR.  The caller should
 161    allocate at least 4-byte area at STR in advance.  Returns the
 162    length of the multi-byte form.  If C is an invalid character code,
 163    signal an error.
 164
 165    Use macro `CHAR_STRING (C, STR)' instead of calling this function
 166    directly if C can be an ASCII character.  */
 167
 168 int
 169 char_to_string (c, str)
 170      int c;
 171      unsigned char *str;
 172 {
 173   unsigned char *p = str;
 174
 175   if (c & CHAR_MODIFIER_MASK)   /* This includes the case C is negative.  */
 176     {
 177       /* Multibyte character can't have a modifier bit.  */
 178       if (! SINGLE_BYTE_CHAR_P ((c & ~CHAR_MODIFIER_MASK)))
 179         invalid_character (c);
 180
 181       /* For Meta, Shift, and Control modifiers, we need special care.  */
 182       if (c & CHAR_META)
 183         {
 184           /* Move the meta bit to the right place for a string.  */
 185           c = (c & ~CHAR_META) | 0x80;
 186         }
 187       if (c & CHAR_SHIFT)
 188         {
 189           /* Shift modifier is valid only with [A-Za-z].  */
 190           if ((c & 0377) >= 'A' && (c & 0377) <= 'Z')
 191             c &= ~CHAR_SHIFT;
 192           else if ((c & 0377) >= 'a' && (c & 0377) <= 'z')
 193             c = (c & ~CHAR_SHIFT) - ('a' - 'A');
 194         }
 195       if (c & CHAR_CTL)
 196         {
 197           /* Simulate the code in lread.c.  */
 198           /* Allow `\C- ' and `\C-?'.  */
 199           if (c == (CHAR_CTL | ' '))
 200             c = 0;
 201           else if (c == (CHAR_CTL | '?'))
 202             c = 127;
 203           /* ASCII control chars are made from letters (both cases),
 204              as well as the non-letters within 0100...0137.  */
 205           else if ((c & 0137) >= 0101 && (c & 0137) <= 0132)
 206             c &= (037 | (~0177 & ~CHAR_CTL));
 207           else if ((c & 0177) >= 0100 && (c & 0177) <= 0137)
 208             c &= (037 | (~0177 & ~CHAR_CTL));
 209         }
 210
 211       /* If C still has any modifier bits, just ignore it.  */
 212       c &= ~CHAR_MODIFIER_MASK;
 213     }
 214   if (SINGLE_BYTE_CHAR_P (c))
 215     {
 216       if (ASCII_BYTE_P (c) || c >= 0xA0)
 217         *p++ = c;
 218       else
 219         {
 220           *p++ = LEADING_CODE_8_BIT_CONTROL;
 221           *p++ = c + 0x20;
 222         }
 223     }
 224   else if (CHAR_VALID_P (c, 0))
 225     {
 226       int charset, c1, c2;
 227
 228       SPLIT_CHAR (c, charset, c1, c2);
 229
 230       if (charset >= LEADING_CODE_EXT_11)
 231         *p++ = (charset < LEADING_CODE_EXT_12
 232                 ? LEADING_CODE_PRIVATE_11
 233                 : (charset < LEADING_CODE_EXT_21
 234                    ? LEADING_CODE_PRIVATE_12
 235                    : (charset < LEADING_CODE_EXT_22
 236                       ? LEADING_CODE_PRIVATE_21
 237                       : LEADING_CODE_PRIVATE_22)));
 238       *p++ = charset;
 239       if (c1 > 0 && c1 < 32 || c2 > 0 && c2 < 32)
 240         invalid_character (c);
 241       if (c1)
 242         {
 243           *p++ = c1 | 0x80;
 244           if (c2 > 0)
 245             *p++ = c2 | 0x80;
 246         }
 247     }
 248   else
 249     invalid_character (c);
 250
 251   return (p - str);
 252 }
 253
 254 /* Return the non-ASCII character corresponding to multi-byte form at
 255    STR of length LEN.  If ACTUAL_LEN is not NULL, store the byte
 256    length of the multibyte form in *ACTUAL_LEN.
 257
 258    Use macros STRING_CHAR or STRING_CHAR_AND_LENGTH instead of calling
 259    this function directly if you want ot handle ASCII characters as
 260    well.  */
 261
 262 int
 263 string_to_char (str, len, actual_len)
 264      const unsigned char *str;
 265      int len, *actual_len;
 266 {
 267   int c, bytes, charset, c1, c2;
 268
 269   SPLIT_MULTIBYTE_SEQ (str, len, bytes, charset, c1, c2);
 270   c = MAKE_CHAR (charset, c1, c2);
 271   if (actual_len)
 272     *actual_len = bytes;
 273   return c;
 274 }
 275
 276 /* Return the length of the multi-byte form at string STR of length LEN.
 277    Use the macro MULTIBYTE_FORM_LENGTH instead.  */
 278 int
 279 multibyte_form_length (str, len)
 280      const unsigned char *str;
 281      int len;
 282 {
 283   int bytes;
 284
 285   PARSE_MULTIBYTE_SEQ (str, len, bytes);
 286   return bytes;
 287 }
 288
 289 /* Check multibyte form at string STR of length LEN and set variables
 290    pointed by CHARSET, C1, and C2 to charset and position codes of the
 291    character at STR, and return 0.  If there's no multibyte character,
 292    return -1.  This should be used only in the macro SPLIT_STRING
 293    which checks range of STR in advance.  */
 294
 295 int
 296 split_string (str, len, charset, c1, c2)
 297      const unsigned char *str;
 298      unsigned char *c1, *c2;
 299      int len, *charset;
 300 {
 301   register int bytes, cs, code1, code2 = -1;
 302
 303   SPLIT_MULTIBYTE_SEQ (str, len, bytes, cs, code1, code2);
 304   if (cs == CHARSET_ASCII)
 305     return -1;
 306   *charset = cs;
 307   *c1 = code1;
 308   *c2 = code2;
 309   return 0;
 310 }
 311
 312 /* Return 1 iff character C has valid printable glyph.
 313    Use the macro CHAR_PRINTABLE_P instead.  */
 314 int
 315 char_printable_p (c)
 316      int c;
 317 {
 318   int charset, c1, c2;
 319
 320   if (ASCII_BYTE_P (c))
 321     return 1;
 322   else if (SINGLE_BYTE_CHAR_P (c))
 323     return 0;
 324   else if (c >= MAX_CHAR)
 325     return 0;
 326
 327   SPLIT_CHAR (c, charset, c1, c2);
 328   if (! CHARSET_DEFINED_P (charset))
 329     return 0;
 330   if (CHARSET_CHARS (charset) == 94
 331       ? c1 <= 32 || c1 >= 127
 332       : c1 < 32)
 333     return 0;
 334   if (CHARSET_DIMENSION (charset) == 2
 335       && (CHARSET_CHARS (charset) == 94
 336           ? c2 <= 32 || c2 >= 127
 337           : c2 < 32))
 338     return 0;
 339   return 1;
 340 }
 341
 342 /* Translate character C by translation table TABLE.  If C
 343    is negative, translate a character specified by CHARSET, C1, and C2
 344    (C1 and C2 are code points of the character).  If no translation is
 345    found in TABLE, return C.  */
 346 int
 347 translate_char (table, c, charset, c1, c2)
 348      Lisp_Object table;
 349      int c, charset, c1, c2;
 350 {
 351   Lisp_Object ch;
 352   int alt_charset, alt_c1, alt_c2, dimension;
 353
 354   if (c < 0) c = MAKE_CHAR (charset, (c1 & 0x7F) , (c2 & 0x7F));
 355   if (!CHAR_TABLE_P (table)
 356       || (ch = Faref (table, make_number (c)), !NATNUMP (ch)))
 357     return c;
 358
 359   SPLIT_CHAR (XFASTINT (ch), alt_charset, alt_c1, alt_c2);
 360   dimension = CHARSET_DIMENSION (alt_charset);
 361   if (dimension == 1 && alt_c1 > 0 || dimension == 2 && alt_c2 > 0)
 362     /* CH is not a generic character, just return it.  */
 363     return XFASTINT (ch);
 364
 365   /* Since CH is a generic character, we must return a specific
 366      charater which has the same position codes as C from CH.  */
 367   if (charset < 0)
 368     SPLIT_CHAR (c, charset, c1, c2);
 369   if (dimension != CHARSET_DIMENSION (charset))
 370     /* We can't make such a character because of dimension mismatch.  */
 371     return c;
 372   return MAKE_CHAR (alt_charset, c1, c2);
 373 }
 374
 375 /* Convert the unibyte character C to multibyte based on
 376    Vnonascii_translation_table or nonascii_insert_offset.  If they can't
 377    convert C to a valid multibyte character, convert it based on
 378    DEFAULT_NONASCII_INSERT_OFFSET which makes C a Latin-1 character.  */
 379
 380 int
 381 unibyte_char_to_multibyte (c)
 382      int c;
 383 {
 384   if (c < 0400 && c >= 0200)
 385     {
 386       int c_save = c;
 387
 388       if (! NILP (Vnonascii_translation_table))
 389         {
 390           c = XINT (Faref (Vnonascii_translation_table, make_number (c)));
 391           if (c >= 0400 && ! char_valid_p (c, 0))
 392             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 393         }
 394       else if (c >= 0240 && nonascii_insert_offset > 0)
 395         {
 396           c += nonascii_insert_offset;
 397           if (c < 0400 || ! char_valid_p (c, 0))
 398             c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 399         }
 400       else if (c >= 0240)
 401         c = c_save + DEFAULT_NONASCII_INSERT_OFFSET;
 402     }
 403   return c;
 404 }
 405
 406
 407 /* Convert the multibyte character C to unibyte 8-bit character based
 408    on Vnonascii_translation_table or nonascii_insert_offset.  If
 409    REV_TBL is non-nil, it should be a reverse table of
 410    Vnonascii_translation_table, i.e. what given by:
 411      Fchar_table_extra_slot (Vnonascii_translation_table, make_number (0))  */
 412
 413 int
 414 multibyte_char_to_unibyte (c, rev_tbl)
 415      int c;
 416      Lisp_Object rev_tbl;
 417 {
 418   if (!SINGLE_BYTE_CHAR_P (c))
 419     {
 420       int c_save = c;
 421
 422       if (! CHAR_TABLE_P (rev_tbl)
 423           && CHAR_TABLE_P (Vnonascii_translation_table))
 424         rev_tbl = Fchar_table_extra_slot (Vnonascii_translation_table,
 425                                           make_number (0));
 426       if (CHAR_TABLE_P (rev_tbl))
 427         {
 428           Lisp_Object temp;
 429           temp = Faref (rev_tbl, make_number (c));
 430           if (INTEGERP (temp))
 431             c = XINT (temp);
 432           if (c >= 256)
 433             c = (c_save & 0177) + 0200;
 434         }
 435       else
 436         {
 437           if (nonascii_insert_offset > 0)
 438             c -= nonascii_insert_offset;
 439           if (c < 128 || c >= 256)
 440             c = (c_save & 0177) + 0200;
 441         }
 442     }
 443
 444   return c;
 445 }
 446
 447 \f
 448 /* Update the table Vcharset_table with the given arguments (see the
 449    document of `define-charset' for the meaning of each argument).
 450    Several other table contents are also updated.  The caller should
 451    check the validity of CHARSET-ID and the remaining arguments in
 452    advance.  */
 453
 454 void
 455 update_charset_table (charset_id, dimension, chars, width, direction,
 456                       iso_final_char, iso_graphic_plane,
 457                       short_name, long_name, description)
 458      Lisp_Object charset_id, dimension, chars, width, direction;
 459      Lisp_Object iso_final_char, iso_graphic_plane;
 460      Lisp_Object short_name, long_name, description;
 461 {
 462   int charset = XINT (charset_id);
 463   int bytes;
 464   unsigned char leading_code_base, leading_code_ext;
 465
 466   if (NILP (CHARSET_TABLE_ENTRY (charset)))
 467     CHARSET_TABLE_ENTRY (charset)
 468       = Fmake_vector (make_number (CHARSET_MAX_IDX), Qnil);
 469
 470   if (NILP (long_name))
 471     long_name = short_name;
 472   if (NILP (description))
 473     description = long_name;
 474
 475   /* Get byte length of multibyte form, base leading-code, and
 476      extended leading-code of the charset.  See the comment under the
 477      title "GENERAL NOTE on CHARACTER SET (CHARSET)" in charset.h.  */
 478   bytes = XINT (dimension);
 479   if (charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 480     {
 481       /* Official charset, it doesn't have an extended leading-code.  */
 482       if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC)
 483         bytes += 1; /* For a base leading-code.  */
 484       leading_code_base = charset;
 485       leading_code_ext = 0;
 486     }
 487   else
 488     {
 489       /* Private charset.  */
 490       bytes += 2; /* For base and extended leading-codes.  */
 491       leading_code_base
 492         = (charset < LEADING_CODE_EXT_12
 493            ? LEADING_CODE_PRIVATE_11
 494            : (charset < LEADING_CODE_EXT_21
 495               ? LEADING_CODE_PRIVATE_12
 496               : (charset < LEADING_CODE_EXT_22
 497                  ? LEADING_CODE_PRIVATE_21
 498                  : LEADING_CODE_PRIVATE_22)));
 499       leading_code_ext = charset;
 500       if (BYTES_BY_CHAR_HEAD (leading_code_base) != bytes)
 501         error ("Invalid dimension for the charset-ID %d", charset);
 502     }
 503
 504   CHARSET_TABLE_INFO (charset, CHARSET_ID_IDX) = charset_id;
 505   CHARSET_TABLE_INFO (charset, CHARSET_BYTES_IDX) = make_number (bytes);
 506   CHARSET_TABLE_INFO (charset, CHARSET_DIMENSION_IDX) = dimension;
 507   CHARSET_TABLE_INFO (charset, CHARSET_CHARS_IDX) = chars;
 508   CHARSET_TABLE_INFO (charset, CHARSET_WIDTH_IDX) = width;
 509   CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX) = direction;
 510   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_BASE_IDX)
 511     = make_number (leading_code_base);
 512   CHARSET_TABLE_INFO (charset, CHARSET_LEADING_CODE_EXT_IDX)
 513     = make_number (leading_code_ext);
 514   CHARSET_TABLE_INFO (charset, CHARSET_ISO_FINAL_CHAR_IDX) = iso_final_char;
 515   CHARSET_TABLE_INFO (charset, CHARSET_ISO_GRAPHIC_PLANE_IDX)
 516     = iso_graphic_plane;
 517   CHARSET_TABLE_INFO (charset, CHARSET_SHORT_NAME_IDX) = short_name;
 518   CHARSET_TABLE_INFO (charset, CHARSET_LONG_NAME_IDX) = long_name;
 519   CHARSET_TABLE_INFO (charset, CHARSET_DESCRIPTION_IDX) = description;
 520   CHARSET_TABLE_INFO (charset, CHARSET_PLIST_IDX) = Qnil;
 521
 522   {
 523     /* If we have already defined a charset which has the same
 524        DIMENSION, CHARS and ISO-FINAL-CHAR but the different
 525        DIRECTION, we must update the entry REVERSE-CHARSET of both
 526        charsets.  If there's no such charset, the value of the entry
 527        is set to nil.  */
 528     int i;
 529
 530     for (i = 0; i <= MAX_CHARSET; i++)
 531       if (!NILP (CHARSET_TABLE_ENTRY (i)))
 532         {
 533           if (CHARSET_DIMENSION (i) == XINT (dimension)
 534               && CHARSET_CHARS (i) == XINT (chars)
 535               && CHARSET_ISO_FINAL_CHAR (i) == XINT (iso_final_char)
 536               && CHARSET_DIRECTION (i) != XINT (direction))
 537             {
 538               CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 539                 = make_number (i);
 540               CHARSET_TABLE_INFO (i, CHARSET_REVERSE_CHARSET_IDX) = charset_id;
 541               break;
 542             }
 543         }
 544     if (i > MAX_CHARSET)
 545       /* No such a charset.  */
 546       CHARSET_TABLE_INFO (charset, CHARSET_REVERSE_CHARSET_IDX)
 547         = make_number (-1);
 548   }
 549
 550   if (charset != CHARSET_ASCII && charset != CHARSET_8_BIT_GRAPHIC
 551       && charset < MIN_CHARSET_PRIVATE_DIMENSION1)
 552     {
 553       bytes_by_char_head[leading_code_base] = bytes;
 554       width_by_char_head[leading_code_base] = XINT (width);
 555
 556       /* Update table emacs_code_class.  */
 557       emacs_code_class[charset] = (bytes == 2
 558                                    ? EMACS_leading_code_2
 559                                    : (bytes == 3
 560                                       ? EMACS_leading_code_3
 561                                       : EMACS_leading_code_4));
 562     }
 563
 564   /* Update table iso_charset_table.  */
 565   if (XINT (iso_final_char) >= 0
 566       && ISO_CHARSET_TABLE (dimension, chars, iso_final_char) < 0)
 567     ISO_CHARSET_TABLE (dimension, chars, iso_final_char) = charset;
 568 }
 569
 570 #ifdef emacs
 571
 572 /* Return charset id of CHARSET_SYMBOL, or return -1 if CHARSET_SYMBOL
 573    is invalid.  */
 574 int
 575 get_charset_id (charset_symbol)
 576      Lisp_Object charset_symbol;
 577 {
 578   Lisp_Object val;
 579   int charset;
 580
 581   return ((SYMBOLP (charset_symbol)
 582            && (val = Fget (charset_symbol, Qcharset), VECTORP (val))
 583            && (charset = XINT (XVECTOR (val)->contents[CHARSET_ID_IDX]),
 584                CHARSET_VALID_P (charset)))
 585           ? charset : -1);
 586 }
 587
 588 /* Return an identification number for a new private charset of
 589    DIMENSION and WIDTH.  If there's no more room for the new charset,
 590    return 0.  */
 591 Lisp_Object
 592 get_new_private_charset_id (dimension, width)
 593      int dimension, width;
 594 {
 595   int charset, from, to;
 596
 597   if (dimension == 1)
 598     {
 599       from = LEADING_CODE_EXT_11;
 600       to = LEADING_CODE_EXT_21;
 601     }
 602   else
 603     {
 604       from = LEADING_CODE_EXT_21;
 605       to = LEADING_CODE_EXT_MAX + 1;
 606     }
 607
 608   for (charset = from; charset < to; charset++)
 609     if (!CHARSET_DEFINED_P (charset)) break;
 610
 611   return make_number (charset < to ? charset : 0);
 612 }
 613
 614 DEFUN ("define-charset", Fdefine_charset, Sdefine_charset, 3, 3, 0,
 615   "Define CHARSET-ID as the identification number of CHARSET with INFO-VECTOR.\n\
 616 If CHARSET-ID is nil, it is decided automatically, which means CHARSET is\n\
 617  treated as a private charset.\n\
 618 INFO-VECTOR is a vector of the format:\n\
 619    [DIMENSION CHARS WIDTH DIRECTION ISO-FINAL-CHAR ISO-GRAPHIC-PLANE\n\
 620     SHORT-NAME LONG-NAME DESCRIPTION]\n\
 621 The meanings of each elements is as follows:\n\
 622 DIMENSION (integer) is the number of bytes to represent a character: 1 or 2.\n\
 623 CHARS (integer) is the number of characters in a dimension: 94 or 96.\n\
 624 WIDTH (integer) is the number of columns a character in the charset\n\
 625 occupies on the screen: one of 0, 1, and 2.\n\
 626 \n\
 627 DIRECTION (integer) is the rendering direction of characters in the\n\
 628 charset when rendering.  If 0, render from left to right, else\n\
 629 render from right to left.\n\
 630 \n\
 631 ISO-FINAL-CHAR (character) is the final character of the\n\
 632 corresponding ISO 2022 charset.\n\
 633 It may be -1 if the charset is internal use only.\n\
 634 \n\
 635 ISO-GRAPHIC-PLANE (integer) is the graphic plane to be invoked\n\
 636 while encoding to variants of ISO 2022 coding system, one of the\n\
 637 following: 0/graphic-plane-left(GL), 1/graphic-plane-right(GR).\n\
 638 It may be -1 if the charset is internal use only.\n\
 639 \n\
 640 SHORT-NAME (string) is the short name to refer to the charset.\n\
 641 \n\
 642 LONG-NAME (string) is the long name to refer to the charset.\n\
 643 \n\
 644 DESCRIPTION (string) is the description string of the charset.")
 645   (charset_id, charset_symbol, info_vector)
 646      Lisp_Object charset_id, charset_symbol, info_vector;
 647 {
 648   Lisp_Object *vec;
 649
 650   if (!NILP (charset_id))
 651     CHECK_NUMBER (charset_id, 0);
 652   CHECK_SYMBOL (charset_symbol, 1);
 653   CHECK_VECTOR (info_vector, 2);
 654
 655   if (! NILP (charset_id))
 656     {
 657       if (! CHARSET_VALID_P (XINT (charset_id)))
 658         error ("Invalid CHARSET: %d", XINT (charset_id));
 659       else if (CHARSET_DEFINED_P (XINT (charset_id)))
 660         error ("Already defined charset: %d", XINT (charset_id));
 661     }
 662
 663   vec = XVECTOR (info_vector)->contents;
 664   if (XVECTOR (info_vector)->size != 9
 665       || !INTEGERP (vec[0]) || !(XINT (vec[0]) == 1 || XINT (vec[0]) == 2)
 666       || !INTEGERP (vec[1]) || !(XINT (vec[1]) == 94 || XINT (vec[1]) == 96)
 667       || !INTEGERP (vec[2]) || !(XINT (vec[2]) == 1 || XINT (vec[2]) == 2)
 668       || !INTEGERP (vec[3]) || !(XINT (vec[3]) == 0 || XINT (vec[3]) == 1)
 669       || !INTEGERP (vec[4])
 670       || !(XINT (vec[4]) == -1 || XINT (vec[4]) >= '0' && XINT (vec[4]) <= '~')
 671       || !INTEGERP (vec[5])
 672       || !(XINT (vec[5]) == -1 || XINT (vec[5]) == 0 || XINT (vec[5]) == 1)
 673       || !STRINGP (vec[6])
 674       || !STRINGP (vec[7])
 675       || !STRINGP (vec[8]))
 676     error ("Invalid info-vector argument for defining charset %s",
 677            XSYMBOL (charset_symbol)->name->data);
 678
 679   if (NILP (charset_id))
 680     {
 681       charset_id = get_new_private_charset_id (XINT (vec[0]), XINT (vec[2]));
 682       if (XINT (charset_id) == 0)
 683         error ("There's no room for a new private charset %s",
 684                XSYMBOL (charset_symbol)->name->data);
 685     }
 686
 687   update_charset_table (charset_id, vec[0], vec[1], vec[2], vec[3],
 688                         vec[4], vec[5], vec[6], vec[7], vec[8]);
 689   Fput (charset_symbol, Qcharset, CHARSET_TABLE_ENTRY (XINT (charset_id)));
 690   CHARSET_SYMBOL (XINT (charset_id)) = charset_symbol;
 691   Vcharset_list = Fcons (charset_symbol, Vcharset_list);
 692   return Qnil;
 693 }
 694
 695 DEFUN ("generic-character-list", Fgeneric_character_list,
 696        Sgeneric_character_list, 0, 0, 0,
 697   "Return a list of all possible generic characters.\n\
 698 It includes a generic character for a charset not yet defined.")
 699   ()
 700 {
 701   return Vgeneric_character_list;
 702 }
 703
 704 DEFUN ("get-unused-iso-final-char", Fget_unused_iso_final_char,
 705        Sget_unused_iso_final_char, 2, 2, 0,
 706   "Return an unsed ISO's final char for a charset of DIMENISION and CHARS.\n\
 707 DIMENSION is the number of bytes to represent a character: 1 or 2.\n\
 708 CHARS is the number of characters in a dimension: 94 or 96.\n\
 709 \n\
 710 This final char is for private use, thus the range is `0' (48) .. `?' (63).\n\
 711 If there's no unused final char for the specified kind of charset,\n\
 712 return nil.")
 713   (dimension, chars)
 714      Lisp_Object dimension, chars;
 715 {
 716   int final_char;
 717
 718   CHECK_NUMBER (dimension, 0);
 719   CHECK_NUMBER (chars, 1);
 720   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 721     error ("Invalid charset dimension %d, it should be 1 or 2",
 722            XINT (dimension));
 723   if (XINT (chars) != 94 && XINT (chars) != 96)
 724     error ("Invalid charset chars %d, it should be 94 or 96",
 725            XINT (chars));
 726   for (final_char = '0'; final_char <= '?'; final_char++)
 727     {
 728       if (ISO_CHARSET_TABLE (dimension, chars, make_number (final_char)) < 0)
 729         break;
 730     }
 731   return (final_char <= '?' ? make_number (final_char) : Qnil);
 732 }
 733
 734 DEFUN ("declare-equiv-charset", Fdeclare_equiv_charset, Sdeclare_equiv_charset,
 735        4, 4, 0,
 736   "Declare a charset of DIMENSION, CHARS, FINAL-CHAR is the same as CHARSET.\n\
 737 CHARSET should be defined by `defined-charset' in advance.")
 738   (dimension, chars, final_char, charset_symbol)
 739      Lisp_Object dimension, chars, final_char, charset_symbol;
 740 {
 741   int charset;
 742
 743   CHECK_NUMBER (dimension, 0);
 744   CHECK_NUMBER (chars, 1);
 745   CHECK_NUMBER (final_char, 2);
 746   CHECK_SYMBOL (charset_symbol, 3);
 747
 748   if (XINT (dimension) != 1 && XINT (dimension) != 2)
 749     error ("Invalid DIMENSION %d, it should be 1 or 2", XINT (dimension));
 750   if (XINT (chars) != 94 && XINT (chars) != 96)
 751     error ("Invalid CHARS %d, it should be 94 or 96", XINT (chars));
 752   if (XINT (final_char) < '0' || XFASTINT (final_char) > '~')
 753     error ("Invalid FINAL-CHAR %c, it should be `0'..`~'", XINT (chars));
 754   if ((charset = get_charset_id (charset_symbol)) < 0)
 755     error ("Invalid charset %s", XSYMBOL (charset_symbol)->name->data);
 756
 757   ISO_CHARSET_TABLE (dimension, chars, final_char) = charset;
 758   return Qnil;
 759 }
 760
 761 /* Return information about charsets in the text at PTR of NBYTES
 762    bytes, which are NCHARS characters.  The value is:
 763
 764         0: Each character is represented by one byte.  This is always
 765            true for unibyte text.
 766         1: No charsets other than ascii eight-bit-control,
 767            eight-bit-graphic, and latin-1 are found.
 768         2: Otherwise.
 769
 770    In addition, if CHARSETS is nonzero, for each found charset N, set
 771    CHARSETS[N] to 1.  For that, callers should allocate CHARSETS
 772    (MAX_CHARSET + 1 elements) in advance.  It may lookup a translation
 773    table TABLE if supplied.  For invalid charsets, set CHARSETS[1] to
 774    1 (note that there's no charset whose ID is 1).  */
 775
 776 int
 777 find_charset_in_text (ptr, nchars, nbytes, charsets, table)
 778      unsigned char *ptr;
 779      int nchars, nbytes, *charsets;
 780      Lisp_Object table;
 781 {
 782   if (nchars == nbytes)
 783     {
 784       if (charsets && nbytes > 0)
 785         {
 786           unsigned char *endp = ptr + nbytes;
 787           int maskbits = 0;
 788
 789           while (ptr < endp && maskbits != 7)
 790             {
 791               maskbits |= (*ptr < 0x80 ? 1 : *ptr < 0xA0 ? 2 : 4);
 792               ptr++;
 793             }
 794
 795           if (maskbits & 1)
 796             charsets[CHARSET_ASCII] = 1;
 797           if (maskbits & 2)
 798             charsets[CHARSET_8_BIT_CONTROL] = 1;
 799           if (maskbits & 4)
 800             charsets[CHARSET_8_BIT_GRAPHIC] = 1;
 801         }
 802       return 0;
 803     }
 804   else
 805     {
 806       int return_val = 1;
 807       int bytes, charset, c1, c2;
 808
 809       if (! CHAR_TABLE_P (table))
 810         table = Qnil;
 811
 812       while (nchars-- > 0)
 813         {
 814           SPLIT_MULTIBYTE_SEQ (ptr, len, bytes, charset, c1, c2);
 815           ptr += bytes;
 816
 817           if (!CHARSET_DEFINED_P (charset))
 818             charset = 1;
 819           else if (! NILP (table))
 820             {
 821               int c = translate_char (table, -1, charset, c1, c2);
 822               if (c >= 0)
 823                 charset = CHAR_CHARSET (c);
 824             }
 825
 826           if (return_val == 1
 827               && charset != CHARSET_ASCII
 828               && charset != CHARSET_8_BIT_CONTROL
 829               && charset != CHARSET_8_BIT_GRAPHIC
 830               && charset != charset_latin_iso8859_1)
 831             return_val = 2;
 832
 833           if (charsets)
 834             charsets[charset] = 1;
 835           else if (return_val == 2)
 836             break;
 837         }
 838       return return_val;
 839     }
 840 }
 841
 842 DEFUN ("find-charset-region", Ffind_charset_region, Sfind_charset_region,
 843        2, 3, 0,
 844   "Return a list of charsets in the region between BEG and END.\n\
 845 BEG and END are buffer positions.\n\
 846 Optional arg TABLE if non-nil is a translation table to look up.\n\
 847 \n\
 848 If the region contains invalid multibyte characters,\n\
 849 `unknown' is included in the returned list.\n\
 850 \n\
 851 If the current buffer is unibyte, the returned list may contain\n\
 852 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 853   (beg, end, table)
 854      Lisp_Object beg, end, table;
 855 {
 856   int charsets[MAX_CHARSET + 1];
 857   int from, from_byte, to, stop, stop_byte, i;
 858   Lisp_Object val;
 859
 860   validate_region (&beg, &end);
 861   from = XFASTINT (beg);
 862   stop = to = XFASTINT (end);
 863
 864   if (from < GPT && GPT < to)
 865     {
 866       stop = GPT;
 867       stop_byte = GPT_BYTE;
 868     }
 869   else
 870     stop_byte = CHAR_TO_BYTE (stop);
 871
 872   from_byte = CHAR_TO_BYTE (from);
 873
 874   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 875   while (1)
 876     {
 877       find_charset_in_text (BYTE_POS_ADDR (from_byte), stop - from,
 878                             stop_byte - from_byte, charsets, table);
 879       if (stop < to)
 880         {
 881           from = stop, from_byte = stop_byte;
 882           stop = to, stop_byte = CHAR_TO_BYTE (stop);
 883         }
 884       else
 885         break;
 886     }
 887
 888   val = Qnil;
 889   if (charsets[1])
 890     val = Fcons (Qunknown, val);
 891   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 892     if (charsets[i])
 893       val = Fcons (CHARSET_SYMBOL (i), val);
 894   if (charsets[0])
 895     val = Fcons (Qascii, val);
 896   return val;
 897 }
 898
 899 DEFUN ("find-charset-string", Ffind_charset_string, Sfind_charset_string,
 900        1, 2, 0,
 901   "Return a list of charsets in STR.\n\
 902 Optional arg TABLE if non-nil is a translation table to look up.\n\
 903 \n\
 904 If the string contains invalid multibyte characters,\n\
 905 `unknown' is included in the returned list.\n\
 906 \n\
 907 If STR is unibyte, the returned list may contain\n\
 908 only `ascii', `eight-bit-control', and `eight-bit-graphic'.")
 909   (str, table)
 910      Lisp_Object str, table;
 911 {
 912   int charsets[MAX_CHARSET + 1];
 913   int i;
 914   Lisp_Object val;
 915
 916   CHECK_STRING (str, 0);
 917
 918   bzero (charsets, (MAX_CHARSET + 1) * sizeof (int));
 919   find_charset_in_text (XSTRING (str)->data, XSTRING (str)->size,
 920                         STRING_BYTES (XSTRING (str)), charsets, table);
 921
 922   val = Qnil;
 923   if (charsets[1])
 924     val = Fcons (Qunknown, val);
 925   for (i = MAX_CHARSET; i >= MIN_CHARSET_OFFICIAL_DIMENSION1; i--)
 926     if (charsets[i])
 927       val = Fcons (CHARSET_SYMBOL (i), val);
 928   if (charsets[0])
 929     val = Fcons (Qascii, val);
 930   return val;
 931 }
 932
 933 \f
 934 DEFUN ("make-char-internal", Fmake_char_internal, Smake_char_internal, 1, 3, 0,
 935   "")
 936   (charset, code1, code2)
 937      Lisp_Object charset, code1, code2;
 938 {
 939   int charset_id, c1, c2;
 940
 941   CHECK_NUMBER (charset, 0);
 942   charset_id = XINT (charset);
 943   if (!CHARSET_DEFINED_P (charset_id))
 944     error ("Invalid charset ID: %d", XINT (charset));
 945
 946   if (NILP (code1))
 947     c1 = 0;
 948   else
 949     {
 950       CHECK_NUMBER (code1, 1);
 951       c1 = XINT (code1);
 952     }
 953   if (NILP (code2))
 954     c2 = 0;
 955   else
 956     {
 957       CHECK_NUMBER (code2, 2);
 958       c2 = XINT (code2);
 959     }
 960
 961   if (charset_id == CHARSET_ASCII)
 962     {
 963       if (c1 < 0 || c1 > 0x7F)
 964         goto invalid_code_posints;
 965       return make_number (c1);
 966     }
 967   else if (charset_id == CHARSET_8_BIT_CONTROL)
 968     {
 969       if (NILP (code1))
 970         c1 = 0x80;
 971       else if (c1 < 0x80 || c1 > 0x9F)
 972         goto invalid_code_posints;
 973       return make_number (c1);
 974     }
 975   else if (charset_id == CHARSET_8_BIT_GRAPHIC)
 976     {
 977       if (NILP (code1))
 978         c1 = 0xA0;
 979       else if (c1 < 0xA0 || c1 > 0xFF)
 980         goto invalid_code_posints;
 981       return make_number (c1);
 982     }
 983   else if (c1 < 0 || c1 > 0xFF || c2 < 0 || c2 > 0xFF)
 984     goto invalid_code_posints;
 985   c1 &= 0x7F;
 986   c2 &= 0x7F;
 987   if (c1 == 0
 988       ? c2 != 0
 989       : (c2 == 0
 990          ? !CHAR_COMPONENTS_VALID_P (charset_id, c1, 0x20)
 991          : !CHAR_COMPONENTS_VALID_P (charset_id, c1, c2)))
 992     goto invalid_code_posints;
 993   return make_number (MAKE_CHAR (charset_id, c1, c2));
 994
 995  invalid_code_posints:
 996   error ("Invalid code points for charset ID %d: %d %d", charset_id, c1, c2);
 997 }
 998
 999 DEFUN ("split-char", Fsplit_char, Ssplit_char, 1, 1, 0,
1000   "Return list of charset and one or two position-codes of CHAR.\n\
1001 If CHAR is invalid as a character code,\n\
1002 return a list of symbol `unknown' and CHAR.")
1003   (ch)
1004      Lisp_Object ch;
1005 {
1006   int c, charset, c1, c2;
1007
1008   CHECK_NUMBER (ch, 0);
1009   c = XFASTINT (ch);
1010   if (!CHAR_VALID_P (c, 1))
1011     return Fcons (Qunknown, Fcons (ch, Qnil));
1012   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
1013   return (c2 >= 0
1014           ? Fcons (CHARSET_SYMBOL (charset),
1015                    Fcons (make_number (c1), Fcons (make_number (c2), Qnil)))
1016           : Fcons (CHARSET_SYMBOL (charset), Fcons (make_number (c1), Qnil)));
1017 }
1018
1019 DEFUN ("char-charset", Fchar_charset, Schar_charset, 1, 1, 0,
1020   "Return charset of CHAR.")
1021   (ch)
1022      Lisp_Object ch;
1023 {
1024   CHECK_NUMBER (ch, 0);
1025
1026   return CHARSET_SYMBOL (CHAR_CHARSET (XINT (ch)));
1027 }
1028
1029 DEFUN ("charset-after", Fcharset_after, Scharset_after, 0, 1, 0,
1030   "Return charset of a character in the current buffer at position POS.\n\
1031 If POS is nil, it defauls to the current point.\n\
1032 If POS is out of range, the value is nil.")
1033   (pos)
1034      Lisp_Object pos;
1035 {
1036   Lisp_Object ch;
1037   int charset;
1038
1039   ch = Fchar_after (pos);
1040   if (! INTEGERP (ch))
1041     return ch;
1042   charset = CHAR_CHARSET (XINT (ch));
1043   return CHARSET_SYMBOL (charset);
1044 }
1045
1046 DEFUN ("iso-charset", Fiso_charset, Siso_charset, 3, 3, 0,
1047   "Return charset of ISO's specification DIMENSION, CHARS, and FINAL-CHAR.\n\
1048 \n\
1049 ISO 2022's designation sequence (escape sequence) distinguishes charsets\n\
1050 by their DIMENSION, CHARS, and FINAL-CHAR,\n\
1051 where as Emacs distinguishes them by charset symbol.\n\
1052 See the documentation of the function `charset-info' for the meanings of\n\
1053 DIMENSION, CHARS, and FINAL-CHAR.")
1054   (dimension, chars, final_char)
1055      Lisp_Object dimension, chars, final_char;
1056 {
1057   int charset;
1058
1059   CHECK_NUMBER (dimension, 0);
1060   CHECK_NUMBER (chars, 1);
1061   CHECK_NUMBER (final_char, 2);
1062
1063   if ((charset = ISO_CHARSET_TABLE (dimension, chars, final_char)) < 0)
1064     return Qnil;
1065   return CHARSET_SYMBOL (charset);
1066 }
1067
1068 /* If GENERICP is nonzero, return nonzero iff C is a valid normal or
1069    generic character.  If GENERICP is zero, return nonzero iff C is a
1070    valid normal character.  Do not call this function directly,
1071    instead use macro CHAR_VALID_P.  */
1072 int
1073 char_valid_p (c, genericp)
1074      int c, genericp;
1075 {
1076   int charset, c1, c2;
1077
1078   if (c < 0 || c >= MAX_CHAR)
1079     return 0;
1080   if (SINGLE_BYTE_CHAR_P (c))
1081     return 1;
1082   SPLIT_CHAR (c, charset, c1, c2);
1083   if (genericp)
1084     {
1085       if (c1)
1086         {
1087           if (c2 <= 0) c2 = 0x20;
1088         }
1089       else
1090         {
1091           if (c2 <= 0) c1 = c2 = 0x20;
1092         }
1093     }
1094   return (CHARSET_DEFINED_P (charset)
1095           && CHAR_COMPONENTS_VALID_P (charset, c1, c2));
1096 }
1097
1098 DEFUN ("char-valid-p", Fchar_valid_p, Schar_valid_p, 1, 2, 0,
1099   "Return t if OBJECT is a valid normal character.\n\
1100 If optional arg GENERICP is non-nil, also return t if OBJECT is\n\
1101 a valid generic character.")
1102   (object, genericp)
1103      Lisp_Object object, genericp;
1104 {
1105   if (! NATNUMP (object))
1106     return Qnil;
1107   return (CHAR_VALID_P (XFASTINT (object), !NILP (genericp)) ? Qt : Qnil);
1108 }
1109
1110 DEFUN ("unibyte-char-to-multibyte", Funibyte_char_to_multibyte,
1111        Sunibyte_char_to_multibyte, 1, 1, 0,
1112   "Convert the unibyte character CH to multibyte character.\n\
1113 The conversion is done based on `nonascii-translation-table' (which see)\n\
1114  or `nonascii-insert-offset' (which see).")
1115   (ch)
1116      Lisp_Object ch;
1117 {
1118   int c;
1119
1120   CHECK_NUMBER (ch, 0);
1121   c = XINT (ch);
1122   if (c < 0 || c >= 0400)
1123     error ("Invalid unibyte character: %d", c);
1124   c = unibyte_char_to_multibyte (c);
1125   if (c < 0)
1126     error ("Can't convert to multibyte character: %d", XINT (ch));
1127   return make_number (c);
1128 }
1129
1130 DEFUN ("multibyte-char-to-unibyte", Fmultibyte_char_to_unibyte,
1131        Smultibyte_char_to_unibyte, 1, 1, 0,
1132   "Convert the multibyte character CH to unibyte character.\n\
1133 The conversion is done based on `nonascii-translation-table' (which see)\n\
1134  or `nonascii-insert-offset' (which see).")
1135   (ch)
1136      Lisp_Object ch;
1137 {
1138   int c;
1139
1140   CHECK_NUMBER (ch, 0);
1141   c = XINT (ch);
1142   if (! CHAR_VALID_P (c, 0))
1143     error ("Invalid multibyte character: %d", c);
1144   c = multibyte_char_to_unibyte (c, Qnil);
1145   if (c < 0)
1146     error ("Can't convert to unibyte character: %d", XINT (ch));
1147   return make_number (c);
1148 }
1149
1150 DEFUN ("char-bytes", Fchar_bytes, Schar_bytes, 1, 1, 0,
1151   "Return 1 regardless of the argument CHAR.\n\
1152 This is now an obsolete function.  We keep it just for backward compatibility.")
1153   (ch)
1154      Lisp_Object ch;
1155 {
1156   CHECK_NUMBER (ch, 0);
1157   return make_number (1);
1158 }
1159
1160 /* Return how many bytes C will occupy in a multibyte buffer.
1161    Don't call this function directly, instead use macro CHAR_BYTES.  */
1162 int
1163 char_bytes (c)
1164      int c;
1165 {
1166   int charset;
1167
1168   if (ASCII_BYTE_P (c) || (c & ~((1 << CHARACTERBITS) -1)))
1169     return 1;
1170   if (SINGLE_BYTE_CHAR_P (c) && c >= 0xA0)
1171     return 1;
1172
1173   charset = CHAR_CHARSET (c);
1174   return (CHARSET_DEFINED_P (charset) ? CHARSET_BYTES (charset) : 1);
1175 }
1176
1177 /* Return the width of character of which multi-byte form starts with
1178    C.  The width is measured by how many columns occupied on the
1179    screen when displayed in the current buffer.  */
1180
1181 #define ONE_BYTE_CHAR_WIDTH(c)                                          \
1182   (c < 0x20                                                             \
1183    ? (c == '\t'                                                         \
1184       ? XFASTINT (current_buffer->tab_width)                            \
1185       : (c == '\n' ? 0 : (NILP (current_buffer->ctl_arrow) ? 4 : 2)))   \
1186    : (c < 0x7f                                                          \
1187       ? 1                                                               \
1188       : (c == 0x7F                                                      \
1189          ? (NILP (current_buffer->ctl_arrow) ? 4 : 2)                   \
1190          : ((! NILP (current_buffer->enable_multibyte_characters)       \
1191              && BASE_LEADING_CODE_P (c))                                \
1192             ? WIDTH_BY_CHAR_HEAD (c)                                    \
1193             : 4))))
1194
1195 DEFUN ("char-width", Fchar_width, Schar_width, 1, 1, 0,
1196   "Return width of CHAR when displayed in the current buffer.\n\
1197 The width is measured by how many columns it occupies on the screen.\n\
1198 Tab is taken to occupy `tab-width' columns.")
1199   (ch)
1200        Lisp_Object ch;
1201 {
1202   Lisp_Object val, disp;
1203   int c;
1204   struct Lisp_Char_Table *dp = buffer_display_table ();
1205
1206   CHECK_NUMBER (ch, 0);
1207
1208   c = XINT (ch);
1209
1210   /* Get the way the display table would display it.  */
1211   disp = dp ? DISP_CHAR_VECTOR (dp, c) : Qnil;
1212
1213   if (VECTORP (disp))
1214     XSETINT (val, XVECTOR (disp)->size);
1215   else if (SINGLE_BYTE_CHAR_P (c))
1216     XSETINT (val, ONE_BYTE_CHAR_WIDTH (c));
1217   else
1218     {
1219       int charset = CHAR_CHARSET (c);
1220
1221       XSETFASTINT (val, CHARSET_WIDTH (charset));
1222     }
1223   return val;
1224 }
1225
1226 /* Return width of string STR of length LEN when displayed in the
1227    current buffer.  The width is measured by how many columns it
1228    occupies on the screen.  */
1229
1230 int
1231 strwidth (str, len)
1232      unsigned char *str;
1233      int len;
1234 {
1235   return c_string_width (str, len, -1, NULL, NULL);
1236 }
1237
1238 /* Return width of string STR of length LEN when displayed in the
1239    current buffer.  The width is measured by how many columns it
1240    occupies on the screen.  If PRECISION > 0, return the width of
1241    longest substring that doesn't exceed PRECISION, and set number of
1242    characters and bytes of the substring in *NCHARS and *NBYTES
1243    respectively.  */
1244
1245 int
1246 c_string_width (str, len, precision, nchars, nbytes)
1247      unsigned char *str;
1248      int precision, *nchars, *nbytes;
1249 {
1250   int i = 0, i_byte = 0;
1251   int width = 0;
1252   int chars;
1253   struct Lisp_Char_Table *dp = buffer_display_table ();
1254
1255   while (i_byte < len)
1256     {
1257       int bytes, thiswidth;
1258       Lisp_Object val;
1259
1260       if (dp)
1261         {
1262           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1263
1264           chars = 1;
1265           val = DISP_CHAR_VECTOR (dp, c);
1266           if (VECTORP (val))
1267             thiswidth = XVECTOR (val)->size;
1268           else
1269             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1270         }
1271       else
1272         {
1273           chars = 1;
1274           PARSE_MULTIBYTE_SEQ (str + i_byte, len - i_byte, bytes);
1275           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1276         }
1277
1278       if (precision > 0
1279           && (width + thiswidth > precision))
1280         {
1281           *nchars = i;
1282           *nbytes = i_byte;
1283           return width;
1284         }
1285       i++;
1286       i_byte += bytes;
1287       width += thiswidth;
1288   }
1289
1290   if (precision > 0)
1291     {
1292       *nchars = i;
1293       *nbytes = i_byte;
1294     }
1295
1296   return width;
1297 }
1298
1299 /* Return width of Lisp string STRING when displayed in the current
1300    buffer.  The width is measured by how many columns it occupies on
1301    the screen while paying attention to compositions.  If PRECISION >
1302    0, return the width of longest substring that doesn't exceed
1303    PRECISION, and set number of characters and bytes of the substring
1304    in *NCHARS and *NBYTES respectively.  */
1305
1306 int
1307 lisp_string_width (string, precision, nchars, nbytes)
1308      Lisp_Object string;
1309      int precision, *nchars, *nbytes;
1310 {
1311   int len = XSTRING (string)->size;
1312   int len_byte = STRING_BYTES (XSTRING (string));
1313   unsigned char *str = XSTRING (string)->data;
1314   int i = 0, i_byte = 0;
1315   int width = 0;
1316   struct Lisp_Char_Table *dp = buffer_display_table ();
1317
1318   while (i < len)
1319     {
1320       int chars, bytes, thiswidth;
1321       Lisp_Object val;
1322       int cmp_id;
1323       int ignore, end;
1324
1325       if (find_composition (i, -1, &ignore, &end, &val, string)
1326           && ((cmp_id = get_composition_id (i, i_byte, end - i, val, string))
1327               >= 0))
1328         {
1329           thiswidth = composition_table[cmp_id]->width;
1330           chars = end - i;
1331           bytes = string_char_to_byte (string, end) - i_byte;
1332         }
1333       else if (dp)
1334         {
1335           int c = STRING_CHAR_AND_LENGTH (str + i_byte, len - i_byte, bytes);
1336
1337           chars = 1;
1338           val = DISP_CHAR_VECTOR (dp, c);
1339           if (VECTORP (val))
1340             thiswidth = XVECTOR (val)->size;
1341           else
1342             thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1343         }
1344       else
1345         {
1346           chars = 1;
1347           PARSE_MULTIBYTE_SEQ (str + i_byte, len_byte - i_byte, bytes);
1348           thiswidth = ONE_BYTE_CHAR_WIDTH (str[i_byte]);
1349         }
1350
1351       if (precision > 0
1352           && (width + thiswidth > precision))
1353         {
1354           *nchars = i;
1355           *nbytes = i_byte;
1356           return width;
1357         }
1358       i += chars;
1359       i_byte += bytes;
1360       width += thiswidth;
1361   }
1362
1363   if (precision > 0)
1364     {
1365       *nchars = i;
1366       *nbytes = i_byte;
1367     }
1368
1369   return width;
1370 }
1371
1372 DEFUN ("string-width", Fstring_width, Sstring_width, 1, 1, 0,
1373   "Return width of STRING when displayed in the current buffer.\n\
1374 Width is measured by how many columns it occupies on the screen.\n\
1375 When calculating width of a multibyte character in STRING,\n\
1376 only the base leading-code is considered; the validity of\n\
1377 the following bytes is not checked.  Tabs in STRING are always\n\
1378 taken to occupy `tab-width' columns.")
1379   (str)
1380      Lisp_Object str;
1381 {
1382   Lisp_Object val;
1383
1384   CHECK_STRING (str, 0);
1385   XSETFASTINT (val, lisp_string_width (str, -1, NULL, NULL));
1386   return val;
1387 }
1388
1389 DEFUN ("char-direction", Fchar_direction, Schar_direction, 1, 1, 0,
1390   "Return the direction of CHAR.\n\
1391 The returned value is 0 for left-to-right and 1 for right-to-left.")
1392   (ch)
1393      Lisp_Object ch;
1394 {
1395   int charset;
1396
1397   CHECK_NUMBER (ch, 0);
1398   charset = CHAR_CHARSET (XFASTINT (ch));
1399   if (!CHARSET_DEFINED_P (charset))
1400     invalid_character (XINT (ch));
1401   return CHARSET_TABLE_INFO (charset, CHARSET_DIRECTION_IDX);
1402 }
1403
1404 DEFUN ("chars-in-region", Fchars_in_region, Schars_in_region, 2, 2, 0,
1405   "Return number of characters between BEG and END.")
1406   (beg, end)
1407      Lisp_Object beg, end;
1408 {
1409   int from, to;
1410
1411   CHECK_NUMBER_COERCE_MARKER (beg, 0);
1412   CHECK_NUMBER_COERCE_MARKER (end, 1);
1413
1414   from = min (XFASTINT (beg), XFASTINT (end));
1415   to = max (XFASTINT (beg), XFASTINT (end));
1416
1417   return make_number (to - from);
1418 }
1419
1420 /* Return the number of characters in the NBYTES bytes at PTR.
1421    This works by looking at the contents and checking for multibyte sequences.
1422    However, if the current buffer has enable-multibyte-characters = nil,
1423    we treat each byte as a character.  */
1424
1425 int
1426 chars_in_text (ptr, nbytes)
1427      unsigned char *ptr;
1428      int nbytes;
1429 {
1430   /* current_buffer is null at early stages of Emacs initialization.  */
1431   if (current_buffer == 0
1432       || NILP (current_buffer->enable_multibyte_characters))
1433     return nbytes;
1434
1435   return multibyte_chars_in_text (ptr, nbytes);
1436 }
1437
1438 /* Return the number of characters in the NBYTES bytes at PTR.
1439    This works by looking at the contents and checking for multibyte sequences.
1440    It ignores enable-multibyte-characters.  */
1441
1442 int
1443 multibyte_chars_in_text (ptr, nbytes)
1444      unsigned char *ptr;
1445      int nbytes;
1446 {
1447   unsigned char *endp;
1448   int chars, bytes;
1449
1450   endp = ptr + nbytes;
1451   chars = 0;
1452
1453   while (ptr < endp)
1454     {
1455       PARSE_MULTIBYTE_SEQ (ptr, endp - ptr, bytes);
1456       ptr += bytes;
1457       chars++;
1458     }
1459
1460   return chars;
1461 }
1462
1463 /* Parse unibyte text at STR of LEN bytes as multibyte text, and
1464    count the numbers of characters and bytes in it.  On counting
1465    bytes, pay attention to the fact that 8-bit characters in the range
1466    0x80..0x9F are represented by 2 bytes in multibyte text.  */
1467 void
1468 parse_str_as_multibyte (str, len, nchars, nbytes)
1469      unsigned char *str;
1470      int len, *nchars, *nbytes;
1471 {
1472   unsigned char *endp = str + len;
1473   int n, chars = 0, bytes = 0;
1474
1475   while (str < endp)
1476     {
1477       if (UNIBYTE_STR_AS_MULTIBYTE_P (str, endp - str, n))
1478         str += n, bytes += n;
1479       else
1480         str++, bytes += 2;
1481       chars++;
1482     }
1483   *nchars = chars;
1484   *nbytes = bytes;
1485   return;
1486 }
1487
1488 /* Arrange unibyte text at STR of NBYTES bytes as multibyte text.
1489    It actually converts only 8-bit characters in the range 0x80..0x9F
1490    that don't contruct multibyte characters to multibyte forms.  If
1491    NCHARS is nonzero, set *NCHARS to the number of characters in the
1492    text.  It is assured that we can use LEN bytes at STR as a work
1493    area and that is enough.  Return the number of bytes of the
1494    resulting text.  */
1495
1496 int
1497 str_as_multibyte (str, len, nbytes, nchars)
1498      unsigned char *str;
1499      int len, nbytes, *nchars;
1500 {
1501   unsigned char *p = str, *endp = str + nbytes;
1502   unsigned char *to;
1503   int chars = 0;
1504   int n;
1505
1506   while (p < endp && UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1507     p += n, chars++;
1508   if (nchars)
1509     *nchars = chars;
1510   if (p == endp)
1511     return nbytes;
1512
1513   to = p;
1514   nbytes = endp - p;
1515   endp = str + len;
1516   safe_bcopy (p, endp - nbytes, nbytes);
1517   p = endp - nbytes;
1518   while (p < endp)
1519     {
1520       if (UNIBYTE_STR_AS_MULTIBYTE_P (p, endp - p, n))
1521         {
1522           while (n--)
1523             *to++ = *p++;
1524         }
1525       else
1526         {
1527           *to++ = LEADING_CODE_8_BIT_CONTROL;
1528           *to++ = *p++ + 0x20;
1529         }
1530       chars++;
1531     }
1532   if (nchars)
1533     *nchars = chars;
1534   return (to - str);
1535 }
1536
1537 /* Parse unibyte string at STR of LEN bytes, and return the number of
1538    bytes it may ocupy when converted to multibyte string by
1539    `str_to_multibyte'.  */
1540
1541 int
1542 parse_str_to_multibyte (str, len)
1543      unsigned char *str;
1544      int len;
1545 {
1546   unsigned char *endp = str + len;
1547   int bytes;
1548
1549   for (bytes = 0; str < endp; str++)
1550     bytes += (*str < 0x80 || *str >= 0xA0) ? 1 : 2;
1551   return bytes;
1552 }
1553
1554 /* Convert unibyte text at STR of NBYTES bytes to multibyte text
1555    that contains the same single-byte characters.  It actually
1556    converts all 8-bit characters to multibyte forms.  It is assured
1557    that we can use LEN bytes at STR as a work area and that is
1558    enough.  */
1559
1560 int
1561 str_to_multibyte (str, len, bytes)
1562      unsigned char *str;
1563      int len, bytes;
1564 {
1565   unsigned char *p = str, *endp = str + bytes;
1566   unsigned char *to;
1567
1568   while (p < endp && (*p < 0x80 || *p >= 0xA0)) p++;
1569   if (p == endp)
1570     return bytes;
1571   to = p;
1572   bytes = endp - p;
1573   endp = str + len;
1574   safe_bcopy (p, endp - bytes, bytes);
1575   p = endp - bytes;
1576   while (p < endp)
1577     {
1578       if (*p < 0x80 || *p >= 0xA0)
1579         *to++ = *p++;
1580       else
1581         *to++ = LEADING_CODE_8_BIT_CONTROL, *to++ = *p++ + 0x20;
1582     }
1583   return (to - str);
1584 }
1585
1586 /* Arrange multibyte text at STR of LEN bytes as a unibyte text.  It
1587    actually converts only 8-bit characters in the range 0x80..0x9F to
1588    unibyte forms.  */
1589
1590 int
1591 str_as_unibyte (str, bytes)
1592      unsigned char *str;
1593      int bytes;
1594 {
1595   unsigned char *p = str, *endp = str + bytes;
1596   unsigned char *to = str;
1597
1598   while (p < endp && *p != LEADING_CODE_8_BIT_CONTROL) p++;
1599   to = p;
1600   while (p < endp)
1601     {
1602       if (*p == LEADING_CODE_8_BIT_CONTROL)
1603         *to++ = *(p + 1) - 0x20, p += 2;
1604       else
1605         *to++ = *p++;
1606     }
1607   return (to - str);
1608 }
1609
1610 \f
1611 DEFUN ("string", Fstring, Sstring, 1, MANY, 0,
1612   "Concatenate all the argument characters and make the result a string.")
1613   (n, args)
1614      int n;
1615      Lisp_Object *args;
1616 {
1617   int i;
1618   unsigned char *buf = (unsigned char *) alloca (MAX_MULTIBYTE_LENGTH * n);
1619   unsigned char *p = buf;
1620   int c;
1621   int multibyte = 0;
1622
1623   for (i = 0; i < n; i++)
1624     {
1625       CHECK_NUMBER (args[i], 0);
1626       if (!multibyte && !SINGLE_BYTE_CHAR_P (XFASTINT (args[i])))
1627         multibyte = 1;
1628     }
1629
1630   for (i = 0; i < n; i++)
1631     {
1632       c = XINT (args[i]);
1633       if (multibyte)
1634         p += CHAR_STRING (c, p);
1635       else
1636         *p++ = c;
1637     }
1638
1639   return make_string_from_bytes (buf, n, p - buf);
1640 }
1641
1642 #endif /* emacs */
1643 \f
1644 int
1645 charset_id_internal (charset_name)
1646      char *charset_name;
1647 {
1648   Lisp_Object val;
1649
1650   val= Fget (intern (charset_name), Qcharset);
1651   if (!VECTORP (val))
1652     error ("Charset %s is not defined", charset_name);
1653
1654   return (XINT (XVECTOR (val)->contents[0]));
1655 }
1656
1657 DEFUN ("setup-special-charsets", Fsetup_special_charsets,
1658        Ssetup_special_charsets, 0, 0, 0, "Internal use only.")
1659    ()
1660 {
1661   charset_latin_iso8859_1 = charset_id_internal ("latin-iso8859-1");
1662   charset_jisx0208_1978 = charset_id_internal ("japanese-jisx0208-1978");
1663   charset_jisx0208 = charset_id_internal ("japanese-jisx0208");
1664   charset_katakana_jisx0201 = charset_id_internal ("katakana-jisx0201");
1665   charset_latin_jisx0201 = charset_id_internal ("latin-jisx0201");
1666   charset_big5_1 = charset_id_internal ("chinese-big5-1");
1667   charset_big5_2 = charset_id_internal ("chinese-big5-2");
1668   return Qnil;
1669 }
1670
1671 void
1672 init_charset_once ()
1673 {
1674   int i, j, k;
1675
1676   staticpro (&Vcharset_table);
1677   staticpro (&Vcharset_symbol_table);
1678   staticpro (&Vgeneric_character_list);
1679
1680   /* This has to be done here, before we call Fmake_char_table.  */
1681   Qcharset_table = intern ("charset-table");
1682   staticpro (&Qcharset_table);
1683
1684   /* Intern this now in case it isn't already done.
1685      Setting this variable twice is harmless.
1686      But don't staticpro it here--that is done in alloc.c.  */
1687   Qchar_table_extra_slots = intern ("char-table-extra-slots");
1688
1689   /* Now we are ready to set up this property, so we can
1690      create the charset table.  */
1691   Fput (Qcharset_table, Qchar_table_extra_slots, make_number (0));
1692   Vcharset_table = Fmake_char_table (Qcharset_table, Qnil);
1693
1694   Qunknown = intern ("unknown");
1695   staticpro (&Qunknown);
1696   Vcharset_symbol_table = Fmake_vector (make_number (MAX_CHARSET + 1),
1697                                         Qunknown);
1698
1699   /* Setup tables.  */
1700   for (i = 0; i < 2; i++)
1701     for (j = 0; j < 2; j++)
1702       for (k = 0; k < 128; k++)
1703         iso_charset_table [i][j][k] = -1;
1704
1705   for (i = 0; i < 256; i++)
1706     bytes_by_char_head[i] = 1;
1707   bytes_by_char_head[LEADING_CODE_PRIVATE_11] = 3;
1708   bytes_by_char_head[LEADING_CODE_PRIVATE_12] = 3;
1709   bytes_by_char_head[LEADING_CODE_PRIVATE_21] = 4;
1710   bytes_by_char_head[LEADING_CODE_PRIVATE_22] = 4;
1711
1712   for (i = 0; i < 128; i++)
1713     width_by_char_head[i] = 1;
1714   for (; i < 256; i++)
1715     width_by_char_head[i] = 4;
1716   width_by_char_head[LEADING_CODE_PRIVATE_11] = 1;
1717   width_by_char_head[LEADING_CODE_PRIVATE_12] = 2;
1718   width_by_char_head[LEADING_CODE_PRIVATE_21] = 1;
1719   width_by_char_head[LEADING_CODE_PRIVATE_22] = 2;
1720
1721   {
1722     Lisp_Object val;
1723
1724     val = Qnil;
1725     for (i = 0x81; i < 0x90; i++)
1726       val = Fcons (make_number ((i - 0x70) << 7), val);
1727     for (; i < 0x9A; i++)
1728       val = Fcons (make_number ((i - 0x8F) << 14), val);
1729     for (i = 0xA0; i < 0xF0; i++)
1730       val = Fcons (make_number ((i - 0x70) << 7), val);
1731     for (; i < 0xFF; i++)
1732       val = Fcons (make_number ((i - 0xE0) << 14), val);
1733     Vgeneric_character_list = Fnreverse (val);
1734   }
1735
1736   nonascii_insert_offset = 0;
1737   Vnonascii_translation_table = Qnil;
1738 }
1739
1740 #ifdef emacs
1741
1742 void
1743 syms_of_charset ()
1744 {
1745   Qcharset = intern ("charset");
1746   staticpro (&Qcharset);
1747
1748   Qascii = intern ("ascii");
1749   staticpro (&Qascii);
1750
1751   Qeight_bit_control = intern ("eight-bit-control");
1752   staticpro (&Qeight_bit_control);
1753
1754   Qeight_bit_graphic = intern ("eight-bit-graphic");
1755   staticpro (&Qeight_bit_graphic);
1756
1757   /* Define special charsets ascii, eight-bit-control, and
1758      eight-bit-graphic.  */
1759   update_charset_table (make_number (CHARSET_ASCII),
1760                         make_number (1), make_number (94),
1761                         make_number (1),
1762                         make_number (0),
1763                         make_number ('B'),
1764                         make_number (0),
1765                         build_string ("ASCII"),
1766                         Qnil,   /* same as above */
1767                         build_string ("ASCII (ISO646 IRV)"));
1768   CHARSET_SYMBOL (CHARSET_ASCII) = Qascii;
1769   Fput (Qascii, Qcharset, CHARSET_TABLE_ENTRY (CHARSET_ASCII));
1770
1771   update_charset_table (make_number (CHARSET_8_BIT_CONTROL),
1772                         make_number (1), make_number (96),
1773                         make_number (4),
1774                         make_number (0),
1775                         make_number (-1),
1776                         make_number (-1),
1777                         build_string ("8-bit control code (0x80..0x9F)"),
1778                         Qnil,   /* same as above */
1779                         Qnil);  /* same as above */
1780   CHARSET_SYMBOL (CHARSET_8_BIT_CONTROL) = Qeight_bit_control;
1781   Fput (Qeight_bit_control, Qcharset,
1782         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_CONTROL));
1783
1784   update_charset_table (make_number (CHARSET_8_BIT_GRAPHIC),
1785                         make_number (1), make_number (96),
1786                         make_number (4),
1787                         make_number (0),
1788                         make_number (-1),
1789                         make_number (-1),
1790                         build_string ("8-bit graphic char (0xA0..0xFF)"),
1791                         Qnil,   /* same as above */
1792                         Qnil);  /* same as above */
1793   CHARSET_SYMBOL (CHARSET_8_BIT_GRAPHIC) = Qeight_bit_graphic;
1794   Fput (Qeight_bit_graphic, Qcharset,
1795         CHARSET_TABLE_ENTRY (CHARSET_8_BIT_GRAPHIC));
1796
1797   Qauto_fill_chars = intern ("auto-fill-chars");
1798   staticpro (&Qauto_fill_chars);
1799   Fput (Qauto_fill_chars, Qchar_table_extra_slots, make_number (0));
1800
1801   defsubr (&Sdefine_charset);
1802   defsubr (&Sgeneric_character_list);
1803   defsubr (&Sget_unused_iso_final_char);
1804   defsubr (&Sdeclare_equiv_charset);
1805   defsubr (&Sfind_charset_region);
1806   defsubr (&Sfind_charset_string);
1807   defsubr (&Smake_char_internal);
1808   defsubr (&Ssplit_char);
1809   defsubr (&Schar_charset);
1810   defsubr (&Scharset_after);
1811   defsubr (&Siso_charset);
1812   defsubr (&Schar_valid_p);
1813   defsubr (&Sunibyte_char_to_multibyte);
1814   defsubr (&Smultibyte_char_to_unibyte);
1815   defsubr (&Schar_bytes);
1816   defsubr (&Schar_width);
1817   defsubr (&Sstring_width);
1818   defsubr (&Schar_direction);
1819   defsubr (&Schars_in_region);
1820   defsubr (&Sstring);
1821   defsubr (&Ssetup_special_charsets);
1822
1823   DEFVAR_LISP ("charset-list", &Vcharset_list,
1824     "List of charsets ever defined.");
1825   Vcharset_list = Fcons (Qascii, Fcons (Qeight_bit_control,
1826                                         Fcons (Qeight_bit_graphic, Qnil)));
1827
1828   DEFVAR_LISP ("translation-table-vector",  &Vtranslation_table_vector,
1829     "Vector of cons cell of a symbol and translation table ever defined.\n\
1830 An ID of a translation table is an index of this vector.");
1831   Vtranslation_table_vector = Fmake_vector (make_number (16), Qnil);
1832
1833   DEFVAR_INT ("leading-code-private-11", &leading_code_private_11,
1834     "Leading-code of private TYPE9N charset of column-width 1.");
1835   leading_code_private_11 = LEADING_CODE_PRIVATE_11;
1836
1837   DEFVAR_INT ("leading-code-private-12", &leading_code_private_12,
1838     "Leading-code of private TYPE9N charset of column-width 2.");
1839   leading_code_private_12 = LEADING_CODE_PRIVATE_12;
1840
1841   DEFVAR_INT ("leading-code-private-21", &leading_code_private_21,
1842     "Leading-code of private TYPE9Nx9N charset of column-width 1.");
1843   leading_code_private_21 = LEADING_CODE_PRIVATE_21;
1844
1845   DEFVAR_INT ("leading-code-private-22", &leading_code_private_22,
1846     "Leading-code of private TYPE9Nx9N charset of column-width 2.");
1847   leading_code_private_22 = LEADING_CODE_PRIVATE_22;
1848
1849   DEFVAR_INT ("nonascii-insert-offset", &nonascii_insert_offset,
1850     "Offset for converting non-ASCII unibyte codes 0240...0377 to multibyte.\n\
1851 This is used for converting unibyte text to multibyte,\n\
1852 and for inserting character codes specified by number.\n\n\
1853 This serves to convert a Latin-1 or similar 8-bit character code\n\
1854 to the corresponding Emacs multibyte character code.\n\
1855 Typically the value should be (- (make-char CHARSET 0) 128),\n\
1856 for your choice of character set.\n\
1857 If `nonascii-translation-table' is non-nil, it overrides this variable.");
1858   nonascii_insert_offset = 0;
1859
1860   DEFVAR_LISP ("nonascii-translation-table", &Vnonascii_translation_table,
1861     "Translation table to convert non-ASCII unibyte codes to multibyte.\n\
1862 This is used for converting unibyte text to multibyte,\n\
1863 and for inserting character codes specified by number.\n\n\
1864 Conversion is performed only when multibyte characters are enabled,\n\
1865 and it serves to convert a Latin-1 or similar 8-bit character code\n\
1866 to the corresponding Emacs character code.\n\n\
1867 If this is nil, `nonascii-insert-offset' is used instead.\n\
1868 See also the docstring of `make-translation-table'.");
1869   Vnonascii_translation_table = Qnil;
1870
1871   DEFVAR_LISP ("auto-fill-chars", &Vauto_fill_chars,
1872     "A char-table for characters which invoke auto-filling.\n\
1873 Such characters have value t in this table.");
1874   Vauto_fill_chars = Fmake_char_table (Qauto_fill_chars, Qnil);
1875   CHAR_TABLE_SET (Vauto_fill_chars, make_number (' '), Qt);
1876   CHAR_TABLE_SET (Vauto_fill_chars, make_number ('\n'), Qt);
1877 }
1878
1879 #endif /* emacs */