code.delx.au - gnu-emacs/blob - src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. End-of-line handlers
  29   6. C library functions
  30   7. Emacs Lisp library functions
  31   8. Post-amble
  32
  33 */
  34
  35 /*** GENERAL NOTE on CODING SYSTEM ***
  36
  37   Coding system is an encoding mechanism of one or more character
  38   sets.  Here's a list of coding systems which Emacs can handle.  When
  39   we say "decode", it means converting some other coding system to
  40   Emacs' internal format (emacs-mule), and when we say "encode",
  41   it means converting the coding system emacs-mule to some other
  42   coding system.
  43
  44   0. Emacs' internal format (emacs-mule)
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and coding
  53   systems used in Internet communication such as ISO-2022-JP are
  54   all variants of ISO2022.  Details are described in section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in section 4.  In this file, when we write "BIG5"
  67   (all uppercase), we mean the coding system, and when we write
  68   "Big5" (capitalized), we mean the character set.
  69
  70   4. Raw text
  71
  72   A coding system for a text containing random 8-bit code.  Emacs does
  73   no code conversion on such a text except for end-of-line format.
  74
  75   5. Other
  76
  77   If a user wants to read/write a text encoded in a coding system not
  78   listed above, he can supply a decoder and an encoder for it in CCL
  79   (Code Conversion Language) programs.  Emacs executes the CCL program
  80   while reading/writing.
  81
  82   Emacs represents a coding system by a Lisp symbol that has a property
  83   `coding-system'.  But, before actually using the coding system, the
  84   information about it is set in a structure of type `struct
  85   coding_system' for rapid processing.  See section 6 for more details.
  86
  87 */
  88
  89 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  90
  91   How end-of-line of a text is encoded depends on a system.  For
  92   instance, Unix's format is just one byte of `line-feed' code,
  93   whereas DOS's format is two-byte sequence of `carriage-return' and
  94   `line-feed' codes.  MacOS's format is usually one byte of
  95   `carriage-return'.
  96
  97   Since text characters encoding and end-of-line encoding are
  98   independent, any coding system described above can take
  99   any format of end-of-line.  So, Emacs has information of format of
 100   end-of-line in each coding-system.  See section 6 for more details.
 101
 102 */
 103
 104 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 105
 106   These functions check if a text between SRC and SRC_END is encoded
 107   in the coding system category XXX.  Each returns an integer value in
 108   which appropriate flag bits for the category XXX are set.  The flag
 109   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 110   template of these functions.  */
 111 #if 0
 112 int
 113 detect_coding_emacs_mule (src, src_end)
 114      unsigned char *src, *src_end;
 115 {
 116   ...
 117 }
 118 #endif
 119
 120 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 121
 122   These functions decode SRC_BYTES length text at SOURCE encoded in
 123   CODING to Emacs' internal format (emacs-mule).  The resulting text
 124   goes to a place pointed to by DESTINATION, the length of which
 125   should not exceed DST_BYTES.  These functions set the information of
 126   original and decoded texts in the members produced, produced_char,
 127   consumed, and consumed_char of the structure *CODING.
 128
 129   The return value is an integer (CODING_FINISH_XXX) indicating how
 130   the decoding finished.
 131
 132   DST_BYTES zero means that source area and destination area are
 133   overlapped, which means that we can produce a decoded text until it
 134   reaches at the head of not-yet-decoded source text.
 135
 136   Below is a template of these functions.  */
 137 #if 0
 138 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 139      struct coding_system *coding;
 140      unsigned char *source, *destination;
 141      int src_bytes, dst_bytes;
 142 {
 143   ...
 144 }
 145 #endif
 146
 147 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 148
 149   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 150   internal format (emacs-mule) to CODING.  The resulting text goes to
 151   a place pointed to by DESTINATION, the length of which should not
 152   exceed DST_BYTES.  These functions set the information of
 153   original and encoded texts in the members produced, produced_char,
 154   consumed, and consumed_char of the structure *CODING.
 155
 156   The return value is an integer (CODING_FINISH_XXX) indicating how
 157   the encoding finished.
 158
 159   DST_BYTES zero means that source area and destination area are
 160   overlapped, which means that we can produce encoded text until it
 161   reaches the head of the not-yet-encoded source text.
 162
 163   Below is a template of these functions.  */
 164 #if 0
 165 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 166      struct coding_system *coding;
 167      unsigned char *source, *destination;
 168      int src_bytes, dst_bytes;
 169 {
 170   ...
 171 }
 172 #endif
 173
 174 /*** COMMONLY USED MACROS ***/
 175
 176 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 177    THREE_MORE_BYTES safely get one, two, and three bytes from the
 178    source text respectively.  If there are not enough bytes in the
 179    source, they jump to `label_end_of_loop'.  The caller should set
 180    variables `src' and `src_end' to appropriate areas in advance.  */
 181
 182 #define ONE_MORE_BYTE(c1)       \
 183   do {                          \
 184     if (src < src_end)          \
 185       c1 = *src++;              \
 186     else                        \
 187       goto label_end_of_loop;   \
 188   } while (0)
 189
 190 #define TWO_MORE_BYTES(c1, c2)  \
 191   do {                          \
 192     if (src + 1 < src_end)      \
 193       c1 = *src++, c2 = *src++; \
 194     else                        \
 195       goto label_end_of_loop;   \
 196   } while (0)
 197
 198 #define THREE_MORE_BYTES(c1, c2, c3)            \
 199   do {                                          \
 200     if (src + 2 < src_end)                      \
 201       c1 = *src++, c2 = *src++, c3 = *src++;    \
 202     else                                        \
 203       goto label_end_of_loop;                   \
 204   } while (0)
 205
 206 /* The following three macros DECODE_CHARACTER_ASCII,
 207    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 208    the multi-byte form of a character of each class at the place
 209    pointed by `dst'.  The caller should set the variable `dst' to
 210    point to an appropriate area and the variable `coding' to point to
 211    the coding-system of the currently decoding text in advance.  */
 212
 213 /* Decode one ASCII character C.  */
 214
 215 #define DECODE_CHARACTER_ASCII(c)                               \
 216   do {                                                          \
 217     if (COMPOSING_P (coding->composing))                        \
 218       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
 219     else                                                        \
 220       {                                                         \
 221         *dst++ = (c);                                           \
 222         coding->produced_char++;                                \
 223       }                                                         \
 224   } while (0)
 225
 226 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 227    position-code is C.  */
 228
 229 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 230   do {                                                                  \
 231     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 232     if (COMPOSING_P (coding->composing))                                \
 233       *dst++ = leading_code + 0x20;                                     \
 234     else                                                                \
 235       {                                                                 \
 236         *dst++ = leading_code;                                          \
 237         coding->produced_char++;                                        \
 238       }                                                                 \
 239     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              \
 240       *dst++ = leading_code;                                            \
 241     *dst++ = (c) | 0x80;                                                \
 242   } while (0)
 243
 244 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 245    position-codes are C1 and C2.  */
 246
 247 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 248   do {                                                  \
 249     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 250     *dst++ = (c2) | 0x80;                               \
 251   } while (0)
 252
 253 \f
 254 /*** 1. Preamble ***/
 255
 256 #include <stdio.h>
 257
 258 #ifdef emacs
 259
 260 #include <config.h>
 261 #include "lisp.h"
 262 #include "buffer.h"
 263 #include "charset.h"
 264 #include "ccl.h"
 265 #include "coding.h"
 266 #include "window.h"
 267
 268 #else  /* not emacs */
 269
 270 #include "mulelib.h"
 271
 272 #endif /* not emacs */
 273
 274 Lisp_Object Qcoding_system, Qeol_type;
 275 Lisp_Object Qbuffer_file_coding_system;
 276 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 277 Lisp_Object Qno_conversion, Qundecided;
 278 Lisp_Object Qcoding_system_history;
 279 Lisp_Object Qsafe_charsets;
 280
 281 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 282 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 283 Lisp_Object Qstart_process, Qopen_network_stream;
 284 Lisp_Object Qtarget_idx;
 285
 286 Lisp_Object Vselect_safe_coding_system_function;
 287
 288 /* Mnemonic character of each format of end-of-line.  */
 289 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 290 /* Mnemonic character to indicate format of end-of-line is not yet
 291    decided.  */
 292 int eol_mnemonic_undecided;
 293
 294 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 295    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 296 int system_eol_type;
 297
 298 #ifdef emacs
 299
 300 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 301
 302 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 303
 304 /* Coding system emacs-mule and raw-text are for converting only
 305    end-of-line format.  */
 306 Lisp_Object Qemacs_mule, Qraw_text;
 307
 308 /* Coding-systems are handed between Emacs Lisp programs and C internal
 309    routines by the following three variables.  */
 310 /* Coding-system for reading files and receiving data from process.  */
 311 Lisp_Object Vcoding_system_for_read;
 312 /* Coding-system for writing files and sending data to process.  */
 313 Lisp_Object Vcoding_system_for_write;
 314 /* Coding-system actually used in the latest I/O.  */
 315 Lisp_Object Vlast_coding_system_used;
 316
 317 /* A vector of length 256 which contains information about special
 318    Latin codes (espepcially for dealing with Microsoft code).  */
 319 Lisp_Object Vlatin_extra_code_table;
 320
 321 /* Flag to inhibit code conversion of end-of-line format.  */
 322 int inhibit_eol_conversion;
 323
 324 /* Coding system to be used to encode text for terminal display.  */
 325 struct coding_system terminal_coding;
 326
 327 /* Coding system to be used to encode text for terminal display when
 328    terminal coding system is nil.  */
 329 struct coding_system safe_terminal_coding;
 330
 331 /* Coding system of what is sent from terminal keyboard.  */
 332 struct coding_system keyboard_coding;
 333
 334 Lisp_Object Vfile_coding_system_alist;
 335 Lisp_Object Vprocess_coding_system_alist;
 336 Lisp_Object Vnetwork_coding_system_alist;
 337
 338 #endif /* emacs */
 339
 340 Lisp_Object Qcoding_category, Qcoding_category_index;
 341
 342 /* List of symbols `coding-category-xxx' ordered by priority.  */
 343 Lisp_Object Vcoding_category_list;
 344
 345 /* Table of coding categories (Lisp symbols).  */
 346 Lisp_Object Vcoding_category_table;
 347
 348 /* Table of names of symbol for each coding-category.  */
 349 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 350   "coding-category-emacs-mule",
 351   "coding-category-sjis",
 352   "coding-category-iso-7",
 353   "coding-category-iso-7-tight",
 354   "coding-category-iso-8-1",
 355   "coding-category-iso-8-2",
 356   "coding-category-iso-7-else",
 357   "coding-category-iso-8-else",
 358   "coding-category-big5",
 359   "coding-category-raw-text",
 360   "coding-category-binary"
 361 };
 362
 363 /* Table pointers to coding systems corresponding to each coding
 364    categories.  */
 365 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 366
 367 /* Flag to tell if we look up unification table on character code
 368    conversion.  */
 369 Lisp_Object Venable_character_unification;
 370 /* Standard unification table to look up on decoding (reading).  */
 371 Lisp_Object Vstandard_character_unification_table_for_decode;
 372 /* Standard unification table to look up on encoding (writing).  */
 373 Lisp_Object Vstandard_character_unification_table_for_encode;
 374
 375 Lisp_Object Qcharacter_unification_table;
 376 Lisp_Object Qcharacter_unification_table_for_decode;
 377 Lisp_Object Qcharacter_unification_table_for_encode;
 378
 379 /* Alist of charsets vs revision number.  */
 380 Lisp_Object Vcharset_revision_alist;
 381
 382 /* Default coding systems used for process I/O.  */
 383 Lisp_Object Vdefault_process_coding_system;
 384
 385 \f
 386 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 387
 388 /* Emacs' internal format for encoding multiple character sets is a
 389    kind of multi-byte encoding, i.e. characters are encoded by
 390    variable-length sequences of one-byte codes.  ASCII characters
 391    and control characters (e.g. `tab', `newline') are represented by
 392    one-byte sequences which are their ASCII codes, in the range 0x00
 393    through 0x7F.  The other characters are represented by a sequence
 394    of `base leading-code', optional `extended leading-code', and one
 395    or two `position-code's.  The length of the sequence is determined
 396    by the base leading-code.  Leading-code takes the range 0x80
 397    through 0x9F, whereas extended leading-code and position-code take
 398    the range 0xA0 through 0xFF.  See `charset.h' for more details
 399    about leading-code and position-code.
 400
 401    There's one exception to this rule.  Special leading-code
 402    `leading-code-composition' denotes that the following several
 403    characters should be composed into one character.  Leading-codes of
 404    components (except for ASCII) are added 0x20.  An ASCII character
 405    component is represented by a 2-byte sequence of `0xA0' and
 406    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 407    details of composite character.  Hence, we can summarize the code
 408    range as follows:
 409
 410    --- CODE RANGE of Emacs' internal format ---
 411    (character set)      (range)
 412    ASCII                0x00 .. 0x7F
 413    ELSE (1st byte)      0x80 .. 0x9F
 414         (rest bytes)    0xA0 .. 0xFF
 415    ---------------------------------------------
 416
 417   */
 418
 419 enum emacs_code_class_type emacs_code_class[256];
 420
 421 /* Go to the next statement only if *SRC is accessible and the code is
 422    greater than 0xA0.  */
 423 #define CHECK_CODE_RANGE_A0_FF  \
 424   do {                          \
 425     if (src >= src_end)         \
 426       goto label_end_of_switch; \
 427     else if (*src++ < 0xA0)     \
 428       return 0;                 \
 429   } while (0)
 430
 431 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 432    Check if a text is encoded in Emacs' internal format.  If it is,
 433    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.  */
 434
 435 int
 436 detect_coding_emacs_mule (src, src_end)
 437      unsigned char *src, *src_end;
 438 {
 439   unsigned char c;
 440   int composing = 0;
 441
 442   while (src < src_end)
 443     {
 444       c = *src++;
 445
 446       if (composing)
 447         {
 448           if (c < 0xA0)
 449             composing = 0;
 450           else
 451             c -= 0x20;
 452         }
 453
 454       switch (emacs_code_class[c])
 455         {
 456         case EMACS_ascii_code:
 457         case EMACS_linefeed_code:
 458           break;
 459
 460         case EMACS_control_code:
 461           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 462             return 0;
 463           break;
 464
 465         case EMACS_invalid_code:
 466           return 0;
 467
 468         case EMACS_leading_code_composition: /* c == 0x80 */
 469           if (composing)
 470             CHECK_CODE_RANGE_A0_FF;
 471           else
 472             composing = 1;
 473           break;
 474
 475         case EMACS_leading_code_4:
 476           CHECK_CODE_RANGE_A0_FF;
 477           /* fall down to check it two more times ...  */
 478
 479         case EMACS_leading_code_3:
 480           CHECK_CODE_RANGE_A0_FF;
 481           /* fall down to check it one more time ...  */
 482
 483         case EMACS_leading_code_2:
 484           CHECK_CODE_RANGE_A0_FF;
 485           break;
 486
 487         default:
 488         label_end_of_switch:
 489           break;
 490         }
 491     }
 492   return CODING_CATEGORY_MASK_EMACS_MULE;
 493 }
 494
 495 \f
 496 /*** 3. ISO2022 handlers ***/
 497
 498 /* The following note describes the coding system ISO2022 briefly.
 499    Since the intention of this note is to help in understanding of
 500    the programs in this file, some parts are NOT ACCURATE or OVERLY
 501    SIMPLIFIED.  For the thorough understanding, please refer to the
 502    original document of ISO2022.
 503
 504    ISO2022 provides many mechanisms to encode several character sets
 505    in 7-bit and 8-bit environment.  If one chooses 7-bit environment,
 506    all text is encoded by codes of less than 128.  This may make the
 507    encoded text a little bit longer, but the text gets more stability
 508    to pass through several gateways (some of them strip off the MSB).
 509
 510    There are two kinds of character set: control character set and
 511    graphic character set.  The former contains control characters such
 512    as `newline' and `escape' to provide control functions (control
 513    functions are provided also by escape sequences).  The latter
 514    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 515    two control character sets and many graphic character sets.
 516
 517    Graphic character sets are classified into one of the following
 518    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 519    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 520    bytes (DIMENSION) and the number of characters in one dimension
 521    (CHARS) of the set.  In addition, each character set is assigned an
 522    identification tag (called "final character" and denoted as <F>
 523    here after) which is unique in each class.  <F> of each character
 524    set is decided by ECMA(*) when it is registered in ISO.  Code range
 525    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 526
 527    Note (*): ECMA = European Computer Manufacturers Association
 528
 529    Here are examples of graphic character set [NAME(<F>)]:
 530         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 531         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 532         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 533         o DIMENSION2_CHARS96 -- none for the moment
 534
 535    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 536         C0 [0x00..0x1F] -- control character plane 0
 537         GL [0x20..0x7F] -- graphic character plane 0
 538         C1 [0x80..0x9F] -- control character plane 1
 539         GR [0xA0..0xFF] -- graphic character plane 1
 540
 541    A control character set is directly designated and invoked to C0 or
 542    C1 by an escape sequence.  The most common case is that ISO646's
 543    control character set is designated/invoked to C0 and ISO6429's
 544    control character set is designated/invoked to C1, and usually
 545    these designations/invocations are omitted in a coded text.  With
 546    7-bit environment, only C0 can be used, and a control character for
 547    C1 is encoded by an appropriate escape sequence to fit in the
 548    environment.  Corresponding escape sequences are defined for all
 549    control characters of C1.
 550
 551    A graphic character set is at first designated to one of four
 552    graphic registers (G0 through G3), then these graphic registers are
 553    invoked to GL or GR.  These designations and invocations can be
 554    done independently.  The most common case is that G0 is invoked to
 555    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 556    these invocations and designations are omitted in a coded text.
 557    With 7-bit environment, only GL can be used.
 558
 559    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 560    and 0x7F of GL area work as control characters SPACE and DEL
 561    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 562
 563    There are two ways of invocation: locking-shift and single-shift.
 564    With locking-shift, the invocation lasts until the next different
 565    invocation, whereas with single-shift, the invocation works only
 566    for the following character and doesn't affect locking-shift.
 567    Invocations are done by the following control characters or escape
 568    sequences.
 569
 570    ----------------------------------------------------------------------
 571    function             control char    escape sequence description
 572    ----------------------------------------------------------------------
 573    SI  (shift-in)               0x0F    none            invoke G0 to GL
 574    SO  (shift-out)              0x0E    none            invoke G1 to GL
 575    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 576    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 577    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 578    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 579    ----------------------------------------------------------------------
 580    The first four are for locking-shift.  Control characters for these
 581    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 582
 583    Designations are done by the following escape sequences.
 584    ----------------------------------------------------------------------
 585    escape sequence      description
 586    ----------------------------------------------------------------------
 587    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 588    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 589    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 590    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 591    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 592    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 593    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 594    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 595    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 596    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 597    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 598    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 599    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 600    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 601    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 602    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 603    ----------------------------------------------------------------------
 604
 605    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 606    of dimension 1, chars 94, and final character <F>, and etc.
 607
 608    Note (*): Although these designations are not allowed in ISO2022,
 609    Emacs accepts them on decoding, and produces them on encoding
 610    CHARS96 character set in a coding system which is characterized as
 611    7-bit environment, non-locking-shift, and non-single-shift.
 612
 613    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 614    '(' can be omitted.  We call this as "short-form" here after.
 615
 616    Now you may notice that there are a lot of ways for encoding the
 617    same multilingual text in ISO2022.  Actually, there exist many
 618    coding systems such as Compound Text (used in X's inter-client
 619    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 620    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 621    localized platforms), and all of these are variants of ISO2022.
 622
 623    In addition to the above, Emacs handles two more kinds of escape
 624    sequences: ISO6429's direction specification and Emacs' private
 625    sequence for specifying character composition.
 626
 627    ISO6429's direction specification takes the following format:
 628         o CSI ']'      -- end of the current direction
 629         o CSI '0' ']'  -- end of the current direction
 630         o CSI '1' ']'  -- start of left-to-right text
 631         o CSI '2' ']'  -- start of right-to-left text
 632    The control character CSI (0x9B: control sequence introducer) is
 633    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 634
 635    Character composition specification takes the following format:
 636         o ESC '0' -- start character composition
 637         o ESC '1' -- end character composition
 638    Since these are not standard escape sequences of any ISO, the use
 639    of them with these meanings is restricted to Emacs only.  */
 640
 641 enum iso_code_class_type iso_code_class[256];
 642
 643 #define CHARSET_OK(idx, charset)                \
 644   (CODING_SPEC_ISO_REQUESTED_DESIGNATION        \
 645    (coding_system_table[idx], charset)          \
 646    != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
 647
 648 #define SHIFT_OUT_OK(idx) \
 649   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 650
 651 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 652    Check if a text is encoded in ISO2022.  If it is, return an
 653    integer in which the appropriate flag bits among the following:
 654         CODING_CATEGORY_MASK_ISO_7
 655         CODING_CATEGORY_MASK_ISO_7_TIGHT
 656         CODING_CATEGORY_MASK_ISO_8_1
 657         CODING_CATEGORY_MASK_ISO_8_2
 658         CODING_CATEGORY_MASK_ISO_7_ELSE
 659         CODING_CATEGORY_MASK_ISO_8_ELSE
 660    are set.  If a code which should never appear in ISO2022 is found,
 661    returns 0.  */
 662
 663 int
 664 detect_coding_iso2022 (src, src_end)
 665      unsigned char *src, *src_end;
 666 {
 667   int mask = CODING_CATEGORY_MASK_ISO;
 668   int mask_found = 0;
 669   int reg[4], shift_out = 0;
 670   int c, c1, i, charset;
 671
 672   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 673   while (mask && src < src_end)
 674     {
 675       c = *src++;
 676       switch (c)
 677         {
 678         case ISO_CODE_ESC:
 679           if (src >= src_end)
 680             break;
 681           c = *src++;
 682           if (c >= '(' && c <= '/')
 683             {
 684               /* Designation sequence for a charset of dimension 1.  */
 685               if (src >= src_end)
 686                 break;
 687               c1 = *src++;
 688               if (c1 < ' ' || c1 >= 0x80
 689                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 690                 /* Invalid designation sequence.  Just ignore.  */
 691                 break;
 692               reg[(c - '(') % 4] = charset;
 693             }
 694           else if (c == '$')
 695             {
 696               /* Designation sequence for a charset of dimension 2.  */
 697               if (src >= src_end)
 698                 break;
 699               c = *src++;
 700               if (c >= '@' && c <= 'B')
 701                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 702                 reg[0] = charset = iso_charset_table[1][0][c];
 703               else if (c >= '(' && c <= '/')
 704                 {
 705                   if (src >= src_end)
 706                     break;
 707                   c1 = *src++;
 708                   if (c1 < ' ' || c1 >= 0x80
 709                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 710                     /* Invalid designation sequence.  Just ignore.  */
 711                     break;
 712                   reg[(c - '(') % 4] = charset;
 713                 }
 714               else
 715                 /* Invalid designation sequence.  Just ignore.  */
 716                 break;
 717             }
 718           else if (c == 'N' || c == 'n')
 719             {
 720               if (shift_out == 0
 721                   && (reg[1] >= 0
 722                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 723                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 724                 {
 725                   /* Locking shift out.  */
 726                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 727                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 728                   shift_out = 1;
 729                 }
 730               break;
 731             }
 732           else if (c == 'O' || c == 'o')
 733             {
 734               if (shift_out == 1)
 735                 {
 736                   /* Locking shift in.  */
 737                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 738                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 739                   shift_out = 0;
 740                 }
 741               break;
 742             }
 743           else if (c == '0' || c == '1' || c == '2')
 744             /* Start/end composition.  Just ignore.  */
 745             break;
 746           else
 747             /* Invalid escape sequence.  Just ignore.  */
 748             break;
 749
 750           /* We found a valid designation sequence for CHARSET.  */
 751           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 752           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 753             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 754           else
 755             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 756           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 757             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 758           else
 759             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 760           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 761             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 762           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 763             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 764           break;
 765
 766         case ISO_CODE_SO:
 767           if (shift_out == 0
 768               && (reg[1] >= 0
 769                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 770                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 771             {
 772               /* Locking shift out.  */
 773               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 774               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 775             }
 776           break;
 777
 778         case ISO_CODE_SI:
 779           if (shift_out == 1)
 780             {
 781               /* Locking shift in.  */
 782               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 783               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 784             }
 785           break;
 786
 787         case ISO_CODE_CSI:
 788         case ISO_CODE_SS2:
 789         case ISO_CODE_SS3:
 790           {
 791             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 792
 793             if (c != ISO_CODE_CSI)
 794               {
 795                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 796                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 797                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 798                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 799                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 800                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 801               }
 802             if (VECTORP (Vlatin_extra_code_table)
 803                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 804               {
 805                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 806                     & CODING_FLAG_ISO_LATIN_EXTRA)
 807                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 808                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 809                     & CODING_FLAG_ISO_LATIN_EXTRA)
 810                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 811               }
 812             mask &= newmask;
 813             mask_found |= newmask;
 814           }
 815           break;
 816
 817         default:
 818           if (c < 0x80)
 819             break;
 820           else if (c < 0xA0)
 821             {
 822               if (VECTORP (Vlatin_extra_code_table)
 823                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 824                 {
 825                   int newmask = 0;
 826
 827                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 828                       & CODING_FLAG_ISO_LATIN_EXTRA)
 829                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 830                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 831                       & CODING_FLAG_ISO_LATIN_EXTRA)
 832                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 833                   mask &= newmask;
 834                   mask_found |= newmask;
 835                 }
 836               else
 837                 return 0;
 838             }
 839           else
 840             {
 841               unsigned char *src_begin = src;
 842
 843               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 844                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 845               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 846               while (src < src_end && *src >= 0xA0)
 847                 src++;
 848               if ((src - src_begin - 1) & 1 && src < src_end)
 849                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 850               else
 851                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 852             }
 853           break;
 854         }
 855     }
 856
 857   return (mask & mask_found);
 858 }
 859
 860 /* Decode a character of which charset is CHARSET and the 1st position
 861    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 862    fetched from SRC and set to C2.  If CHARSET is negative, it means
 863    that we are decoding ill formed text, and what we can do is just to
 864    read C1 as is.
        This macro is not self-contained: it uses the variables CODING,
        SRC, DST, C2 and UNIFICATION_TABLE of the function in which it is
        expanded, and both arguments are evaluated more than once.  */
 865
 866 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 867   do {                                                                  \
 868     int c_alt, charset_alt = (charset);                                 \
         /* At the head of a composition, emit its leading code first.  */   \
 869     if (COMPOSING_HEAD_P (coding->composing))                           \
 870       {                                                                 \
 871         *dst++ = LEADING_CODE_COMPOSITION;                              \
 872         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 873           /* To tell composition rules are embedded.  */                \
 874           *dst++ = 0xFF;                                                \
 875         coding->composing += 2;                                         \
 876       }                                                                 \
 877     if ((charset) >= 0)                                                 \
 878       {                                                                 \
 879         if (CHARSET_DIMENSION (charset) == 2)                           \
 880           {                                                             \
 881             ONE_MORE_BYTE (c2);                                         \
                 /* A non-graphic C2 is pushed back; use SPACE.  */          \
 882             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 883                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 884               {                                                         \
 885                 src--;                                                  \
 886                 c2 = ' ';                                               \
 887               }                                                         \
 888           }                                                             \
             /* Use the character the unification table gives, if any.  */   \
             /* NOTE(review): C2 may be unset here for dimension 1.  */      \
 889         if (!NILP (unification_table)                                   \
 890             && ((c_alt = unify_char (unification_table,                 \
 891                                      -1, (charset), c1, c2)) >= 0))     \
 892           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 893       }                                                                 \
 894     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 895       DECODE_CHARACTER_ASCII (c1);                                      \
 896     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 897       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 898     else                                                                \
 899       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 900     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 901       /* To tell a composition rule follows.  */                        \
 902       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 903   } while (0)
 904
 905 /* Designate the charset identified by DIMENSION, CHARS and FINAL_CHAR
 906    to graphic register REG of CODING.  Jump to label_invalid_code if
 907    the sequence is not acceptable or must be passed through as is.  */
 908 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 909   do {                                                                     \
 910     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 911                                      make_number (chars),                  \
 912                                      make_number (final_char));            \
 913     if (charset < 0                                                        \
 914         || CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) != reg) \
 915       {                                                                    \
 916         /* No such charset, or it is not requested for REG.  */            \
 917         coding->spec.iso2022.last_invalid_designation_register = reg;      \
 918         goto label_invalid_code;                                           \
 919       }                                                                    \
 920     if (coding->spec.iso2022.last_invalid_designation_register == 0        \
 921         && reg == 0                                                        \
 922         && charset == CHARSET_ASCII)                                       \
 923       {                                                                    \
 924         /* Copy this designation sequence as is so that it is surely       \
 925            written back to a file.  */                                     \
 926         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 927         goto label_invalid_code;                                           \
 928       }                                                                    \
 929     coding->spec.iso2022.last_invalid_designation_register = -1;           \
 930     CODING_SPEC_ISO_DESIGNATION (coding, reg)                              \
 931       = (((coding->mode & CODING_MODE_DIRECTION)                           \
 932           && CHARSET_REVERSE_CHARSET (charset) >= 0)                       \
 933          ? CHARSET_REVERSE_CHARSET (charset) : charset);                   \
 934   } while (0)
 935
 936 /* Check if the composing sequence starting at SRC (just after the
 937    `ESC 0' or `ESC 2' that opens it) contains only valid codes.
 938    If `ESC 1', which ends the sequence, is found before SRC_END,
 939    return 0 if only valid codes were seen, else the length of the
        composing sequence counted from SRC through that `ESC 1'.
        Otherwise return -1 to tell that more source is necessary,
        except that the whole remaining length (SRC_END - SRC) is
        returned if CODING is decoding the last block.
        An escape sequence within the composing sequence is valid only
        if CODING allows designation and the sequence designates a
        charset for which CODING has a requested designation.  */
 940
 941 int check_composing_code (coding, src, src_end)
 942      struct coding_system *coding;
 943      unsigned char *src, *src_end;
 944 {
 945   unsigned char *src_start = src;
 946   int invalid_code_found = 0;
 947   int charset, c, c1, dim;
 948
 949   while (src < src_end)
 950     {
           /* Only escape sequences are examined; skip anything else.  */
 951       if (*src++ != ISO_CODE_ESC) continue;
 952       if (src >= src_end) break;
 953       if ((c = *src++) == '1') /* end of composition */
 954         return (invalid_code_found ? src - src_start : 0);
           /* The checks below read up to two more bytes.  */
 955       if (src + 2 >= src_end) break;
           /* Any other escape sequence should be a designation, which
              CODING must allow.  */
 956       if (!(coding->flags & CODING_FLAG_ISO_DESIGNATION))
 957         invalid_code_found = 1;
 958       else
 959         {
               /* DIM is the first index of iso_charset_table: 0 for a
                  charset of dimension 1, 1 for dimension 2.  */
 960           dim = 0;
 961           if (c == '$')
 962             {
 963               dim = 1;
                   /* The short form `ESC $ <final-char>' has no
                      intermediate character; treat it as if `(' were
                      there and leave <final-char> to be read below.  */
 964               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 965             }
 966           if (c >= '(' && c <= '/')
 967             {
 968               c1 = *src++;
 969               if ((c1 < ' ' || c1 >= 0x80)
 970                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 971                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 972                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 973                 invalid_code_found = 1;
 974             }
 975           else
 976             invalid_code_found = 1;
 977         }
 978     }
       /* The end of the composing sequence was not found.  */
 979   return ((coding->mode & CODING_MODE_LAST_BLOCK) ? src_end - src_start : -1);
 980 }
 981
 982 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
        Decode the SRC_BYTES bytes of ISO2022 text at SOURCE into
        DESTINATION, whose size is DST_BYTES, keeping the designation,
        invocation, composition and direction state in CODING.
        On return, CODING->consumed and CODING->consumed_char hold the
        number of source bytes consumed, CODING->produced the number of
        bytes stored, and CODING->produced_char the count maintained
        while decoding.  The value is CODING_FINISH_NORMAL,
        CODING_FINISH_INSUFFICIENT_SRC, CODING_FINISH_INSUFFICIENT_DST or
        CODING_FINISH_INCONSISTENT_EOL.  */
 983
 984 int
 985 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
 986      struct coding_system *coding;
 987      unsigned char *source, *destination;
 988      int src_bytes, dst_bytes;
 989 {
 990   unsigned char *src = source;
 991   unsigned char *src_end = source + src_bytes;
 992   unsigned char *dst = destination;
 993   unsigned char *dst_end = destination + dst_bytes;
 994   /* Since the maximum bytes produced by each loop is 7, we subtract 6
 995      from DST_END to assure that overflow checking is necessary only
 996      at the head of loop.  */
 997   unsigned char *adjusted_dst_end = dst_end - 6;
 998   int charset;
 999   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1000   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1001   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1002   Lisp_Object unification_table
1003     = coding->character_unification_table_for_decode;
1004   int result = CODING_FINISH_NORMAL;
1005
       /* Fall back on the standard table when unification is enabled but
          CODING has no table of its own.  */
1006   if (!NILP (Venable_character_unification) && NILP (unification_table))
1007     unification_table = Vstandard_character_unification_table_for_decode;
1008
1009   coding->produced_char = 0;
       /* When DST_BYTES is zero, DST is limited by SRC instead of DST_END;
          presumably source and destination then share one buffer --
          confirm against the callers.  */
1010   while (src < src_end && (dst_bytes
1011                            ? (dst < adjusted_dst_end)
1012                            : (dst < src - 6)))
1013     {
1014       /* SRC_BASE remembers the start position in source in each loop.
1015          The loop will be exited when there's not enough source text
1016          to analyze long escape sequence or 2-byte code (within macros
1017          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1018          to SRC_BASE before exiting.  */
1019       unsigned char *src_base = src;
1020       int c1 = *src++, c2;
1021
1022       switch (iso_code_class [c1])
1023         {
1024         case ISO_0x20_or_0x7F:
1025           if (!coding->composing
1026               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1027             {
1028               /* This is SPACE or DEL.  */
1029               *dst++ = c1;
1030               coding->produced_char++;
1031               break;
1032             }
1033           /* This is a graphic character, we fall down ...  */
1034
1035         case ISO_graphic_plane_0:
1036           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1037             {
1038               /* This is a composition rule.  */
1039               *dst++ = c1 | 0x80;
1040               coding->composing = COMPOSING_WITH_RULE_TAIL;
1041             }
1042           else
1043             DECODE_ISO_CHARACTER (charset0, c1);
1044           break;
1045
1046         case ISO_0xA0_or_0xFF:
1047           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1048               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1049             {
1050               /* Invalid code.  */
1051               *dst++ = c1;
1052               coding->produced_char++;
1053               break;
1054             }
1055           /* This is a graphic character, we fall down ... */
1056
1057         case ISO_graphic_plane_1:
1058           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1059             {
1060               /* Invalid code.  */
1061               *dst++ = c1;
1062               coding->produced_char++;
1063             }
1064           else
1065             DECODE_ISO_CHARACTER (charset1, c1);
1066           break;
1067
1068         case ISO_control_code:
1069           /* All ISO2022 control characters in this class have the
1070              same representation in Emacs internal format.  */
1071           if (c1 == '\n'
1072               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1073               && (coding->eol_type == CODING_EOL_CR
1074                   || coding->eol_type == CODING_EOL_CRLF))
1075             {
1076               result = CODING_FINISH_INCONSISTENT_EOL;
1077               goto label_end_of_loop_2;
1078             }
1079           *dst++ = c1;
1080           coding->produced_char++;
1081           break;
1082
1083         case ISO_carriage_return:
               /* Convert CR or CR LF into `\n' as CODING->eol_type tells;
                  otherwise store CR as is.  */
1084           if (coding->eol_type == CODING_EOL_CR)
1085             *dst++ = '\n';
1086           else if (coding->eol_type == CODING_EOL_CRLF)
1087             {
1088               ONE_MORE_BYTE (c1);
1089               if (c1 == ISO_CODE_LF)
1090                 *dst++ = '\n';
1091               else
1092                 {
1093                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1094                     {
1095                       result = CODING_FINISH_INCONSISTENT_EOL;
1096                       goto label_end_of_loop_2;
1097                     }
                       /* A lone CR: push back the byte just read and
                          store the CR itself.  */
1098                   src--;
1099                   *dst++ = '\r';
1100                 }
1101             }
1102           else
1103             *dst++ = c1;
1104           coding->produced_char++;
1105           break;
1106
1107         case ISO_shift_out:
               /* Locking shift: invoke graphic register 1 to graphic
                  plane 0, provided something is designated to it.  */
1108           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1109               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1110             goto label_invalid_code;
1111           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1112           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1113           break;
1114
1115         case ISO_shift_in:
               /* Locking shift: invoke graphic register 0 to graphic
                  plane 0 again.  */
1116           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1117             goto label_invalid_code;
1118           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1119           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1120           break;
1121
1122         case ISO_single_shift_2_7:
1123         case ISO_single_shift_2:
1124           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1125             goto label_invalid_code;
1126           /* SS2 is handled as an escape sequence of ESC 'N' */
1127           c1 = 'N';
1128           goto label_escape_sequence;
1129
1130         case ISO_single_shift_3:
1131           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1132             goto label_invalid_code;
1133           /* SS3 is handled as an escape sequence of ESC 'O' */
1134           c1 = 'O';
1135           goto label_escape_sequence;
1136
1137         case ISO_control_sequence_introducer:
1138           /* CSI is handled as an escape sequence of ESC '[' ...  */
1139           c1 = '[';
1140           goto label_escape_sequence;
1141
1142         case ISO_escape:
1143           ONE_MORE_BYTE (c1);
1144         label_escape_sequence:
1145           /* Escape sequences handled by Emacs are invocation,
1146              designation, direction specification, and character
1147              composition specification.  */
1148           switch (c1)
1149             {
1150             case '&':           /* revision of following character set */
                   /* The revision byte is only checked for its range; it
                      must be followed by another escape sequence, which
                      is then processed as usual.  */
1151               ONE_MORE_BYTE (c1);
1152               if (!(c1 >= '@' && c1 <= '~'))
1153                 goto label_invalid_code;
1154               ONE_MORE_BYTE (c1);
1155               if (c1 != ISO_CODE_ESC)
1156                 goto label_invalid_code;
1157               ONE_MORE_BYTE (c1);
1158               goto label_escape_sequence;
1159
1160             case '$':           /* designation of 2-byte character set */
1161               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1162                 goto label_invalid_code;
1163               ONE_MORE_BYTE (c1);
1164               if (c1 >= '@' && c1 <= 'B')
1165                 {       /* designation of JISX0208.1978, GB2312.1980,
1166                                    or JISX0208.1980 */
1167                   DECODE_DESIGNATION (0, 2, 94, c1);
1168                 }
1169               else if (c1 >= 0x28 && c1 <= 0x2B)
1170                 {       /* designation of DIMENSION2_CHARS94 character set */
1171                   ONE_MORE_BYTE (c2);
1172                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1173                 }
1174               else if (c1 >= 0x2C && c1 <= 0x2F)
1175                 {       /* designation of DIMENSION2_CHARS96 character set */
1176                   ONE_MORE_BYTE (c2);
1177                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1178                 }
1179               else
1180                 goto label_invalid_code;
1181               break;
1182
1183             case 'n':           /* invocation of locking-shift-2 */
1184               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1185                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1186                 goto label_invalid_code;
1187               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1188               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1189               break;
1190
1191             case 'o':           /* invocation of locking-shift-3 */
1192               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1193                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1194                 goto label_invalid_code;
1195               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1196               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1197               break;
1198
1199             case 'N':           /* invocation of single-shift-2 */
1200               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1201                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1202                 goto label_invalid_code;
                   /* Only the next character is decoded with the charset
                      designated to graphic register 2.  */
1203               ONE_MORE_BYTE (c1);
1204               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1205               DECODE_ISO_CHARACTER (charset, c1);
1206               break;
1207
1208             case 'O':           /* invocation of single-shift-3 */
1209               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1210                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1211                 goto label_invalid_code;
                   /* Only the next character is decoded with the charset
                      designated to graphic register 3.  */
1212               ONE_MORE_BYTE (c1);
1213               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1214               DECODE_ISO_CHARACTER (charset, c1);
1215               break;
1216
1217             case '0': case '2': /* start composing */
1218               /* Before processing composing, we must be sure that all
1219                  characters being composed are supported by CODING.
1220                  If not, we must give up composing and insert the
1221                  bunch of codes for composing as is without decoding.  */
1222               {
1223                 int result1;
1224
1225                 result1 = check_composing_code (coding, src, src_end);
                     /* 0: all codes are valid, so start composing.  */
1226                 if (result1 == 0)
1227                   coding->composing = (c1 == '0'
1228                                        ? COMPOSING_NO_RULE_HEAD
1229                                        : COMPOSING_WITH_RULE_HEAD);
                     /* Positive: an invalid code is contained.  Copy ESC,
                        C1 and the following RESULT1 bytes as is, if DST
                        has room for them.  */
1230                 else if (result1 > 0)
1231                   {
1232                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1233                       {
1234                         bcopy (src_base, dst, result1 + 2);
1235                         src += result1;
1236                         dst += result1 + 2;
1237                         coding->produced_char += result1 + 2;
1238                       }
1239                     else
1240                       {
1241                         result = CODING_FINISH_INSUFFICIENT_DST;
1242                         goto label_end_of_loop_2;
1243                       }
1244                   }
                     /* Negative: the end of the composing sequence is not
                        yet in the source.  */
1245                 else
1246                   goto label_end_of_loop;
1247               }
1248               break;
1249
1250             case '1':           /* end composing */
1251               coding->composing = COMPOSING_NO;
1252               coding->produced_char++;
1253               break;
1254
1255             case '[':           /* specification of direction */
1256               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1257                 goto label_invalid_code;
1258               /* For the moment, nested direction is not supported.
1259                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1260                  left-to-right, and nonzero means right-to-left.  */
1261               ONE_MORE_BYTE (c1);
1262               switch (c1)
1263                 {
1264                 case ']':       /* end of the current direction */
1265                   coding->mode &= ~CODING_MODE_DIRECTION;
                       /* NOTE(review): there is no `break' here, so control
                          falls into the case below, which requires one more
                          byte `]' -- confirm that this is intended.  */
1266
1267                 case '0':       /* end of the current direction */
1268                 case '1':       /* start of left-to-right direction */
1269                   ONE_MORE_BYTE (c1);
1270                   if (c1 == ']')
1271                     coding->mode &= ~CODING_MODE_DIRECTION;
1272                   else
1273                     goto label_invalid_code;
1274                   break;
1275
1276                 case '2':       /* start of right-to-left direction */
1277                   ONE_MORE_BYTE (c1);
1278                   if (c1 == ']')
1279                     coding->mode |= CODING_MODE_DIRECTION;
1280                   else
1281                     goto label_invalid_code;
1282                   break;
1283
1284                 default:
1285                   goto label_invalid_code;
1286                 }
1287               break;
1288
1289             default:
1290               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1291                 goto label_invalid_code;
1292               if (c1 >= 0x28 && c1 <= 0x2B)
1293                 {       /* designation of DIMENSION1_CHARS94 character set */
1294                   ONE_MORE_BYTE (c2);
1295                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1296                 }
1297               else if (c1 >= 0x2C && c1 <= 0x2F)
1298                 {       /* designation of DIMENSION1_CHARS96 character set */
1299                   ONE_MORE_BYTE (c2);
1300                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1301                 }
1302               else
1303                 {
1304                   goto label_invalid_code;
1305                 }
1306             }
1307           /* We must update these variables now.  */
1308           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1309           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1310           break;
1311
1312         label_invalid_code:
               /* Copy the bytes read in this iteration (from SRC_BASE up
                  to SRC) to DST as is.  */
1313           coding->produced_char += src - src_base;
1314           while (src_base < src)
1315             *dst++ = *src_base++;
1316         }
1317       continue;
1318
           /* Reached only by `goto': stop decoding and rewind SRC to the
              start of the sequence that could not be processed.  */
1319     label_end_of_loop:
1320       result = CODING_FINISH_INSUFFICIENT_SRC;
1321     label_end_of_loop_2:
1322       src = src_base;
1323       break;
1324     }
1325
       /* If the loop ended with source left although nothing else went
          wrong, DST had not enough room.  */
1326   if (result == CODING_FINISH_NORMAL
1327       && src < src_end)
1328     result = CODING_FINISH_INSUFFICIENT_DST;
1329
1330   /* If this is the last block of the text to be decoded, we had
1331      better just flush out all remaining codes in the text although
1332      they are not valid characters.  */
       /* NOTE(review): the room left in DST is not checked before this
          copy -- confirm that the callers guarantee it.  */
1333   if (coding->mode & CODING_MODE_LAST_BLOCK)
1334     {
1335       bcopy (src, dst, src_end - src);
1336       dst += (src_end - src);
1337       src = src_end;
1338     }
1339   coding->consumed = coding->consumed_char = src - source;
1340   coding->produced = dst - destination;
1341   return result;
1342 }
1343
1344 /* ISO2022 encoding stuff.  */
1345
1346 /*
1347    It is not enough to say just "ISO2022" on encoding, we have to
1348    specify more details.  In Emacs, each coding system of ISO2022
1349    variant has the following specifications:
1350         1. Initial designation to G0 thru G3.
1351         2. Allows short-form designation?
1352         3. ASCII should be designated to G0 before control characters?
1353         4. ASCII should be designated to G0 at end of line?
1354         5. 7-bit environment or 8-bit environment?
1355         6. Use locking-shift?
1356         7. Use Single-shift?
1357    And the following two are only for Japanese:
1358         8. Use ASCII in place of JIS0201-1976-Roman?
1359         9. Use JISX0208-1983 in place of JISX0208-1978?
1360    These specifications are encoded in `coding->flags' as flag bits
1361    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1362    details.
1363 */
1364
1365 /* Store in DST the escape sequence which designates CHARSET to graphic
1366    register REG, and record that designation in CODING.  If CODING
1367    has a revision number less than 255 for CHARSET, the sequence is
1368    preceded by ESC, '&' and the byte '@' + revision.  The short form,
1369    which has no intermediate character, is produced only for a 94-chars
1370    charset of dimension 2 designated to REG 0 whose <final-char> is
1371    '@', 'A', or 'B', and only if CODING allows it.  */
1372
1373 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1374   do {                                                                  \
1375     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1376     char *intermediate_char_94 = "()*+";                                \
1377     char *intermediate_char_96 = ",-./";                                \
1378     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1379     if (revision < 255)                                                 \
1380       {                                                                 \
1381         /* Tell the revision of CHARSET first.  */                      \
1382         *dst++ = ISO_CODE_ESC;                                          \
1383         *dst++ = '&';                                                   \
1384         *dst++ = '@' + revision;                                        \
1385       }                                                                 \
1386     /* The designation: ESC ['$'] [<intermediate>] <final-char>.  */    \
1387     *dst++ = ISO_CODE_ESC;                                              \
1388     /* '$' tells that the dimension of CHARSET is not 1.  */            \
1389     if (CHARSET_DIMENSION (charset) != 1)                               \
1390       *dst++ = '$';                                                     \
1391     /* The intermediate character tells REG and whether CHARSET has     \
1392        94 or 96 chars.  A 96-chars charset always has it.  */           \
1393     if (CHARSET_CHARS (charset) != 94)                                  \
1394       *dst++ = (unsigned char) (intermediate_char_96[reg]);             \
1395     /* A 94-chars charset lacks it only in the short form, i.e. when    \
1396        none of the conditions below holds.  */                          \
1397     else if (CHARSET_DIMENSION (charset) == 1                           \
1398              || ! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
1399              || reg != 0                                                \
1400              || final_char < '@' || final_char > 'B')                   \
1401       *dst++ = (unsigned char) (intermediate_char_94[reg]);             \
1402     *dst++ = final_char;                                                \
1403     /* Remember what is now designated to REG.  */                      \
1404     CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
1405   } while (0)
1406
1407 /* The following two macros produce codes (control character or escape
1408    sequence) for ISO2022 single-shift functions (single-shift-2 and
1409    single-shift-3).  */
1410
/* Emit single-shift-2: the escape sequence ESC N when the coding
   system is limited to seven bits, the SS2 control code otherwise.
   Afterwards the single-shifting flag of CODING is set so that the
   next character is encoded under the shift.  Uses the variables
   `coding' and `dst' of the expansion site.  */
#define ENCODE_SINGLE_SHIFT_2                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS2;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'N';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1419
/* Emit single-shift-3: the escape sequence ESC O when the coding
   system is limited to seven bits, the SS3 control code otherwise.
   Afterwards the single-shifting flag of CODING is set so that the
   next character is encoded under the shift.  Uses the variables
   `coding' and `dst' of the expansion site.  */
#define ENCODE_SINGLE_SHIFT_3                                   \
  do {                                                          \
    if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS))         \
      *dst++ = ISO_CODE_SS3;                                    \
    else                                                        \
      {                                                         \
        *dst++ = ISO_CODE_ESC;                                  \
        *dst++ = 'O';                                           \
      }                                                         \
    CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;               \
  } while (0)
1428
1429 /* The following four macros produce codes (control character or
1430    escape sequence) for ISO2022 locking-shift functions (shift-in,
1431    shift-out, locking-shift-2, and locking-shift-3).  */
1432
/* Emit shift-in (SI) and record graphic register 0 as the one invoked
   to graphic plane 0.  Uses `coding' and `dst' of the expansion
   site.  */
#define ENCODE_SHIFT_IN                                 \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;         \
    *dst++ = ISO_CODE_SI;                               \
  } while (0)
1438
/* Emit shift-out (SO) and record graphic register 1 as the one
   invoked to graphic plane 0.  Uses `coding' and `dst' of the
   expansion site.  */
#define ENCODE_SHIFT_OUT                                \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;         \
    *dst++ = ISO_CODE_SO;                               \
  } while (0)
1444
/* Emit locking-shift-2 (ESC n) and record graphic register 2 as the
   one invoked to graphic plane 0.  Uses `coding' and `dst' of the
   expansion site.  */
#define ENCODE_LOCKING_SHIFT_2                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'n';                                       \
  } while (0)
1450
/* Emit locking-shift-3 (ESC o) and record graphic register 3 as the
   one invoked to graphic plane 0.  Uses `coding' and `dst' of the
   expansion site.  */
#define ENCODE_LOCKING_SHIFT_3                          \
  do {                                                  \
    CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
    *dst++ = ISO_CODE_ESC;                              \
    *dst++ = 'o';                                       \
  } while (0)
1456
/* Emit the byte encoding C1, the position-code of a character of the
   one-dimensional character set CHARSET.  When CHARSET is reachable
   through neither graphic plane yet, the required designation and
   invocation sequences are emitted first.  Uses the variables
   `coding' and `dst' of the expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single shift is pending: it applies to this character     \
           only, so the flag is cleared once the byte is out.  */       \
        *dst++ = ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)          \
                  ? (c1 & 0x7F) : (c1 | 0x80));                         \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        /* CHARSET sits in graphic plane 0: 8th bit clear.  */          \
        *dst++ = c1 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        /* CHARSET sits in graphic plane 1: 8th bit set.  */            \
        *dst++ = c1 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    if (coding->flags & CODING_FLAG_ISO_SAFE                            \
        && !coding->safe_charsets[charset])                             \
      {                                                                 \
        /* CHARSET must not be encoded by this coding system; emit     \
           one substitution byte per column of the character.  */       \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is in no graphic plane yet.  Designate and/or invoke    \
       it, then go round again to emit the character itself.  */       \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1500
/* Emit the two bytes encoding C1 and C2, the position-codes of a
   character of the two-dimensional character set CHARSET.  When
   CHARSET is reachable through neither graphic plane yet, the
   required designation and invocation sequences are emitted first.
   Uses the variables `coding' and `dst' of the expansion site.  */

#define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
  do {                                                                  \
    if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
      {                                                                 \
        /* A single shift is pending: it applies to this character     \
           only, so the flag is cleared once the bytes are out.  */     \
        if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
          {                                                             \
            *dst++ = c1 & 0x7F;                                         \
            *dst++ = c2 & 0x7F;                                         \
          }                                                             \
        else                                                            \
          {                                                             \
            *dst++ = c1 | 0x80;                                         \
            *dst++ = c2 | 0x80;                                         \
          }                                                             \
        CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))           \
      {                                                                 \
        /* CHARSET sits in graphic plane 0: 8th bits clear.  */         \
        *dst++ = c1 & 0x7F;                                             \
        *dst++ = c2 & 0x7F;                                             \
        break;                                                          \
      }                                                                 \
    if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))           \
      {                                                                 \
        /* CHARSET sits in graphic plane 1: 8th bits set.  */           \
        *dst++ = c1 | 0x80;                                             \
        *dst++ = c2 | 0x80;                                             \
        break;                                                          \
      }                                                                 \
    if (coding->flags & CODING_FLAG_ISO_SAFE                            \
        && !coding->safe_charsets[charset])                             \
      {                                                                 \
        /* CHARSET must not be encoded by this coding system; emit     \
           one substitution byte per column of the character.  */       \
        *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
        if (CHARSET_WIDTH (charset) == 2)                               \
          *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
        break;                                                          \
      }                                                                 \
    /* CHARSET is in no graphic plane yet.  Designate and/or invoke    \
       it, then go round again to emit the character itself.  */       \
    dst = encode_invocation_designation (charset, coding, dst);         \
  } while (1)
1543
/* Encode one character of CHARSET with position-codes C1 and C2 (C2
   is a dummy for a one-dimensional charset).  The character is first
   mapped through `unification_table' when that is non-nil, then
   handed to the DIMENSION1 or DIMENSION2 encoder.  ASCII is replaced
   by latin-jisx0201 under CODING_FLAG_ISO_USE_ROMAN, and jisx0208 by
   jisx0208-1978 under CODING_FLAG_ISO_USE_OLDJIS.  The consumed
   character count is advanced unless a composition is in progress.
   Uses `coding', `dst' and `unification_table' of the expansion
   site; C1 and C2 may be overwritten.  */
#define ENCODE_ISO_CHARACTER(charset, c1, c2)                             \
  do {                                                                    \
    int c_alt, charset_alt = charset;                                     \
                                                                          \
    if (!NILP (unification_table))                                        \
      {                                                                   \
        c_alt = unify_char (unification_table, -1, charset, c1, c2);      \
        if (c_alt >= 0)                                                   \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                        \
      }                                                                   \
    if (CHARSET_DIMENSION (charset_alt) != 1)                             \
      {                                                                   \
        if (charset == charset_jisx0208                                   \
            && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)                \
          charset_alt = charset_jisx0208_1978;                            \
        ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);            \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        if (charset == CHARSET_ASCII                                      \
            && coding->flags & CODING_FLAG_ISO_USE_ROMAN)                 \
          charset_alt = charset_latin_jisx0201;                           \
        ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);                \
      }                                                                   \
    if (! COMPOSING_P (coding->composing))                                \
      coding->consumed_char++;                                            \
  } while (0)
1570
1571 /* Produce designation and invocation codes at a place pointed by DST
1572    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1573    Return new DST.  */
1574
1575 unsigned char *
1576 encode_invocation_designation (charset, coding, dst)
1577      int charset;
1578      struct coding_system *coding;
1579      unsigned char *dst;
1580 {
1581   int reg;                      /* graphic register number */
1582
1583   /* At first, check designations.  */
1584   for (reg = 0; reg < 4; reg++)
1585     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1586       break;
1587
1588   if (reg >= 4)
1589     {
1590       /* CHARSET is not yet designated to any graphic registers.  */
1591       /* At first check the requested designation.  */
1592       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1593       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1594         /* Since CHARSET requests no special designation, designate it
1595            to graphic register 0.  */
1596         reg = 0;
1597
1598       ENCODE_DESIGNATION (charset, reg, coding);
1599     }
1600
1601   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1602       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1603     {
1604       /* Since the graphic register REG is not invoked to any graphic
1605          planes, invoke it to graphic plane 0.  */
1606       switch (reg)
1607         {
1608         case 0:                 /* graphic register 0 */
1609           ENCODE_SHIFT_IN;
1610           break;
1611
1612         case 1:                 /* graphic register 1 */
1613           ENCODE_SHIFT_OUT;
1614           break;
1615
1616         case 2:                 /* graphic register 2 */
1617           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1618             ENCODE_SINGLE_SHIFT_2;
1619           else
1620             ENCODE_LOCKING_SHIFT_2;
1621           break;
1622
1623         case 3:                 /* graphic register 3 */
1624           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1625             ENCODE_SINGLE_SHIFT_3;
1626           else
1627             ENCODE_LOCKING_SHIFT_3;
1628           break;
1629         }
1630     }
1631   return dst;
1632 }
1633
/* The following three macros produce codes for indicating
   composition: the start of a composition without rules (ESC 0), the
   start of a composition with rules (ESC 2), and the end of a
   composition (ESC 1).  Each writes two bytes through the variable
   `dst' of the expansion site.  The expansions are parenthesized so
   that each macro behaves as a single expression wherever it is
   used.  */
#define ENCODE_COMPOSITION_NO_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '0')
#define ENCODE_COMPOSITION_WITH_RULE_START \
  (*dst++ = ISO_CODE_ESC, *dst++ = '2')
#define ENCODE_COMPOSITION_END \
  (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1638
/* The following three macros produce codes for indicating direction
   of text.  They use the variables `coding' and `dst' of the
   expansion site.  */

/* Emit a control sequence introducer: ESC [ when the coding system
   is limited to seven bits, the CSI control code otherwise.  The
   seven-bit flag is tested as a bit of `coding->flags', in the same
   way the single-shift macros test it.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  do {                                                  \
    if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
      *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
    else                                                \
      *dst++ = ISO_CODE_CSI;                            \
  } while (0)

/* Emit the sequence announcing right-to-left text: CSI 2 ].  A
   statement macro, because the introducer above is a statement and
   cannot be an operand of the comma operator.  */
#define ENCODE_DIRECTION_R2L                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '2';                                       \
    *dst++ = ']';                                       \
  } while (0)

/* Emit the sequence announcing left-to-right text: CSI 0 ].  */
#define ENCODE_DIRECTION_L2R                            \
  do {                                                  \
    ENCODE_CONTROL_SEQUENCE_INTRODUCER;                 \
    *dst++ = '0';                                       \
    *dst++ = ']';                                       \
  } while (0)
1654
/* Bring the graphic planes and registers back to their initial
   state: emit shift-in unless graphic register 0 is already invoked
   to graphic plane 0, then re-designate every register whose current
   charset differs from its (non-negative) initial charset.  Uses
   `coding' and `dst' of the expansion site.  */
#define ENCODE_RESET_PLANE_AND_REGISTER                                     \
  do {                                                                      \
    int reg = 0;                                                            \
    if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
      ENCODE_SHIFT_IN;                                                      \
    while (reg < 4)                                                         \
      {                                                                     \
        if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0          \
            && (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)           \
                != CODING_SPEC_ISO_DESIGNATION (coding, reg)))              \
          ENCODE_DESIGNATION                                                \
            (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg,        \
             coding);                                                       \
        reg++;                                                              \
      }                                                                     \
  } while (0)
1669
1670 /* Produce designation sequences of charsets in the line started from
1671    SRC to a place pointed by *DSTP, and update DSTP.
1672
1673    If the current block ends before any end-of-line, we may fail to
1674    find all the necessary designations.  */
1675
1676 encode_designation_at_bol (coding, table, src, src_end, dstp)
1677      struct coding_system *coding;
1678      Lisp_Object table;
1679      unsigned char *src, *src_end, **dstp;
1680 {
1681   int charset, c, found = 0, reg;
1682   /* Table of charsets to be designated to each graphic register.  */
1683   int r[4];
1684   unsigned char *dst = *dstp;
1685
1686   for (reg = 0; reg < 4; reg++)
1687     r[reg] = -1;
1688
1689   while (src < src_end && *src != '\n' && found < 4)
1690     {
1691       int bytes = BYTES_BY_CHAR_HEAD (*src);
1692
1693       if (NILP (table))
1694         charset = CHARSET_AT (src);
1695       else
1696         {
1697           int c_alt;
1698           unsigned char c1, c2;
1699
1700           SPLIT_STRING(src, bytes, charset, c1, c2);
1701           if ((c_alt = unify_char (table, -1, charset, c1, c2)) >= 0)
1702             charset = CHAR_CHARSET (c_alt);
1703         }
1704
1705       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1706       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1707         {
1708           found++;
1709           r[reg] = charset;
1710         }
1711
1712       src += bytes;
1713     }
1714
1715   if (found)
1716     {
1717       for (reg = 0; reg < 4; reg++)
1718         if (r[reg] >= 0
1719             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1720           ENCODE_DESIGNATION (r[reg], reg, coding);
1721       *dstp = dst;
1722     }
1723 }
1724
1725 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".

     Encode the SRC_BYTES bytes of Emacs' internal format at SOURCE
     into ISO2022 codes written at DESTINATION, according to CODING.

     On return `coding->consumed' is the number of source bytes used,
     `coding->produced' and `coding->produced_char' are both the number
     of bytes written, and `coding->consumed_char' has been counted up
     as characters were encoded.

     The value is CODING_FINISH_NORMAL when all of the source was
     encoded, CODING_FINISH_INSUFFICIENT_SRC when the source ended in
     the middle of a multi-byte sequence (SRC is then rewound to the
     start of that sequence), and CODING_FINISH_INSUFFICIENT_DST when
     the loop stopped with source left over.  */
1726
1727 int
1728 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1729      struct coding_system *coding;
1730      unsigned char *source, *destination;
1731      int src_bytes, dst_bytes;
1732 {
1733   unsigned char *src = source;
1734   unsigned char *src_end = source + src_bytes;
1735   unsigned char *dst = destination;
1736   unsigned char *dst_end = destination + dst_bytes;
1737   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1738      from DST_END to assure overflow checking is necessary only at the
1739      head of loop.  */
1740   unsigned char *adjusted_dst_end = dst_end - 19;
1741   Lisp_Object unification_table
1742       = coding->character_unification_table_for_encode;
1743   int result = CODING_FINISH_NORMAL;
1744
       /* Fall back to the standard table when unification is enabled
          but CODING supplies no table of its own.  */
1745   if (!NILP (Venable_character_unification) && NILP (unification_table))
1746     unification_table = Vstandard_character_unification_table_for_encode;
1747
1748   coding->consumed_char = 0;
       /* When DST_BYTES is zero the output position is bounded by the
          read position instead of by DST_END.
          NOTE(review): presumably this is the case where source and
          destination share one buffer -- confirm against the callers.  */
1749   while (src < src_end && (dst_bytes
1750                            ? (dst < adjusted_dst_end)
1751                            : (dst < src - 19)))
1752     {
1753       /* SRC_BASE remembers the start position in source in each loop.
1754          The loop will be exited when there's not enough source text
1755          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1756          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1757          reset to SRC_BASE before exiting.  */
1758       unsigned char *src_base = src;
1759       int charset, c1, c2, c3, c4;
1760
1761       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1762           && CODING_SPEC_ISO_BOL (coding))
1763         {
1764           /* We have to produce designation sequences if any now.  */
1765           encode_designation_at_bol (coding, unification_table,
1766                                      src, src_end, &dst);
1767           CODING_SPEC_ISO_BOL (coding) = 0;
1768         }
1769
1770       c1 = *src++;
1771       /* If we are seeing a component of a composite character, we are
1772          seeing a leading-code encoded irregularly for composition, or
1773          a composition rule if composing with rule.  We must set C1 to
1774          a normal leading-code or an ASCII code.  If we are not seeing
1775          a composite character, we must reset composition,
1776          designation, and invocation states.  */
1777       if (COMPOSING_P (coding->composing))
1778         {
1779           if (c1 < 0xA0)
1780             {
1781               /* We are not in a composite character any longer.  */
1782               coding->composing = COMPOSING_NO;
1783               ENCODE_RESET_PLANE_AND_REGISTER;
1784               ENCODE_COMPOSITION_END;
1785             }
1786           else
1787             {
1788               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1789                 {
                       /* This byte is a composition rule: emit it with
                          the 8th bit cleared and expect a component
                          next.  */
1790                   *dst++ = c1 & 0x7F;
1791                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1792                   continue;
1793                 }
1794               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1795                 coding->composing = COMPOSING_WITH_RULE_RULE;
1796               if (c1 == 0xA0)
1797                 {
1798                   /* This is an ASCII component.  */
1799                   ONE_MORE_BYTE (c1);
1800                   c1 &= 0x7F;
1801                 }
1802               else
1803                 /* This is a leading-code of non ASCII component.  */
1804                 c1 -= 0x20;
1805             }
1806         }
1807
1808       /* Now encode one character.  C1 is a control character, an
1809          ASCII character, or a leading-code of multi-byte character.  */
1810       switch (emacs_code_class[c1])
1811         {
1812         case EMACS_ascii_code:
1813           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1814           break;
1815
1816         case EMACS_control_code:
1817           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1818             ENCODE_RESET_PLANE_AND_REGISTER;
1819           *dst++ = c1;
1820           coding->consumed_char++;
1821           break;
1822
1823         case EMACS_carriage_return_code:
1824           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1825             {
1826               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1827                 ENCODE_RESET_PLANE_AND_REGISTER;
1828               *dst++ = c1;
1829               coding->consumed_char++;
1830               break;
1831             }
1832           /* fall down to treat '\r' as '\n' ...  */
1833
1834         case EMACS_linefeed_code:
1835           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1836             ENCODE_RESET_PLANE_AND_REGISTER;
               /* Forget the current designations without emitting
                  anything, so that later characters designate their
                  charsets afresh.  */
1837           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1838             bcopy (coding->spec.iso2022.initial_designation,
1839                    coding->spec.iso2022.current_designation,
1840                    sizeof coding->spec.iso2022.initial_designation);
               /* Emit the end-of-line in the format CODING asks for:
                  LF, CR LF, or CR alone.  */
1841           if (coding->eol_type == CODING_EOL_LF
1842               || coding->eol_type == CODING_EOL_UNDECIDED)
1843             *dst++ = ISO_CODE_LF;
1844           else if (coding->eol_type == CODING_EOL_CRLF)
1845             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1846           else
1847             *dst++ = ISO_CODE_CR;
1848           CODING_SPEC_ISO_BOL (coding) = 1;
1849           coding->consumed_char++;
1850           break;
1851
1852         case EMACS_leading_code_2:
               /* C1 is the charset and C2 the only position-code.  */
1853           ONE_MORE_BYTE (c2);
1854           if (c2 < 0xA0)
1855             {
1856               /* invalid sequence */
1857               *dst++ = c1;
1858               *dst++ = c2;
1859               coding->consumed_char += 2;
1860             }
1861           else
1862             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1863           break;
1864
1865         case EMACS_leading_code_3:
               /* Below LEADING_CODE_PRIVATE_11, C1 is the charset and
                  C2, C3 are the position-codes; otherwise C2 is the
                  charset and C3 the only position-code.  */
1866           TWO_MORE_BYTES (c2, c3);
1867           if (c2 < 0xA0 || c3 < 0xA0)
1868             {
1869               /* invalid sequence */
1870               *dst++ = c1;
1871               *dst++ = c2;
1872               *dst++ = c3;
1873               coding->consumed_char += 3;
1874             }
1875           else if (c1 < LEADING_CODE_PRIVATE_11)
1876             ENCODE_ISO_CHARACTER (c1, c2, c3);
1877           else
1878             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1879           break;
1880
1881         case EMACS_leading_code_4:
               /* C2 is the charset and C3, C4 are the position-codes.  */
1882           THREE_MORE_BYTES (c2, c3, c4);
1883           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1884             {
1885               /* invalid sequence */
1886               *dst++ = c1;
1887               *dst++ = c2;
1888               *dst++ = c3;
1889               *dst++ = c4;
1890               coding->consumed_char += 4;
1891             }
1892           else
1893             ENCODE_ISO_CHARACTER (c2, c3, c4);
1894           break;
1895
1896         case EMACS_leading_code_composition:
               /* A second byte of 0xFF starts a composition with rules;
                  any other valid byte is already the first component of
                  a composition without rules.  */
1897           ONE_MORE_BYTE (c2);
1898           if (c2 < 0xA0)
1899             {
1900               /* invalid sequence */
1901               *dst++ = c1;
1902               *dst++ = c2;
1903               coding->consumed_char += 2;
1904             }
1905           else if (c2 == 0xFF)
1906             {
1907               ENCODE_RESET_PLANE_AND_REGISTER;
1908               coding->composing = COMPOSING_WITH_RULE_HEAD;
1909               ENCODE_COMPOSITION_WITH_RULE_START;
1910               coding->consumed_char++;
1911             }
1912           else
1913             {
1914               ENCODE_RESET_PLANE_AND_REGISTER;
1915               /* Rewind one byte because it is a character code of
1916                  composition elements.  */
1917               src--;
1918               coding->composing = COMPOSING_NO_RULE_HEAD;
1919               ENCODE_COMPOSITION_NO_RULE_START;
1920               coding->consumed_char++;
1921             }
1922           break;
1923
1924         case EMACS_invalid_code:
               /* Pass the byte through unchanged.  */
1925           *dst++ = c1;
1926           coding->consumed_char++;
1927           break;
1928         }
1929       continue;
           /* Reached only by a jump, never by falling through; see the
              note on SRC_BASE at the head of the loop.  */
1930     label_end_of_loop:
1931       result = CODING_FINISH_INSUFFICIENT_SRC;
1932       src = src_base;
1933       break;
1934     }
1935
       /* The loop ended without a source shortage yet source remains,
          so it was the output bound that stopped it.  */
1936   if (result == CODING_FINISH_NORMAL
1937       && src < src_end)
1938     result = CODING_FINISH_INSUFFICIENT_DST;
1939
1940   /* If this is the last block of the text to be encoded, we must
1941      reset graphic planes and registers to the initial state, and
1942      flush out the carryover if any.  */
       /* NOTE(review): this reset is written without re-checking the
          space left at DST and also when RESULT is not
          CODING_FINISH_NORMAL -- confirm both are intended.  */
1943   if (coding->mode & CODING_MODE_LAST_BLOCK)
1944     ENCODE_RESET_PLANE_AND_REGISTER;
1945
1946   coding->consumed = src - source;
1947   coding->produced = coding->produced_char = dst - destination;
1948   return result;
1949 }
1950
1951 \f
1952 /*** 4. SJIS and BIG5 handlers ***/
1953
1954 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1955    quite widely.  So, for the moment, Emacs supports them in the bare
1956    C code.  But, in the future, they may be supported only by CCL.  */
1957
1958 /* SJIS is a coding system encoding three character sets: ASCII, right
1959    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1960    as is.  A character of charset katakana-jisx0201 is encoded by
1961    "position-code + 0x80".  A character of charset japanese-jisx0208
1962    is encoded in two bytes, but the two position-codes are divided and
1963    shifted so that they fit in the ranges below.
1964
1965    --- CODE RANGE of SJIS ---
1966    (character set)      (range)
1967    ASCII                0x00 .. 0x7F
1968    KATAKANA-JISX0201    0xA0 .. 0xDF
1969    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xFF
1970             (2nd byte)  0x40 .. 0xFF
1971    -------------------------------
1972
1973 */
1974
1975 /* BIG5 is a coding system encoding two character sets: ASCII and
1976    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
1977    character set and is encoded in two-byte.
1978
1979    --- CODE RANGE of BIG5 ---
1980    (character set)      (range)
1981    ASCII                0x00 .. 0x7F
1982    Big5 (1st byte)      0xA1 .. 0xFE
1983         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
1984    --------------------------
1985
1986    Since the number of characters in Big5 is larger than maximum
1987    characters in Emacs' charset (96x96), it can't be handled as one
1988    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
1989    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
1990    contains frequently used characters and the latter contains less
1991    frequently used characters.  */
1992
1993 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
1994    are the 1st and 2nd position-codes of Big5 in the BIG5 coding system.
1995    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
1996    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
1997
/* Number of Big5 characters which have the same code in 1st byte:
   94 second bytes in 0xA1..0xFE plus 63 in 0x40..0x7E.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

/* Convert the BIG5 byte pair B1, B2 into Emacs' representation:
   CHARSET is set to `charset_big5_1' when B1 is below 0xC9 and to
   `charset_big5_2' otherwise, and C1, C2 are set to the position-codes
   (each 0x21 plus a value below 94) within that charset.

   Every argument is parenthesized in the expansion so that
   expressions such as `x & 0xFF' can be passed safely.  B1 and B2 are
   evaluated more than once, so they must be free of side effects.  */
#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    /* Linear index of the character within the whole Big5 table.  */   \
    unsigned int decode_big5_temp                                       \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        /* Make the index relative to the first row of big5-2.  */      \
        decode_big5_temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW;              \
      }                                                                 \
    (c1) = decode_big5_temp / (0xFF - 0xA1) + 0x21;                     \
    (c2) = decode_big5_temp % (0xFF - 0xA1) + 0x21;                     \
  } while (0)
2015
/* Convert a character of CHARSET (`charset_big5_1' or
   `charset_big5_2') whose position-codes are C1 and C2 (each 0x21
   plus a value below 94) into the BIG5 byte pair B1, B2.

   Every argument is parenthesized in the expansion so that
   expressions such as `c & 0x7F' can be passed safely.  */
#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    /* Linear index of the character within its charset.  */            \
    unsigned int encode_big5_temp                                       \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    /* big5-2 starts at the row whose first byte is 0xC9.  */           \
    if ((charset) == charset_big5_2)                                    \
      encode_big5_temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                \
    (b1) = encode_big5_temp / BIG5_SAME_ROW + 0xA1;                     \
    (b2) = encode_big5_temp % BIG5_SAME_ROW;                            \
    /* The first 63 columns map to 0x40..0x7E, the rest to             \
       0xA1..0xFE.  */                                                  \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2025
/* Store into the destination the character of CHARSET whose
   position-codes are C1 and C2.  The character is first mapped through
   `unification_table' when that is non-nil; the result is then
   decoded as ASCII (also when the resulting charset is negative), as
   a one-dimensional character, or as a two-dimensional character.
   Uses `unification_table' of the expansion site; C1 and C2 may be
   overwritten.  */
#define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
  do {                                                                  \
    int c_alt, charset_alt = (charset);                                 \
                                                                        \
    if (!NILP (unification_table))                                      \
      {                                                                 \
        c_alt = unify_char (unification_table, -1, (charset), c1, c2);  \
        if (c_alt >= 0)                                                 \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
    if (charset_alt != CHARSET_ASCII && charset_alt >= 0)               \
      {                                                                 \
        if (CHARSET_DIMENSION (charset_alt) == 1)                       \
          {                                                             \
            DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);              \
          }                                                             \
        else                                                            \
          {                                                             \
            DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);          \
          }                                                             \
      }                                                                 \
    else                                                                \
      {                                                                 \
        DECODE_CHARACTER_ASCII (c1);                                    \
      }                                                                 \
  } while (0)
2040
/* Encode the character (CHARSET, C1, C2) at DST and count it in
   `coding->consumed_char'.  C1 and C2 must be lvalues: they are
   overwritten when UNIFICATION_TABLE maps the character to another
   one, and their high bits are cleared for two-dimensional charsets.

   Output produced:
     ASCII                        -> C1
     JISX0201-Kana when SJIS_P    -> C1
     other dimension-1 charsets   -> leading code, C1 (unencoded)
     JISX0208 when SJIS_P         -> two bytes from ENCODE_SJIS
     Big5-1/Big5-2 when !SJIS_P   -> two bytes from ENCODE_BIG5
     other dimension-2 charsets   -> leading code, C1, C2 (unencoded)

   The expansion deliberately has no trailing semicolon so that an
   invocation followed by `;' is exactly one statement and can be
   used as the body of an `if' that has an `else'.  */
#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                       \
  do {                                                                    \
    int c_alt, charset_alt;                                               \
    if (!NILP (unification_table)                                         \
        && ((c_alt = unify_char (unification_table, -1, (charset),        \
                                 c1, c2))                                 \
            >= 0))                                                        \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                            \
    else                                                                  \
      charset_alt = (charset);                                            \
    if (charset_alt == charset_ascii)                                     \
      *dst++ = c1;                                                        \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                        \
      {                                                                   \
        if (sjis_p && charset_alt == charset_katakana_jisx0201)           \
          *dst++ = c1;                                                    \
        else                                                              \
          *dst++ = charset_alt, *dst++ = c1;                              \
      }                                                                   \
    else                                                                  \
      {                                                                   \
        /* Strip the multibyte marker bit to get 7-bit code points.  */   \
        c1 &= 0x7F, c2 &= 0x7F;                                           \
        if (sjis_p && charset_alt == charset_jisx0208)                    \
          {                                                               \
            unsigned char s1, s2;                                         \
                                                                          \
            ENCODE_SJIS (c1, c2, s1, s2);                                 \
            *dst++ = s1, *dst++ = s2;                                     \
          }                                                               \
        else if (!sjis_p                                                  \
                 && (charset_alt == charset_big5_1                        \
                     || charset_alt == charset_big5_2))                   \
          {                                                               \
            unsigned char b1, b2;                                         \
                                                                          \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);                    \
            *dst++ = b1, *dst++ = b2;                                     \
          }                                                               \
        else                                                              \
          *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;                 \
      }                                                                   \
    coding->consumed_char++;                                              \
  } while (0)
2083
2084 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2085    Check if a text is encoded in SJIS.  If it is, return
2086    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2087
2088 int
2089 detect_coding_sjis (src, src_end)
2090      unsigned char *src, *src_end;
2091 {
2092   unsigned char c;
2093
2094   while (src < src_end)
2095     {
2096       c = *src++;
2097       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2098         {
2099           if (src < src_end && *src++ < 0x40)
2100             return 0;
2101         }
2102     }
2103   return CODING_CATEGORY_MASK_SJIS;
2104 }
2105
2106 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2107    Check if a text is encoded in BIG5.  If it is, return
2108    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2109
2110 int
2111 detect_coding_big5 (src, src_end)
2112      unsigned char *src, *src_end;
2113 {
2114   unsigned char c;
2115
2116   while (src < src_end)
2117     {
2118       c = *src++;
2119       if (c >= 0xA1)
2120         {
2121           if (src >= src_end)
2122             break;
2123           c = *src++;
2124           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2125             return 0;
2126         }
2127     }
2128   return CODING_CATEGORY_MASK_BIG5;
2129 }
2130
/* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
   If SJIS_P is 1, decode SJIS text, else decode BIG5 text.

   Read at most SRC_BYTES bytes from SOURCE and write the decoded
   text to DESTINATION.  If DST_BYTES is nonzero it is the size of
   DESTINATION; if it is zero the loop instead keeps DST at least
   three bytes behind SRC (presumably for conversion within a single
   buffer -- confirm against the callers).

   On return `coding->consumed' and `coding->consumed_char' hold the
   number of source bytes processed and `coding->produced' the number
   of bytes written.  `coding->produced_char' is reset to 0 here and
   incremented for each control or invalid byte copied below (the
   DECODE_... macros presumably count the other characters).

   The value returned is one of:
     CODING_FINISH_NORMAL            all of SOURCE was processed,
     CODING_FINISH_INSUFFICIENT_SRC  SOURCE ends within a sequence,
     CODING_FINISH_INSUFFICIENT_DST  the loop stopped with source left,
     CODING_FINISH_INCONSISTENT_EOL  an end-of-line not matching
                                     `coding->eol_type' was found while
                                     CODING_MODE_INHIBIT_INCONSISTENT_EOL
                                     is set.  */

int
decode_coding_sjis_big5 (coding, source, destination,
                         src_bytes, dst_bytes, sjis_p)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes;
     int sjis_p;
{
  unsigned char *src = source;
  unsigned char *src_end = source + src_bytes;
  unsigned char *dst = destination;
  unsigned char *dst_end = destination + dst_bytes;
  /* Since the maximum bytes produced by each loop is 4, we subtract 3
     from DST_END to assure overflow checking is necessary only at the
     head of loop.  */
  unsigned char *adjusted_dst_end = dst_end - 3;
  Lisp_Object unification_table
      = coding->character_unification_table_for_decode;
  int result = CODING_FINISH_NORMAL;

  /* Fall back to the standard table when unification is enabled but
     the coding system does not supply a table of its own.  */
  if (!NILP (Venable_character_unification) && NILP (unification_table))
    unification_table = Vstandard_character_unification_table_for_decode;

  coding->produced_char = 0;
  while (src < src_end && (dst_bytes
                           ? (dst < adjusted_dst_end)
                           : (dst < src - 3)))
    {
      /* SRC_BASE remembers the start position in source in each loop.
         The loop will be exited when there's not enough source text
         to analyze two-byte character (within macro ONE_MORE_BYTE).
         In that case, SRC is reset to SRC_BASE before exiting.  */
      unsigned char *src_base = src;
      unsigned char c1 = *src++, c2, c3, c4;

      if (c1 < 0x20)
        {
          /* Control code: only '\r' and '\n' need special treatment,
             depending on the end-of-line format being decoded.  */
          if (c1 == '\r')
            {
              if (coding->eol_type == CODING_EOL_CRLF)
                {
                  ONE_MORE_BYTE (c2);
                  if (c2 == '\n')
                    /* CR LF collapses to a single LF.  */
                    *dst++ = c2;
                  else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
                    {
                      result = CODING_FINISH_INCONSISTENT_EOL;
                      goto label_end_of_loop_2;
                    }
                  else
                    /* To process C2 again, SRC is subtracted by 1.  */
                    *dst++ = c1, src--;
                }
              else if (coding->eol_type == CODING_EOL_CR)
                *dst++ = '\n';
              else
                *dst++ = c1;
            }
          else if (c1 == '\n'
                   && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
                   && (coding->eol_type == CODING_EOL_CR
                       || coding->eol_type == CODING_EOL_CRLF))
            {
              /* A bare LF contradicts the CR or CRLF format.  */
              result = CODING_FINISH_INCONSISTENT_EOL;
              goto label_end_of_loop_2;
            }
          else
            *dst++ = c1;
          coding->produced_char++;
        }
      else if (c1 < 0x80)
        DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
      else if (c1 < 0xA0 || c1 >= 0xE0)
        {
          /* SJIS -> JISX0208, BIG5 -> Big5 (only if 0xE0 <= c1 < 0xFF) */
          if (sjis_p)
            {
              ONE_MORE_BYTE (c2);
              DECODE_SJIS (c1, c2, c3, c4);
              DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
            }
          else if (c1 >= 0xE0 && c1 < 0xFF)
            {
              int charset;

              ONE_MORE_BYTE (c2);
              DECODE_BIG5 (c1, c2, charset, c3, c4);
              DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
            }
          else                  /* Invalid code */
            {
              /* Copy the byte through unchanged.  */
              *dst++ = c1;
              coding->produced_char++;
            }
        }
      else
        {
          /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
          if (sjis_p)
            DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
                                        /* dummy */ c2);
          else
            {
              int charset;

              ONE_MORE_BYTE (c2);
              DECODE_BIG5 (c1, c2, charset, c3, c4);
              DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
            }
        }
      continue;

      /* The labels below are reached only by `goto'; both rewind SRC
         to the start of the unfinished character and leave the loop.  */
    label_end_of_loop:
      result = CODING_FINISH_INSUFFICIENT_SRC;
    label_end_of_loop_2:
      src = src_base;
      break;
    }

  /* Source text remains although no error was flagged, so the loop
     condition must have stopped on the destination limit.  */
  if (result == CODING_FINISH_NORMAL
      && src < src_end)
    result = CODING_FINISH_INSUFFICIENT_DST;

  coding->consumed = coding->consumed_char = src - source;
  coding->produced = dst - destination;
  return result;
}
2261
/* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
   This function can encode `charset_ascii', `charset_katakana_jisx0201',
   `charset_jisx0208', `charset_big5_1', and `charset_big5_2'.  We are
   sure that all these charsets are registered as official charset
   (i.e. do not have extended leading-codes).  Characters of other
   charsets are produced without any encoding.  If SJIS_P is 1, encode
   SJIS text, else encode BIG5 text.

   Read at most SRC_BYTES bytes from SOURCE and write the encoded
   text to DESTINATION.  If DST_BYTES is nonzero it is the size of
   DESTINATION; if it is zero the loop instead keeps DST at least one
   byte behind SRC (presumably for conversion within a single buffer
   -- confirm against the callers).

   On return `coding->consumed' holds the number of source bytes
   processed, `coding->consumed_char' the number of characters
   counted while encoding, and `coding->produced' and
   `coding->produced_char' the number of bytes written.

   The value returned is CODING_FINISH_NORMAL when all of SOURCE was
   processed, CODING_FINISH_INSUFFICIENT_SRC when SOURCE ends within
   a multi-byte sequence, and CODING_FINISH_INSUFFICIENT_DST when the
   loop stopped with source text left.  */

int
encode_coding_sjis_big5 (coding, source, destination,
                         src_bytes, dst_bytes, sjis_p)
     struct coding_system *coding;
     unsigned char *source, *destination;
     int src_bytes, dst_bytes;
     int sjis_p;
{
  unsigned char *src = source;
  unsigned char *src_end = source + src_bytes;
  unsigned char *dst = destination;
  unsigned char *dst_end = destination + dst_bytes;
  /* Since the maximum bytes produced by each loop is 2, we subtract 1
     from DST_END to assure overflow checking is necessary only at the
     head of loop.  */
  /* NOTE(review): ENCODE_SJIS_BIG5_CHARACTER stores three bytes for
     a two-dimensional charset it cannot encode, which looks larger
     than the margin reserved here -- confirm.  */
  unsigned char *adjusted_dst_end = dst_end - 1;
  Lisp_Object unification_table
      = coding->character_unification_table_for_encode;
  int result = CODING_FINISH_NORMAL;

  /* Fall back to the standard table when unification is enabled but
     the coding system does not supply a table of its own.  */
  if (!NILP (Venable_character_unification) && NILP (unification_table))
    unification_table = Vstandard_character_unification_table_for_encode;

  coding->consumed_char = 0;
  while (src < src_end && (dst_bytes
                           ? (dst < adjusted_dst_end)
                           : (dst < src - 1)))
    {
      /* SRC_BASE remembers the start position in source in each loop.
         The loop will be exited when there's not enough source text
         to analyze multi-byte codes (within macros ONE_MORE_BYTE and
         TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
         before exiting.  */
      unsigned char *src_base = src;
      unsigned char c1 = *src++, c2, c3, c4;

      /* Inside a composition sequence the bytes are remapped before
         they are classified: 0xA0 introduces a byte whose high bit
         is dropped, other bytes >= 0xA0 are lowered by 0x20, and any
         byte below 0xA0 ends the composition.  */
      if (coding->composing)
        {
          if (c1 == 0xA0)
            {
              ONE_MORE_BYTE (c1);
              c1 &= 0x7F;
            }
          else if (c1 >= 0xA0)
            c1 -= 0x20;
          else
            coding->composing = 0;
        }

      switch (emacs_code_class[c1])
        {
        case EMACS_ascii_code:
          ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
          break;

        case EMACS_control_code:
          *dst++ = c1;
          coding->consumed_char++;
          break;

        case EMACS_carriage_return_code:
          if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
            {
              *dst++ = c1;
              coding->consumed_char++;
              break;
            }
          /* fall down to treat '\r' as '\n' ...  */

        case EMACS_linefeed_code:
          /* Emit the end-of-line sequence selected by
             `coding->eol_type'.  */
          if (coding->eol_type == CODING_EOL_LF
              || coding->eol_type == CODING_EOL_UNDECIDED)
            *dst++ = '\n';
          else if (coding->eol_type == CODING_EOL_CRLF)
            *dst++ = '\r', *dst++ = '\n';
          else
            *dst++ = '\r';
          coding->consumed_char++;
          break;

        case EMACS_leading_code_2:
          /* C1 is the charset, C2 the only code point.  */
          ONE_MORE_BYTE (c2);
          ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
          break;

        case EMACS_leading_code_3:
          /* C1 is the charset, C2 and C3 the code points.  */
          TWO_MORE_BYTES (c2, c3);
          ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
          break;

        case EMACS_leading_code_4:
          /* C1 is dropped here; C2 is passed as the charset and C3,
             C4 as the code points.  */
          THREE_MORE_BYTES (c2, c3, c4);
          ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
          break;

        case EMACS_leading_code_composition:
          /* Produces no output; the following bytes are handled by
             the composing branch at the top of the loop.  */
          coding->composing = 1;
          break;

        default:                /* i.e. case EMACS_invalid_code: */
          *dst++ = c1;
          coding->consumed_char++;
        }
      continue;

      /* Reached only by `goto': rewind SRC to the start of the
         unfinished character and leave the loop.  */
    label_end_of_loop:
      result = CODING_FINISH_INSUFFICIENT_SRC;
      src = src_base;
      break;
    }

  /* Source text remains although no error was flagged, so the loop
     condition must have stopped on the destination limit.  */
  if (result == CODING_FINISH_NORMAL
      && src < src_end)
    result = CODING_FINISH_INSUFFICIENT_DST;
  coding->consumed = src - source;
  coding->produced = coding->produced_char = dst - destination;
  return result;
}
2388
2389 \f
2390 /*** 5. End-of-line handlers ***/
2391
2392 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2393    This function is called only when `coding->eol_type' is
2394    CODING_EOL_CRLF or CODING_EOL_CR.  */
2395
2396 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2397      struct coding_system *coding;
2398      unsigned char *source, *destination;
2399      int src_bytes, dst_bytes;
2400 {
2401   unsigned char *src = source;
2402   unsigned char *src_end = source + src_bytes;
2403   unsigned char *dst = destination;
2404   unsigned char *dst_end = destination + dst_bytes;
2405   int result = CODING_FINISH_NORMAL;
2406
2407   if (src_bytes <= 0)
2408     return result;
2409
2410   switch (coding->eol_type)
2411     {
2412     case CODING_EOL_CRLF:
2413       {
2414         /* Since the maximum bytes produced by each loop is 2, we
2415            subtract 1 from DST_END to assure overflow checking is
2416            necessary only at the head of loop.  */
2417         unsigned char *adjusted_dst_end = dst_end - 1;
2418
2419         while (src < src_end && (dst_bytes
2420                                  ? (dst < adjusted_dst_end)
2421                                  : (dst < src - 1)))
2422           {
2423             unsigned char *src_base = src;
2424             unsigned char c = *src++;
2425             if (c == '\r')
2426               {
2427                 ONE_MORE_BYTE (c);
2428                 if (c != '\n')
2429                   {
2430                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2431                       {
2432                         result = CODING_FINISH_INCONSISTENT_EOL;
2433                         goto label_end_of_loop_2;
2434                       }
2435                     *dst++ = '\r';
2436                   }
2437                 *dst++ = c;
2438               }
2439             else if (c == '\n'
2440                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2441               {
2442                 result = CODING_FINISH_INCONSISTENT_EOL;
2443                 goto label_end_of_loop_2;
2444               }
2445             else
2446               *dst++ = c;
2447             continue;
2448
2449           label_end_of_loop:
2450             result = CODING_FINISH_INSUFFICIENT_SRC;
2451           label_end_of_loop_2:
2452             src = src_base;
2453             break;
2454           }
2455         if (result == CODING_FINISH_NORMAL
2456             && src < src_end)
2457           result = CODING_FINISH_INSUFFICIENT_DST;
2458       }
2459       break;
2460
2461     case CODING_EOL_CR:
2462       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2463         {
2464           while (src < src_end) if (*src++ == '\n') break;
2465           if (*--src == '\n')
2466             {
2467               src_bytes = src - source;
2468               result = CODING_FINISH_INCONSISTENT_EOL;
2469             }
2470         }
2471       if (dst_bytes && src_bytes > dst_bytes)
2472         {
2473           result = CODING_FINISH_INSUFFICIENT_DST;
2474           src_bytes = dst_bytes;
2475         }
2476       if (dst_bytes)
2477         bcopy (source, destination, src_bytes);
2478       else
2479         safe_bcopy (source, destination, src_bytes);
2480       src = source + src_bytes;
2481       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2482       break;
2483
2484     default:                    /* i.e. case: CODING_EOL_LF */
2485       if (dst_bytes && src_bytes > dst_bytes)
2486         {
2487           result = CODING_FINISH_INSUFFICIENT_DST;
2488           src_bytes = dst_bytes;
2489         }
2490       if (dst_bytes)
2491         bcopy (source, destination, src_bytes);
2492       else
2493         safe_bcopy (source, destination, src_bytes);
2494       src += src_bytes;
2495       dst += dst_bytes;
2496       break;
2497     }
2498
2499   coding->consumed = coding->consumed_char = src - source;
2500   coding->produced = coding->produced_char = dst - destination;
2501   return result;
2502 }
2503
2504 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2505    format of end-of-line according to `coding->eol_type'.  If
2506    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2507    '\r' in source text also means end-of-line.  */
2508
2509 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2510      struct coding_system *coding;
2511      unsigned char *source, *destination;
2512      int src_bytes, dst_bytes;
2513 {
2514   unsigned char *src = source;
2515   unsigned char *dst = destination;
2516   int result = CODING_FINISH_NORMAL;
2517
2518   if (coding->eol_type == CODING_EOL_CRLF)
2519     {
2520       unsigned char c;
2521       unsigned char *src_end = source + src_bytes;
2522       unsigned char *dst_end = destination + dst_bytes;
2523       /* Since the maximum bytes produced by each loop is 2, we
2524          subtract 1 from DST_END to assure overflow checking is
2525          necessary only at the head of loop.  */
2526       unsigned char *adjusted_dst_end = dst_end - 1;
2527
2528       while (src < src_end && (dst_bytes
2529                                ? (dst < adjusted_dst_end)
2530                                : (dst < src - 1)))
2531         {
2532           c = *src++;
2533           if (c == '\n'
2534               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2535             *dst++ = '\r', *dst++ = '\n';
2536           else
2537             *dst++ = c;
2538         }
2539       if (src < src_end)
2540         result = CODING_FINISH_INSUFFICIENT_DST;
2541     }
2542   else
2543     {
2544       if (dst_bytes && src_bytes > dst_bytes)
2545         {
2546           src_bytes = dst_bytes;
2547           result = CODING_FINISH_INSUFFICIENT_DST;
2548         }
2549       if (dst_bytes)
2550         bcopy (source, destination, src_bytes);
2551       else
2552         safe_bcopy (source, destination, src_bytes);
2553       if (coding->eol_type == CODING_EOL_CRLF)
2554         {
2555           while (src_bytes--)
2556             if (*dst++ == '\n') dst[-1] = '\r';
2557         }
2558       else if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2559         {
2560           while (src_bytes--)
2561             if (*dst++ == '\r') dst[-1] = '\n';
2562         }
2563       src += src_bytes;
2564       dst += src_bytes;
2565     }
2566
2567   coding->consumed = coding->consumed_char = src - source;
2568   coding->produced = coding->produced_char = dst - destination;
2569   return result;
2570 }
2571
2572 \f
2573 /*** 6. C library functions ***/
2574
2575 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2576    has a property `coding-system'.  The value of this property is a
2577    vector of length 5 (called as coding-vector).  Among elements of
2578    this vector, the first (element[0]) and the fifth (element[4])
2579    carry important information for decoding/encoding.  Before
2580    decoding/encoding, this information should be set in fields of a
2581    structure of type `coding_system'.
2582
2583    A value of property `coding-system' can be a symbol of another
2584    subsidiary coding-system.  In that case, Emacs gets coding-vector
2585    from that symbol.
2586
2587    `element[0]' contains information to be set in `coding->type'.  The
2588    value and its meaning is as follows:
2589
2590    0 -- coding_type_emacs_mule
2591    1 -- coding_type_sjis
2592    2 -- coding_type_iso2022
2593    3 -- coding_type_big5
2594    4 -- coding_type_ccl encoder/decoder written in CCL
2595    nil -- coding_type_no_conversion
2596    t -- coding_type_undecided (automatic conversion on decoding,
2597                                no-conversion on encoding)
2598
2599    `element[4]' contains information to be set in `coding->flags' and
2600    `coding->spec'.  The meaning varies by `coding->type'.
2601
2602    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2603    of length 32 (of which the first 17 sub-elements are used now).
2604    Meanings of these sub-elements are:
2605
2606    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2607         If the value is an integer of valid charset, the charset is
2608         assumed to be designated to graphic register N initially.
2609
2610         If the value is minus, it is a minus value of charset which
2611         reserves graphic register N, which means that the charset is
2612         not designated initially but should be designated to graphic
2613         register N just before encoding a character in that charset.
2614
2615         If the value is nil, graphic register N is never used on
2616         encoding.
2617
2618    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2619         Each value takes t or nil.  See the section ISO2022 of
2620         `coding.h' for more information.
2621
2622    If `coding->type' is `coding_type_big5', element[4] is t to denote
2623    BIG5-ETen or nil to denote BIG5-HKU.
2624
2625    If `coding->type' takes the other value, element[4] is ignored.
2626
2627    Emacs Lisp's coding system also carries information about format of
2628    end-of-line in a value of property `eol-type'.  If the value is
2629    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2630    means CODING_EOL_CR.  If it is not integer, it should be a vector
2631    of subsidiary coding systems of which property `eol-type' has one
2632    of above values.
2633
2634 */
2635
2636 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2637    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2638    is setup so that no conversion is necessary and return -1, else
2639    return 0.  */
2640
2641 int
2642 setup_coding_system (coding_system, coding)
2643      Lisp_Object coding_system;
2644      struct coding_system *coding;
2645 {
2646   Lisp_Object coding_spec, coding_type, eol_type, plist;
2647   Lisp_Object val;
2648   int i;
2649
2650   /* Initialize some fields required for all kinds of coding systems.  */
2651   coding->symbol = coding_system;
2652   coding->common_flags = 0;
2653   coding->mode = 0;
2654   coding->heading_ascii = -1;
2655   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2656   coding_spec = Fget (coding_system, Qcoding_system);
2657   if (!VECTORP (coding_spec)
2658       || XVECTOR (coding_spec)->size != 5
2659       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2660     goto label_invalid_coding_system;
2661
2662   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2663   if (VECTORP (eol_type))
2664     {
2665       coding->eol_type = CODING_EOL_UNDECIDED;
2666       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2667     }
2668   else if (XFASTINT (eol_type) == 1)
2669     {
2670       coding->eol_type = CODING_EOL_CRLF;
2671       coding->common_flags
2672         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2673     }
2674   else if (XFASTINT (eol_type) == 2)
2675     {
2676       coding->eol_type = CODING_EOL_CR;
2677       coding->common_flags
2678         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2679     }
2680   else
2681     coding->eol_type = CODING_EOL_LF;
2682
2683   coding_type = XVECTOR (coding_spec)->contents[0];
2684   /* Try short cut.  */
2685   if (SYMBOLP (coding_type))
2686     {
2687       if (EQ (coding_type, Qt))
2688         {
2689           coding->type = coding_type_undecided;
2690           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2691         }
2692       else
2693         coding->type = coding_type_no_conversion;
2694       return 0;
2695     }
2696
2697   /* Initialize remaining fields.  */
2698   coding->composing = 0;
2699   coding->character_unification_table_for_decode = Qnil;
2700   coding->character_unification_table_for_encode = Qnil;
2701
2702   /* Get values of coding system properties:
2703      `post-read-conversion', `pre-write-conversion',
2704      `character-unification-table-for-decode',
2705      `character-unification-table-for-encode'.  */
2706   plist = XVECTOR (coding_spec)->contents[3];
2707   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2708   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2709   val = Fplist_get (plist, Qcharacter_unification_table_for_decode);
2710   if (SYMBOLP (val))
2711     val = Fget (val, Qcharacter_unification_table_for_decode);
2712   coding->character_unification_table_for_decode
2713     = CHAR_TABLE_P (val) ? val : Qnil;
2714   val = Fplist_get (plist, Qcharacter_unification_table_for_encode);
2715   if (SYMBOLP (val))
2716     val = Fget (val, Qcharacter_unification_table_for_encode);
2717   coding->character_unification_table_for_encode
2718     = CHAR_TABLE_P (val) ? val : Qnil;
2719   val = Fplist_get (plist, Qcoding_category);
2720   if (!NILP (val))
2721     {
2722       val = Fget (val, Qcoding_category_index);
2723       if (INTEGERP (val))
2724         coding->category_idx = XINT (val);
2725       else
2726         goto label_invalid_coding_system;
2727     }
2728   else
2729     goto label_invalid_coding_system;
2730
2731   val = Fplist_get (plist, Qsafe_charsets);
2732   if (EQ (val, Qt))
2733     {
2734       for (i = 0; i <= MAX_CHARSET; i++)
2735         coding->safe_charsets[i] = 1;
2736     }
2737   else
2738     {
2739       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2740       while (CONSP (val))
2741         {
2742           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2743             coding->safe_charsets[i] = 1;
2744           val = XCONS (val)->cdr;
2745         }
2746     }
2747
2748   switch (XFASTINT (coding_type))
2749     {
2750     case 0:
2751       coding->type = coding_type_emacs_mule;
2752       if (!NILP (coding->post_read_conversion))
2753         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2754       if (!NILP (coding->pre_write_conversion))
2755         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2756       break;
2757
2758     case 1:
2759       coding->type = coding_type_sjis;
2760       coding->common_flags
2761         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2762       break;
2763
2764     case 2:
2765       coding->type = coding_type_iso2022;
2766       coding->common_flags
2767         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2768       {
2769         Lisp_Object val, temp;
2770         Lisp_Object *flags;
2771         int i, charset, reg_bits = 0;
2772
2773         val = XVECTOR (coding_spec)->contents[4];
2774
2775         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2776           goto label_invalid_coding_system;
2777
2778         flags = XVECTOR (val)->contents;
2779         coding->flags
2780           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2781              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2782              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2783              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2784              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2785              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2786              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2787              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2788              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2789              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2790              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2791              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2792              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2793              );
2794
2795         /* Invoke graphic register 0 to plane 0.  */
2796         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2797         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2798         CODING_SPEC_ISO_INVOCATION (coding, 1)
2799           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2800         /* Not single shifting at first.  */
2801         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2802         /* Beginning of buffer should also be regarded as bol. */
2803         CODING_SPEC_ISO_BOL (coding) = 1;
2804
2805         for (charset = 0; charset <= MAX_CHARSET; charset++)
2806           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2807         val = Vcharset_revision_alist;
2808         while (CONSP (val))
2809           {
2810             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2811             if (charset >= 0
2812                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2813                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2814               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2815             val = XCONS (val)->cdr;
2816           }
2817
2818         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2819            FLAGS[REG] can be one of below:
2820                 integer CHARSET: CHARSET occupies register I,
2821                 t: designate nothing to REG initially, but can be used
2822                   by any charsets,
2823                 list of integer, nil, or t: designate the first
2824                   element (if integer) to REG initially, the remaining
2825                   elements (if integer) is designated to REG on request,
2826                   if an element is t, REG can be used by any charsets,
2827                 nil: REG is never used.  */
2828         for (charset = 0; charset <= MAX_CHARSET; charset++)
2829           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2830             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2831         for (i = 0; i < 4; i++)
2832           {
2833             if (INTEGERP (flags[i])
2834                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2835                 || (charset = get_charset_id (flags[i])) >= 0)
2836               {
2837                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2838                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2839               }
2840             else if (EQ (flags[i], Qt))
2841               {
2842                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2843                 reg_bits |= 1 << i;
2844                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2845               }
2846             else if (CONSP (flags[i]))
2847               {
2848                 Lisp_Object tail = flags[i];
2849
2850                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2851                 if (INTEGERP (XCONS (tail)->car)
2852                     && (charset = XINT (XCONS (tail)->car),
2853                         CHARSET_VALID_P (charset))
2854                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2855                   {
2856                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2857                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2858                   }
2859                 else
2860                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2861                 tail = XCONS (tail)->cdr;
2862                 while (CONSP (tail))
2863                   {
2864                     if (INTEGERP (XCONS (tail)->car)
2865                         && (charset = XINT (XCONS (tail)->car),
2866                             CHARSET_VALID_P (charset))
2867                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2868                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2869                         = i;
2870                     else if (EQ (XCONS (tail)->car, Qt))
2871                       reg_bits |= 1 << i;
2872                     tail = XCONS (tail)->cdr;
2873                   }
2874               }
2875             else
2876               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2877
2878             CODING_SPEC_ISO_DESIGNATION (coding, i)
2879               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
2880           }
2881
2882         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
2883           {
2884             /* REG 1 can be used only by locking shift in 7-bit env.  */
2885             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
2886               reg_bits &= ~2;
2887             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
2888               /* Without any shifting, only REG 0 and 1 can be used.  */
2889               reg_bits &= 3;
2890           }
2891
2892         if (reg_bits)
2893           for (charset = 0; charset <= MAX_CHARSET; charset++)
2894             {
2895               if (CHARSET_VALID_P (charset))
2896                 {
2897                   /* There exist some default graphic registers to be
2898                      used CHARSET.  */
2899
2900                   /* We had better avoid designating a charset of
2901                      CHARS96 to REG 0 as far as possible.  */
2902                   if (CHARSET_CHARS (charset) == 96)
2903                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2904                       = (reg_bits & 2
2905                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
2906                   else
2907                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2908                       = (reg_bits & 1
2909                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
2910                 }
2911             }
2912       }
2913       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
2914       coding->spec.iso2022.last_invalid_designation_register = -1;
2915       break;
2916
2917     case 3:
2918       coding->type = coding_type_big5;
2919       coding->common_flags
2920         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2921       coding->flags
2922         = (NILP (XVECTOR (coding_spec)->contents[4])
2923            ? CODING_FLAG_BIG5_HKU
2924            : CODING_FLAG_BIG5_ETEN);
2925       break;
2926
2927     case 4:
2928       coding->type = coding_type_ccl;
2929       coding->common_flags
2930         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2931       {
2932         Lisp_Object val = XVECTOR (coding_spec)->contents[4];
2933         if (CONSP  (val)
2934             && VECTORP (XCONS (val)->car)
2935             && VECTORP (XCONS (val)->cdr))
2936           {
2937             setup_ccl_program (&(coding->spec.ccl.decoder), XCONS (val)->car);
2938             setup_ccl_program (&(coding->spec.ccl.encoder), XCONS (val)->cdr);
2939           }
2940         else
2941           goto label_invalid_coding_system;
2942       }
2943       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
2944       break;
2945
2946     case 5:
2947       coding->type = coding_type_raw_text;
2948       break;
2949
2950     default:
2951       goto label_invalid_coding_system;
2952     }
2953   return 0;
2954
2955  label_invalid_coding_system:
2956   coding->type = coding_type_no_conversion;
2957   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
2958   coding->common_flags = 0;
2959   coding->eol_type = CODING_EOL_LF;
2960   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
2961   return -1;
2962 }
2963
2964 /* Emacs has a mechanism to automatically detect a coding system if it
2965    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
2966    it's impossible to distinguish some coding systems accurately
2967    because they use the same range of codes.  So, at first, coding
2968    systems are categorized into 10 categories, which are:
2969
2970    o coding-category-emacs-mule
2971
2972         The category for a coding system which has the same code range
2973         as Emacs' internal format.  Assigned the coding-system (Lisp
2974         symbol) `emacs-mule' by default.
2975
2976    o coding-category-sjis
2977
2978         The category for a coding system which has the same code range
2979         as SJIS.  Assigned the coding-system (Lisp
2980         symbol) `japanese-shift-jis' by default.
2981
2982    o coding-category-iso-7
2983
2984         The category for a coding system which has the same code range
2985         as ISO2022 of 7-bit environment.  This doesn't use any locking
2986         shift and single shift functions.  This can encode/decode all
2987         charsets.  Assigned the coding-system (Lisp symbol)
2988         `iso-2022-7bit' by default.
2989
2990    o coding-category-iso-7-tight
2991
2992         Same as coding-category-iso-7 except that this can
2993         encode/decode only the specified charsets.
2994
2995    o coding-category-iso-8-1
2996
2997         The category for a coding system which has the same code range
2998         as ISO2022 of 8-bit environment and graphic plane 1 used only
2999         for DIMENSION1 charset.  This doesn't use any locking shift
3000         and single shift functions.  Assigned the coding-system (Lisp
3001         symbol) `iso-latin-1' by default.
3002
3003    o coding-category-iso-8-2
3004
3005         The category for a coding system which has the same code range
3006         as ISO2022 of 8-bit environment and graphic plane 1 used only
3007         for DIMENSION2 charset.  This doesn't use any locking shift
3008         and single shift functions.  Assigned the coding-system (Lisp
3009         symbol) `japanese-iso-8bit' by default.
3010
3011    o coding-category-iso-7-else
3012
3013         The category for a coding system which has the same code range
3014         as ISO2022 of 7-bit environment but uses locking shift or
3015         single shift functions.  Assigned the coding-system (Lisp
3016         symbol) `iso-2022-7bit-lock' by default.
3017
3018    o coding-category-iso-8-else
3019
3020         The category for a coding system which has the same code range
3021         as ISO2022 of 8-bit environment but uses locking shift or
3022         single shift functions.  Assigned the coding-system (Lisp
3023         symbol) `iso-2022-8bit-ss2' by default.
3024
3025    o coding-category-big5
3026
3027         The category for a coding system which has the same code range
3028         as BIG5.  Assigned the coding-system (Lisp symbol)
3029         `cn-big5' by default.
3030
3031    o coding-category-binary
3032
3033         The category for a coding system not categorized in any of the
3034         above.  Assigned the coding-system (Lisp symbol)
3035         `no-conversion' by default.
3036
3037    Each of them is a Lisp symbol and the value is an actual
3038    `coding-system's (this is also a Lisp symbol) assigned by a user.
3039    What Emacs does actually is to detect a category of coding system.
3040    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3041    decide only one possible category, it selects a category of the
3042    highest priority.  Priorities of categories are also specified by a
3043    user in a Lisp variable `coding-category-list'.
3044
3045 */
3046
3047 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3048    If it detects possible coding systems, return an integer in which
3049    appropriate flag bits are set.  Flag bits are defined by macros
3050    CODING_CATEGORY_MASK_XXX in `coding.h'.
3051
3052    How many ASCII characters are at the head is returned as *SKIP.  */
3053
3054 static int
3055 detect_coding_mask (source, src_bytes, priorities, skip)
3056      unsigned char *source;
3057      int src_bytes, *priorities, *skip;
3058 {
3059   register unsigned char c;
3060   unsigned char *src = source, *src_end = source + src_bytes;
3061   unsigned int mask = (CODING_CATEGORY_MASK_ISO_7BIT
3062                        | CODING_CATEGORY_MASK_ISO_SHIFT);
3063   int i;
3064
3065   /* At first, skip all ASCII characters and control characters except
3066      for three ISO2022 specific control characters.  */
3067  label_loop_detect_coding:
3068   while (src < src_end)
3069     {
3070       c = *src;
3071       if (c >= 0x80
3072           || ((mask & CODING_CATEGORY_MASK_ISO_7BIT)
3073               && c == ISO_CODE_ESC)
3074           || ((mask & CODING_CATEGORY_MASK_ISO_SHIFT)
3075               && (c == ISO_CODE_SI || c == ISO_CODE_SO)))
3076         break;
3077       src++;
3078     }
3079   *skip = src - source;
3080
3081   if (src >= src_end)
3082     /* We found nothing other than ASCII.  There's nothing to do.  */
3083     return 0;
3084
3085   /* The text seems to be encoded in some multilingual coding system.
3086      Now, try to find in which coding system the text is encoded.  */
3087   if (c < 0x80)
3088     {
3089       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3090       /* C is an ISO2022 specific control code of C0.  */
3091       mask = detect_coding_iso2022 (src, src_end);
3092       if (mask == 0)
3093         {
3094           /* No valid ISO2022 code follows C.  Try again.  */
3095           src++;
3096           mask = (c != ISO_CODE_ESC
3097                   ? CODING_CATEGORY_MASK_ISO_7BIT
3098                   : CODING_CATEGORY_MASK_ISO_SHIFT);
3099           goto label_loop_detect_coding;
3100         }
3101       if (priorities)
3102         goto label_return_highest_only;
3103     }
3104   else
3105     {
3106       int try;
3107
3108       if (c < 0xA0)
3109         {
3110           /* C is the first byte of SJIS character code,
3111              or a leading-code of Emacs' internal format (emacs-mule).  */
3112           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3113
3114           /* Or, if C is a special latin extra code,
3115              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3116              or is an ISO2022 control-sequence-introducer (CSI),
3117              we should also consider the possibility of ISO2022 codings.  */
3118           if ((VECTORP (Vlatin_extra_code_table)
3119                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3120               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3121               || (c == ISO_CODE_CSI
3122                   && (src < src_end
3123                       && (*src == ']'
3124                           || ((*src == '0' || *src == '1' || *src == '2')
3125                               && src + 1 < src_end
3126                               && src[1] == ']')))))
3127             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3128                      | CODING_CATEGORY_MASK_ISO_8BIT);
3129         }
3130       else
3131         /* C is a character of ISO2022 in graphic plane right,
3132            or a SJIS's 1-byte character code (i.e. JISX0201),
3133            or the first byte of BIG5's 2-byte code.  */
3134         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3135                 | CODING_CATEGORY_MASK_ISO_8BIT
3136                 | CODING_CATEGORY_MASK_SJIS
3137                 | CODING_CATEGORY_MASK_BIG5);
3138
3139       mask = 0;
3140       if (priorities)
3141         {
3142           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3143             {
3144               priorities[i] &= try;
3145               if (priorities[i] & CODING_CATEGORY_MASK_ISO)
3146                 mask = detect_coding_iso2022 (src, src_end);
3147               else if (priorities[i] & CODING_CATEGORY_MASK_SJIS)
3148                 mask = detect_coding_sjis (src, src_end);
3149               else if (priorities[i] & CODING_CATEGORY_MASK_BIG5)
3150                 mask = detect_coding_big5 (src, src_end);
3151               else if (priorities[i] & CODING_CATEGORY_MASK_EMACS_MULE)
3152                 mask = detect_coding_emacs_mule (src, src_end);
3153               if (mask)
3154                 goto label_return_highest_only;
3155             }
3156           return CODING_CATEGORY_MASK_RAW_TEXT;
3157         }
3158       if (try & CODING_CATEGORY_MASK_ISO)
3159         mask |= detect_coding_iso2022 (src, src_end);
3160       if (try & CODING_CATEGORY_MASK_SJIS)
3161         mask |= detect_coding_sjis (src, src_end);
3162       if (try & CODING_CATEGORY_MASK_BIG5)
3163         mask |= detect_coding_big5 (src, src_end);
3164       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3165         mask |= detect_coding_emacs_mule (src, src_end);
3166     }
3167   return (mask | CODING_CATEGORY_MASK_RAW_TEXT);
3168
3169  label_return_highest_only:
3170   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3171     {
3172       if (mask & priorities[i])
3173         return priorities[i];
3174     }
3175   return CODING_CATEGORY_MASK_RAW_TEXT;
3176 }
3177
3178 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3179    The information of the detected coding system is set in CODING.  */
3180
3181 void
3182 detect_coding (coding, src, src_bytes)
3183      struct coding_system *coding;
3184      unsigned char *src;
3185      int src_bytes;
3186 {
3187   unsigned int idx;
3188   int skip, mask, i;
3189   int priorities[CODING_CATEGORY_IDX_MAX];
3190   Lisp_Object val = Vcoding_category_list;
3191
3192   i = 0;
3193   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
3194     {
3195       if (! SYMBOLP (XCONS (val)->car))
3196         break;
3197       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
3198       if (idx >= CODING_CATEGORY_IDX_MAX)
3199         break;
3200       priorities[i++] = (1 << idx);
3201       val = XCONS (val)->cdr;
3202     }
3203   /* If coding-category-list is valid and contains all coding
3204      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
3205      the following code saves Emacs from craching.  */
3206   while (i < CODING_CATEGORY_IDX_MAX)
3207     priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
3208
3209   mask = detect_coding_mask (src, src_bytes, priorities, &skip);
3210   coding->heading_ascii = skip;
3211
3212   if (!mask) return;
3213
3214   /* We found a single coding system of the highest priority in MASK.  */
3215   idx = 0;
3216   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3217   if (! mask)
3218     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3219
3220   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3221
3222   if (coding->eol_type != CODING_EOL_UNDECIDED)
3223     {
3224       Lisp_Object tmp = Fget (val, Qeol_type);
3225
3226       if (VECTORP (tmp))
3227         val = XVECTOR (tmp)->contents[coding->eol_type];
3228     }
3229   setup_coding_system (val, coding);
3230   /* Set this again because setup_coding_system reset this member.  */
3231   coding->heading_ascii = skip;
3232 }
3233
3234 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3235    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3236    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3237
3238    How many non-eol characters are at the head is returned as *SKIP.  */
3239
3240 #define MAX_EOL_CHECK_COUNT 3
3241
3242 static int
3243 detect_eol_type (source, src_bytes, skip)
3244      unsigned char *source;
3245      int src_bytes, *skip;
3246 {
3247   unsigned char *src = source, *src_end = src + src_bytes;
3248   unsigned char c;
3249   int total = 0;                /* How many end-of-lines are found so far.  */
3250   int eol_type = CODING_EOL_UNDECIDED;
3251   int this_eol_type;
3252
3253   *skip = 0;
3254
3255   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3256     {
3257       c = *src++;
3258       if (c == '\n' || c == '\r')
3259         {
3260           if (*skip == 0)
3261             *skip = src - 1 - source;
3262           total++;
3263           if (c == '\n')
3264             this_eol_type = CODING_EOL_LF;
3265           else if (src >= src_end || *src != '\n')
3266             this_eol_type = CODING_EOL_CR;
3267           else
3268             this_eol_type = CODING_EOL_CRLF, src++;
3269
3270           if (eol_type == CODING_EOL_UNDECIDED)
3271             /* This is the first end-of-line.  */
3272             eol_type = this_eol_type;
3273           else if (eol_type != this_eol_type)
3274             {
3275               /* The found type is different from what found before.  */
3276               eol_type = CODING_EOL_INCONSISTENT;
3277               break;
3278             }
3279         }
3280     }
3281
3282   if (*skip == 0)
3283     *skip = src_end - source;
3284   return eol_type;
3285 }
3286
3287 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3288    is encoded.  If it detects an appropriate format of end-of-line, it
3289    sets the information in *CODING.  */
3290
3291 void
3292 detect_eol (coding, src, src_bytes)
3293      struct coding_system *coding;
3294      unsigned char *src;
3295      int src_bytes;
3296 {
3297   Lisp_Object val;
3298   int skip;
3299   int eol_type = detect_eol_type (src, src_bytes, &skip);
3300
3301   if (coding->heading_ascii > skip)
3302     coding->heading_ascii = skip;
3303   else
3304     skip = coding->heading_ascii;
3305
3306   if (eol_type == CODING_EOL_UNDECIDED)
3307     return;
3308   if (eol_type == CODING_EOL_INCONSISTENT)
3309     {
3310 #if 0
3311       /* This code is suppressed until we find a better way to
3312          distinguish raw text file and binary file.  */
3313
3314       /* If we have already detected that the coding is raw-text, the
3315          coding should actually be no-conversion.  */
3316       if (coding->type == coding_type_raw_text)
3317         {
3318           setup_coding_system (Qno_conversion, coding);
3319           return;
3320         }
3321       /* Else, let's decode only text code anyway.  */
3322 #endif /* 0 */
3323       eol_type = CODING_EOL_LF;
3324     }
3325
3326   val = Fget (coding->symbol, Qeol_type);
3327   if (VECTORP (val) && XVECTOR (val)->size == 3)
3328     {
3329       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3330       coding->heading_ascii = skip;
3331     }
3332 }
3333
/* Extra bytes added to every conversion buffer size estimate.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the size of a text may grow when it is decoded by
   CODING (a pointer to struct coding_system).  CODING is evaluated
   more than once, so it must not have side effects.  */
#define DECODING_BUFFER_MAG(coding)                                     \
  ((coding)->type == coding_type_iso2022                                \
   ? 3                                                                  \
   : (((coding)->type == coding_type_sjis                               \
       || (coding)->type == coding_type_big5)                           \
      ? 2                                                               \
      : ((coding)->type == coding_type_raw_text                         \
         ? 1                                                            \
         : ((coding)->type == coding_type_ccl                           \
            ? (coding)->spec.ccl.decoder.buf_magnification              \
            : 2))))
3346
3347 /* Return maximum size (bytes) of a buffer enough for decoding
3348    SRC_BYTES of text encoded in CODING.  */
3349
3350 int
3351 decoding_buffer_size (coding, src_bytes)
3352      struct coding_system *coding;
3353      int src_bytes;
3354 {
3355   return (src_bytes * DECODING_BUFFER_MAG (coding)
3356           + CONVERSION_BUFFER_EXTRA_ROOM);
3357 }
3358
3359 /* Return maximum size (bytes) of a buffer enough for encoding
3360    SRC_BYTES of text to CODING.  */
3361
3362 int
3363 encoding_buffer_size (coding, src_bytes)
3364      struct coding_system *coding;
3365      int src_bytes;
3366 {
3367   int magnification;
3368
3369   if (coding->type == coding_type_ccl)
3370     magnification = coding->spec.ccl.encoder.buf_magnification;
3371   else
3372     magnification = 3;
3373
3374   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3375 }
3376
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* Work buffer shared by encoding and decoding routines, and its
   current size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a buffer of at least SIZE bytes to be used for
   encoding or decoding.  The shared buffer is enlarged automatically
   when it is too small.  Its old contents are not preserved in that
   case.

   NOTE(review): the result of xmalloc is not checked here; presumably
   xmalloc does not return on memory exhaustion -- confirm.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* If the buffer has not been set up yet, doubling zero would
         never reach SIZE.  Start from the minimum size instead.  */
      if (real_size <= 0)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;

      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3405
3406 int
3407 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3408      struct coding_system *coding;
3409      unsigned char *source, *destination;
3410      int src_bytes, dst_bytes, encodep;
3411 {
3412   struct ccl_program *ccl
3413     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3414   int result;
3415
3416   coding->produced = ccl_driver (ccl, source, destination,
3417                                  src_bytes, dst_bytes, &(coding->consumed));
3418   if (encodep)
3419     {
3420       coding->produced_char = coding->produced;
3421       coding->consumed_char
3422         = multibyte_chars_in_text (source, coding->consumed);
3423     }
3424   else
3425     {
3426       coding->produced_char
3427         = multibyte_chars_in_text (destination, coding->produced);
3428       coding->consumed_char = coding->consumed;
3429     }
3430   switch (ccl->status)
3431     {
3432     case CCL_STAT_SUSPEND_BY_SRC:
3433       result = CODING_FINISH_INSUFFICIENT_SRC;
3434       break;
3435     case CCL_STAT_SUSPEND_BY_DST:
3436       result = CODING_FINISH_INSUFFICIENT_DST;
3437       break;
3438     default:
3439       result = CODING_FINISH_NORMAL;
3440       break;
3441     }
3442   return result;
3443 }
3444
3445 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3446    decoding, it may detect coding system and format of end-of-line if
3447    those are not yet decided.  */
3448
3449 int
3450 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3451      struct coding_system *coding;
3452      unsigned char *source, *destination;
3453      int src_bytes, dst_bytes;
3454 {
3455   int result;
3456
3457   if (src_bytes <= 0)
3458     {
3459       coding->produced = coding->produced_char = 0;
3460       coding->consumed = coding->consumed_char = 0;
3461       return CODING_FINISH_NORMAL;
3462     }
3463
3464   if (coding->type == coding_type_undecided)
3465     detect_coding (coding, source, src_bytes);
3466
3467   if (coding->eol_type == CODING_EOL_UNDECIDED)
3468     detect_eol (coding, source, src_bytes);
3469
3470   switch (coding->type)
3471     {
3472     case coding_type_emacs_mule:
3473     case coding_type_undecided:
3474     case coding_type_raw_text:
3475       if (coding->eol_type == CODING_EOL_LF
3476           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3477         goto label_no_conversion;
3478       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3479       break;
3480
3481     case coding_type_sjis:
3482       result = decode_coding_sjis_big5 (coding, source, destination,
3483                                         src_bytes, dst_bytes, 1);
3484       break;
3485
3486     case coding_type_iso2022:
3487       result = decode_coding_iso2022 (coding, source, destination,
3488                                       src_bytes, dst_bytes);
3489       break;
3490
3491     case coding_type_big5:
3492       result = decode_coding_sjis_big5 (coding, source, destination,
3493                                         src_bytes, dst_bytes, 0);
3494       break;
3495
3496     case coding_type_ccl:
3497       result = ccl_coding_driver (coding, source, destination,
3498                                   src_bytes, dst_bytes, 0);
3499       break;
3500
3501     default:                    /* i.e. case coding_type_no_conversion: */
3502     label_no_conversion:
3503       if (dst_bytes && src_bytes > dst_bytes)
3504         {
3505           coding->produced = dst_bytes;
3506           result = CODING_FINISH_INSUFFICIENT_DST;
3507         }
3508       else
3509         {
3510           coding->produced = src_bytes;
3511           result = CODING_FINISH_NORMAL;
3512         }
3513       if (dst_bytes)
3514         bcopy (source, destination, coding->produced);
3515       else
3516         safe_bcopy (source, destination, coding->produced);
3517       coding->consumed
3518         = coding->consumed_char = coding->produced_char = coding->produced;
3519       break;
3520     }
3521
3522   return result;
3523 }
3524
3525 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3526
3527 int
3528 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3529      struct coding_system *coding;
3530      unsigned char *source, *destination;
3531      int src_bytes, dst_bytes;
3532 {
3533   int result;
3534
3535   if (src_bytes <= 0)
3536     {
3537       coding->produced = coding->produced_char = 0;
3538       coding->consumed = coding->consumed_char = 0;
3539       return CODING_FINISH_NORMAL;
3540     }
3541
3542   switch (coding->type)
3543     {
3544     case coding_type_emacs_mule:
3545     case coding_type_undecided:
3546     case coding_type_raw_text:
3547       if (coding->eol_type == CODING_EOL_LF
3548           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3549         goto label_no_conversion;
3550       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3551       break;
3552
3553     case coding_type_sjis:
3554       result = encode_coding_sjis_big5 (coding, source, destination,
3555                                         src_bytes, dst_bytes, 1);
3556       break;
3557
3558     case coding_type_iso2022:
3559       result = encode_coding_iso2022 (coding, source, destination,
3560                                       src_bytes, dst_bytes);
3561       break;
3562
3563     case coding_type_big5:
3564       result = encode_coding_sjis_big5 (coding, source, destination,
3565                                         src_bytes, dst_bytes, 0);
3566       break;
3567
3568     case coding_type_ccl:
3569       result = ccl_coding_driver (coding, source, destination,
3570                                   src_bytes, dst_bytes, 1);
3571       break;
3572
3573     default:                    /* i.e. case coding_type_no_conversion: */
3574     label_no_conversion:
3575       if (dst_bytes && src_bytes > dst_bytes)
3576         {
3577           coding->produced = dst_bytes;
3578           result = CODING_FINISH_INSUFFICIENT_DST;
3579         }
3580       else
3581         {
3582           coding->produced = src_bytes;
3583           result = CODING_FINISH_NORMAL;
3584         }
3585       if (dst_bytes)
3586         bcopy (source, destination, coding->produced);
3587       else
3588         safe_bcopy (source, destination, coding->produced);
3589       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3590         {
3591           unsigned char *p = destination, *pend = p + coding->produced;
3592           while (p < pend)
3593             if (*p++ == '\015') p[-1] = '\n';
3594         }
3595       coding->consumed
3596         = coding->consumed_char = coding->produced_char = coding->produced;
3597       break;
3598     }
3599
3600   return result;
3601 }
3602
3603 /* Scan text in the region between *BEG and *END, skip characters
3604    which we don't have to decode by coding system CODING at the head
3605    and tail, then set *BEG and *END to the region of the text we
3606    actually have to convert.
3607
3608    If STR is not NULL, *BEG and *END are indices into STR.  */
3609
3610 static void
3611 shrink_decoding_region (beg, end, coding, str)
3612      int *beg, *end;
3613      struct coding_system *coding;
3614      unsigned char *str;
3615 {
3616   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3617   int eol_conversion;
3618
3619   if (coding->type == coding_type_ccl
3620       || coding->type == coding_type_undecided
3621       || !NILP (coding->post_read_conversion))
3622     {
3623       /* We can't skip any data.  */
3624       return;
3625     }
3626   else if (coding->type == coding_type_no_conversion)
3627     {
3628       /* We need no conversion.  */
3629       *beg = *end;
3630       return;
3631     }
3632
3633   if (coding->heading_ascii >= 0)
3634     /* Detection routine has already found how much we can skip at the
3635        head.  */
3636     *beg += coding->heading_ascii;
3637
3638   if (str)
3639     {
3640       begp_orig = begp = str + *beg;
3641       endp_orig = endp = str + *end;
3642     }
3643   else
3644     {
3645       move_gap (*beg);
3646       begp_orig = begp = GAP_END_ADDR;
3647       endp_orig = endp = begp + *end - *beg;
3648     }
3649
3650   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3651
3652   switch (coding->type)
3653     {
3654     case coding_type_emacs_mule:
3655     case coding_type_raw_text:
3656       if (eol_conversion)
3657         {
3658           if (coding->heading_ascii < 0)
3659             while (begp < endp && *begp != '\r') begp++;
3660           while (begp < endp && *(endp - 1) != '\r') endp--;
3661         }
3662       else
3663         begp = endp;
3664       break;
3665
3666     case coding_type_sjis:
3667     case coding_type_big5:
3668       /* We can skip all ASCII characters at the head.  */
3669       if (coding->heading_ascii < 0)
3670         {
3671           if (eol_conversion)
3672             while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
3673           else
3674             while (begp < endp && *begp < 0x80) begp++;
3675         }
3676       /* We can skip all ASCII characters at the tail except for the
3677          second byte of SJIS or BIG5 code.  */
3678       if (eol_conversion)
3679         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3680       else
3681         while (begp < endp && endp[-1] < 0x80) endp--;
3682       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3683         endp++;
3684       break;
3685
3686     default:            /* i.e. case coding_type_iso2022: */
3687       if (coding->heading_ascii < 0)
3688         {
3689           unsigned char c;
3690
3691           /* We can skip all ASCII characters at the head except for a
3692              few control codes.  */
3693           while (begp < endp && (c = *begp) < 0x80
3694                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3695                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3696                  && (!eol_conversion || c != ISO_CODE_LF))
3697             begp++;
3698         }
3699       switch (coding->category_idx)
3700         {
3701         case CODING_CATEGORY_IDX_ISO_8_1:
3702         case CODING_CATEGORY_IDX_ISO_8_2:
3703           /* We can skip all ASCII characters at the tail.  */
3704           if (eol_conversion)
3705             while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3706           else
3707             while (begp < endp && endp[-1] < 0x80) endp--;
3708           break;
3709
3710         case CODING_CATEGORY_IDX_ISO_7:
3711         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3712           /* We can skip all charactes at the tail except for ESC and
3713              the following 2-byte at the tail.  */
3714           if (eol_conversion)
3715             while (begp < endp && endp[-1] != ISO_CODE_ESC && endp[-1] != '\n')
3716               endp--;
3717           else
3718             while (begp < endp && endp[-1] != ISO_CODE_ESC)
3719               endp--;
3720           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3721             {
3722               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3723                 /* This is an ASCII designation sequence.  We can
3724                     surely skip the tail.  */
3725                 endp += 2;
3726               else
3727                 /* Hmmm, we can't skip the tail.  */
3728                 endp = endp_orig;
3729             }
3730         }
3731     }
3732   *beg += begp - begp_orig;
3733   *end += endp - endp_orig;
3734   return;
3735 }
3736
3737 /* Like shrink_decoding_region but for encoding.  */
3738
3739 static void
3740 shrink_encoding_region (beg, end, coding, str)
3741      int *beg, *end;
3742      struct coding_system *coding;
3743      unsigned char *str;
3744 {
3745   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3746   int eol_conversion;
3747
3748   if (coding->type == coding_type_ccl)
3749     /* We can't skip any data.  */
3750     return;
3751   else if (coding->type == coding_type_no_conversion)
3752     {
3753       /* We need no conversion.  */
3754       *beg = *end;
3755       return;
3756     }
3757
3758   if (str)
3759     {
3760       begp_orig = begp = str + *beg;
3761       endp_orig = endp = str + *end;
3762     }
3763   else
3764     {
3765       move_gap (*beg);
3766       begp_orig = begp = GAP_END_ADDR;
3767       endp_orig = endp = begp + *end - *beg;
3768     }
3769
3770   eol_conversion = (coding->eol_type == CODING_EOL_CR
3771                     || coding->eol_type == CODING_EOL_CRLF);
3772
3773   /* Here, we don't have to check coding->pre_write_conversion because
3774      the caller is expected to have handled it already.  */
3775   switch (coding->type)
3776     {
3777     case coding_type_undecided:
3778     case coding_type_emacs_mule:
3779     case coding_type_raw_text:
3780       if (eol_conversion)
3781         {
3782           while (begp < endp && *begp != '\n') begp++;
3783           while (begp < endp && endp[-1] != '\n') endp--;
3784         }
3785       else
3786         begp = endp;
3787       break;
3788
3789     case coding_type_iso2022:
3790       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3791         {
3792           unsigned char *bol = begp;
3793           while (begp < endp && *begp < 0x80)
3794             {
3795               begp++;
3796               if (begp[-1] == '\n')
3797                 bol = begp;
3798             }
3799           begp = bol;
3800           goto label_skip_tail;
3801         }
3802       /* fall down ... */
3803
3804     default:
3805       /* We can skip all ASCII characters at the head and tail.  */
3806       if (eol_conversion)
3807         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
3808       else
3809         while (begp < endp && *begp < 0x80) begp++;
3810     label_skip_tail:
3811       if (eol_conversion)
3812         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3813       else
3814         while (begp < endp && *(endp - 1) < 0x80) endp--;
3815       break;
3816     }
3817
3818   *beg += begp - begp_orig;
3819   *end += endp - endp_orig;
3820   return;
3821 }
3822
3823 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3824    text from FROM to TO by coding system CODING, and return number of
3825    characters in the resulting text.
3826
3827    If ADJUST is nonzero, we do various things as if the original text
3828    is deleted and a new text is inserted.  See the comments in
3829    replace_range (insdel.c) to know what we are doing.
3830
3831    ADJUST nonzero also means that post-read-conversion or
3832    pre-write-conversion functions (if any) should be processed.  */
3833
3834 int
3835 code_convert_region (from, to, coding, encodep, adjust)
3836      int from, to, encodep, adjust;
3837      struct coding_system *coding;
3838 {
3839   int len = to - from, require, inserted, inserted_byte;
3840   int from_byte, to_byte, len_byte;
3841   int from_byte_orig, to_byte_orig;
3842   Lisp_Object saved_coding_symbol = Qnil;
3843
3844   if (adjust)
3845     {
3846       prepare_to_modify_buffer (from, to, &from);
3847       to = from + len;
3848     }
3849   from_byte = CHAR_TO_BYTE (from); to_byte = CHAR_TO_BYTE (to);
3850   len_byte = to_byte - from_byte;
3851
3852   if (! encodep && CODING_REQUIRE_DETECTION (coding))
3853     {
3854       /* We must detect encoding of text and eol.  Even if detection
3855          routines can't decide the encoding, we should not let them
3856          undecided because the deeper decoding routine (decode_coding)
3857          tries to detect the encodings in vain in that case.  */
3858
3859       if (from < GPT && to > GPT)
3860         move_gap_both (from, from_byte);
3861       if (coding->type == coding_type_undecided)
3862         {
3863           detect_coding (coding, BYTE_POS_ADDR (from), len);
3864           if (coding->type == coding_type_undecided)
3865             coding->type = coding_type_emacs_mule;
3866         }
3867       if (coding->eol_type == CODING_EOL_UNDECIDED)
3868         {
3869           saved_coding_symbol = coding->symbol;
3870           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
3871           if (coding->eol_type == CODING_EOL_UNDECIDED)
3872             coding->eol_type = CODING_EOL_LF;
3873           /* We had better recover the original eol format if we
3874              encounter an inconsitent eol format while decoding.  */
3875           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
3876         }
3877     }
3878
3879   if (encodep
3880       ? ! CODING_REQUIRE_ENCODING (coding)
3881       : ! CODING_REQUIRE_DECODING (coding))
3882     return len;
3883
3884   /* Now we convert the text.  */
3885
3886   /* For encoding, we must process pre-write-conversion in advance.  */
3887   if (encodep
3888       && adjust
3889       && ! NILP (coding->pre_write_conversion)
3890       && SYMBOLP (coding->pre_write_conversion)
3891       && ! NILP (Ffboundp (coding->pre_write_conversion)))
3892     {
3893       /* The function in pre-write-conversion put a new text in a new
3894          buffer.  */
3895       struct buffer *prev = current_buffer, *new;
3896
3897       call2 (coding->pre_write_conversion, from, to);
3898       if (current_buffer != prev)
3899         {
3900           len = ZV - BEGV;
3901           new = current_buffer;
3902           set_buffer_internal_1 (prev);
3903           del_range (from, to);
3904           insert_from_buffer (new, BEG, len, 0);
3905           to = from + len;
3906           to_byte = CHAR_TO_BYTE (to);
3907           len_byte = to_byte - from_byte;
3908         }
3909     }
3910
3911   /* Try to skip the heading and tailing ASCIIs.  */
3912   from_byte_orig = from_byte; to_byte_orig = to_byte;
3913   if (encodep)
3914     shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
3915   else
3916     shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
3917   if (from_byte == to_byte)
3918     return len;
3919   /* Here, the excluded region by shrinking contains only ASCIIs.  */
3920   from += (from_byte - from_byte_orig);
3921   to += (to_byte - to_byte_orig);
3922   len = to - from;
3923   len_byte = to_byte - from_byte;
3924
3925   /* For converion, we must put the gap before the text to be decoded
3926      in addition to make the gap larger for efficient decoding.  The
3927      required gap size starts from 2000 which is the magic number used
3928      in make_gap.  But, after one batch of conversion, it will be
3929      incremented if we find that it is not enough .  */
3930   require = 2000;
3931
3932   if (GAP_SIZE  < require)
3933     make_gap (require - GAP_SIZE);
3934   move_gap_both (from, from_byte);
3935
3936   if (adjust)
3937     adjust_before_replace (from, from_byte, to, to_byte);
3938
3939   if (GPT - BEG < beg_unchanged)
3940     beg_unchanged = GPT - BEG;
3941   if (Z - GPT < end_unchanged)
3942     end_unchanged = Z - GPT;
3943
3944   inserted = inserted_byte = 0;
3945   for (;;)
3946     {
3947       int result, diff_char, diff_byte;
3948
3949       /* The buffer memory is changed from:
3950          +--------+converted-text+------------+-----original-text-----+---+
3951          |<-from->|<--inserted-->|<-GAP_SIZE->|<---------len--------->|---|  */
3952
3953       if (encodep)
3954         result = encode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0);
3955       else
3956         result = decode_coding (coding, GAP_END_ADDR, GPT_ADDR, len_byte, 0);
3957       /* to:
3958          +--------+-------converted-text--------+--+---original-text--+---+
3959          |<-from->|<----(inserted+produced)---->|--|<-(len-consumed)->|---|  */
3960
3961       diff_char = coding->produced_char - coding->consumed_char;
3962       diff_byte = coding->produced - coding->consumed;
3963
3964       GAP_SIZE -= diff_byte;
3965       ZV += diff_char; ZV_BYTE += diff_byte;
3966       Z += diff_char; Z_BYTE += diff_byte;
3967       GPT += coding->produced_char; GPT_BYTE += coding->produced;
3968
3969       inserted += coding->produced_char;
3970       inserted_byte += coding->produced;
3971       len -= coding->consumed_char;
3972       len_byte -= coding->consumed;
3973
3974       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
3975         {
3976           unsigned char *p = GPT_ADDR - inserted_byte, *pend = GPT_ADDR;
3977
3978           /* Encode LFs back to the original eol format (CR or CRLF).  */
3979           if (coding->eol_type == CODING_EOL_CR)
3980             {
3981               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
3982             }
3983           else
3984             {
3985               unsigned char *p2 = p;
3986               int count = 0;
3987
3988               while (p2 < pend) if (*p2++ == '\n') count++;
3989               if (GAP_SIZE < count)
3990                 make_gap (count - GAP_SIZE);
3991               p2 = GPT_ADDR + count;
3992               while (p < pend)
3993                 {
3994                   *--p2 = *--pend;
3995                   if (*pend == '\n') *--p2 = '\r';
3996                 }
3997               GPT += count; GAP_SIZE -= count; ZV += count; Z += count;
3998               ZV_BYTE += count; Z_BYTE += count;
3999               coding->produced += count;
4000               coding->produced_char += count;
4001               inserted += count;
4002               inserted_byte += count;
4003             }
4004
4005           /* Suppress eol-format conversion in the further conversion.  */
4006           coding->eol_type = CODING_EOL_LF;
4007
4008           /* Restore the original symbol.  */
4009           coding->symbol = saved_coding_symbol;
4010         }
4011       if (len_byte <= 0)
4012         break;
4013       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4014         {
4015           /* The source text ends in invalid codes.  Let's just
4016              make them valid buffer contents, and finish conversion.  */
4017           inserted += len;
4018           inserted_byte += len_byte;
4019           break;
4020         }
4021       if (inserted == coding->produced_char)
4022         /* We have just done the first batch of conversion.  Let's
4023            reconsider the required gap size now.
4024
4025            We have converted CONSUMED bytes into PRODUCED bytes.  To
4026            convert the remaining LEN bytes, we may need REQUIRE bytes
4027            of gap, where:
4028                REQUIRE + LEN = (LEN * PRODUCED / CONSUMED)
4029                REQUIRE = LEN * (PRODUCED - CONSUMED) / CONSUMED
4030                        = LEN * DIFF / CONSUMED
4031            Here, we are sure that DIFF is positive.  */
4032         require = len_byte * diff_byte / coding->consumed;
4033       if (GAP_SIZE  < require)
4034         make_gap (require - GAP_SIZE);
4035     }
4036   if (GAP_SIZE > 0) *GPT_ADDR = 0; /* Put an anchor.  */
4037
4038   if (adjust)
4039     {
4040       adjust_after_replace (from, from_byte, to, to_byte,
4041                             inserted, inserted_byte);
4042
4043       if (! encodep && ! NILP (coding->post_read_conversion))
4044         {
4045           Lisp_Object val;
4046           int orig_inserted = inserted, pos = PT;
4047
4048           temp_set_point_both (current_buffer, from, from_byte);
4049           val = call1 (coding->post_read_conversion, make_number (inserted));
4050           if (! NILP (val))
4051             {
4052               CHECK_NUMBER (val, 0);
4053               inserted = XFASTINT (val);
4054             }
4055           if (pos >= from + orig_inserted)
4056             temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4057         }
4058     }
4059
4060   return ((from_byte - from_byte_orig) + inserted + (to_byte_orig - to_byte));
4061 }
4062
4063 Lisp_Object
4064 code_convert_string (str, coding, encodep, nocopy)
4065      Lisp_Object str;
4066      struct coding_system *coding;
4067      int encodep, nocopy;
4068 {
4069   int len;
4070   char *buf;
4071   int from = 0, to = XSTRING (str)->size, to_byte = XSTRING (str)->size_byte;
4072   struct gcpro gcpro1;
4073   Lisp_Object saved_coding_symbol = Qnil;
4074   int result;
4075
4076   if (encodep && !NILP (coding->pre_write_conversion)
4077       || !encodep && !NILP (coding->post_read_conversion))
4078     {
4079       /* Since we have to call Lisp functions which assume target text
4080          is in a buffer, after setting a temporary buffer, call
4081          code_convert_region.  */
4082       int count = specpdl_ptr - specpdl;
4083       struct buffer *prev = current_buffer;
4084
4085       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4086       temp_output_buffer_setup (" *code-converting-work*");
4087       set_buffer_internal (XBUFFER (Vstandard_output));
4088       if (encodep)
4089         insert_from_string (str, 0, 0, to, to_byte, 0);
4090       else
4091         {
4092           /* We must insert the contents of STR as is without
4093              unibyte<->multibyte conversion.  */
4094           current_buffer->enable_multibyte_characters = Qnil;
4095           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4096           current_buffer->enable_multibyte_characters = Qt;
4097         }
4098       code_convert_region (BEGV, ZV, coding, encodep, 1);
4099       if (encodep)
4100         /* We must return the buffer contents as unibyte string.  */
4101         current_buffer->enable_multibyte_characters = Qnil;
4102       str = make_buffer_string (BEGV, ZV, 0);
4103       set_buffer_internal (prev);
4104       return unbind_to (count, str);
4105     }
4106
4107   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4108     {
4109       /* See the comments in code_convert_region.  */
4110       if (coding->type == coding_type_undecided)
4111         {
4112           detect_coding (coding, XSTRING (str)->data, to_byte);
4113           if (coding->type == coding_type_undecided)
4114             coding->type = coding_type_emacs_mule;
4115         }
4116       if (coding->eol_type == CODING_EOL_UNDECIDED)
4117         {
4118           saved_coding_symbol = coding->symbol;
4119           detect_eol (coding, XSTRING (str)->data, to_byte);
4120           if (coding->eol_type == CODING_EOL_UNDECIDED)
4121             coding->eol_type = CODING_EOL_LF;
4122           /* We had better recover the original eol format if we
4123              encounter an inconsitent eol format while decoding.  */
4124           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4125         }
4126     }
4127
4128   if (encodep
4129       ? ! CODING_REQUIRE_ENCODING (coding)
4130       : ! CODING_REQUIRE_DECODING (coding))
4131     from = to_byte;
4132   else
4133     {
4134       /* Try to skip the heading and tailing ASCIIs.  */
4135       if (encodep)
4136         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4137       else
4138         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4139     }
4140   if (from == to_byte)
4141     return (nocopy ? str : Fcopy_sequence (str));
4142
4143   if (encodep)
4144     len = encoding_buffer_size (coding, to_byte - from);
4145   else
4146     len = decoding_buffer_size (coding, to_byte - from);
4147   len += from + XSTRING (str)->size_byte - to_byte;
4148   GCPRO1 (str);
4149   buf = get_conversion_buffer (len);
4150   UNGCPRO;
4151
4152   if (from > 0)
4153     bcopy (XSTRING (str)->data, buf, from);
4154   result = (encodep
4155             ? encode_coding (coding, XSTRING (str)->data + from,
4156                              buf + from, to_byte - from, len)
4157             : decode_coding (coding, XSTRING (str)->data + from,
4158                              buf + from, to - from, len));
4159   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4160     {
4161       /* We simple try to decode the whole string again but without
4162          eol-conversion this time.  */
4163       coding->eol_type = CODING_EOL_LF;
4164       coding->symbol = saved_coding_symbol;
4165       return code_convert_string (str, coding, encodep, nocopy);
4166     }
4167
4168   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4169          XSTRING (str)->size_byte - to_byte);
4170
4171   len = from + XSTRING (str)->size_byte - to_byte;
4172   if (encodep)
4173     str = make_unibyte_string (buf, len + coding->produced);
4174   else
4175     str = make_multibyte_string (buf, len + coding->produced_char,
4176                                  len + coding->produced);
4177   return str;
4178 }
4179
4180 \f
4181 #ifdef emacs
4182 /*** 7. Emacs Lisp library functions ***/
4183
4184 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4185   "Return t if OBJECT is nil or a coding-system.\n\
4186 See the documentation of `make-coding-system' for information\n\
4187 about coding-system objects.")
4188   (obj)
4189      Lisp_Object obj;
4190 {
4191   if (NILP (obj))
4192     return Qt;
4193   if (!SYMBOLP (obj))
4194     return Qnil;
4195   /* Get coding-spec vector for OBJ.  */
4196   obj = Fget (obj, Qcoding_system);
4197   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4198           ? Qt : Qnil);
4199 }
4200
4201 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4202        Sread_non_nil_coding_system, 1, 1, 0,
4203   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4204   (prompt)
4205      Lisp_Object prompt;
4206 {
4207   Lisp_Object val;
4208   do
4209     {
4210       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4211                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4212     }
4213   while (XSTRING (val)->size == 0);
4214   return (Fintern (val, Qnil));
4215 }
4216
4217 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4218   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4219 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4220   (prompt, default_coding_system)
4221      Lisp_Object prompt, default_coding_system;
4222 {
4223   Lisp_Object val;
4224   if (SYMBOLP (default_coding_system))
4225     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4226   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4227                           Qt, Qnil, Qcoding_system_history,
4228                           default_coding_system, Qnil);
4229   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4230 }
4231
4232 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4233        1, 1, 0,
4234   "Check validity of CODING-SYSTEM.\n\
4235 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4236 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4237 The value of property should be a vector of length 5.")
4238   (coding_system)
4239      Lisp_Object coding_system;
4240 {
4241   CHECK_SYMBOL (coding_system, 0);
4242   if (!NILP (Fcoding_system_p (coding_system)))
4243     return coding_system;
4244   while (1)
4245     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4246 }
4247 \f
4248 Lisp_Object
4249 detect_coding_system (src, src_bytes, highest)
4250      unsigned char *src;
4251      int src_bytes, highest;
4252 {
4253   int coding_mask, eol_type;
4254   Lisp_Object val, tmp;
4255   int dummy;
4256
4257   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4258   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4259   if (eol_type == CODING_EOL_INCONSISTENT)
4260     eol_type == CODING_EOL_UNDECIDED;
4261
4262   if (!coding_mask)
4263     {
4264       val = Qundecided;
4265       if (eol_type != CODING_EOL_UNDECIDED)
4266         {
4267           Lisp_Object val2;
4268           val2 = Fget (Qundecided, Qeol_type);
4269           if (VECTORP (val2))
4270             val = XVECTOR (val2)->contents[eol_type];
4271         }
4272       return val;
4273     }
4274
4275   /* At first, gather possible coding systems in VAL.  */
4276   val = Qnil;
4277   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4278     {
4279       int idx
4280         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4281       if (coding_mask & (1 << idx))
4282         {
4283           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4284           if (highest)
4285             break;
4286         }
4287     }
4288   if (!highest)
4289     val = Fnreverse (val);
4290
4291   /* Then, substitute the elements by subsidiary coding systems.  */
4292   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4293     {
4294       if (eol_type != CODING_EOL_UNDECIDED)
4295         {
4296           Lisp_Object eol;
4297           eol = Fget (XCONS (tmp)->car, Qeol_type);
4298           if (VECTORP (eol))
4299             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4300         }
4301     }
4302   return (highest ? XCONS (val)->car : val);
4303 }
4304
4305 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4306        2, 3, 0,
4307   "Detect coding system of the text in the region between START and END.\n\
4308 Return a list of possible coding systems ordered by priority.\n\
4309 \n\
4310 If only ASCII characters are found, it returns `undecided'\n\
4311 or its subsidiary coding system according to a detected end-of-line format.\n\
4312 \n\
4313 If optional argument HIGHEST is non-nil, return the coding system of\n\
4314 highest priority.")
4315   (start, end, highest)
4316      Lisp_Object start, end, highest;
4317 {
4318   int from, to;
4319   int from_byte, to_byte;
4320
4321   CHECK_NUMBER_COERCE_MARKER (start, 0);
4322   CHECK_NUMBER_COERCE_MARKER (end, 1);
4323
4324   validate_region (&start, &end);
4325   from = XINT (start), to = XINT (end);
4326   from_byte = CHAR_TO_BYTE (from);
4327   to_byte = CHAR_TO_BYTE (to);
4328
4329   if (from < GPT && to >= GPT)
4330     move_gap_both (to, to_byte);
4331
4332   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4333                                to_byte - from_byte,
4334                                !NILP (highest));
4335 }
4336
4337 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4338        1, 2, 0,
4339   "Detect coding system of the text in STRING.\n\
4340 Return a list of possible coding systems ordered by priority.\n\
4341 \n\
4342 If only ASCII characters are found, it returns `undecided'\n\
4343 or its subsidiary coding system according to a detected end-of-line format.\n\
4344 \n\
4345 If optional argument HIGHEST is non-nil, return the coding system of\n\
4346 highest priority.")
4347   (string, highest)
4348      Lisp_Object string, highest;
4349 {
4350   CHECK_STRING (string, 0);
4351
4352   return detect_coding_system (XSTRING (string)->data,
4353                                XSTRING (string)->size_byte,
4354                                !NILP (highest));
4355 }
4356
4357 Lisp_Object
4358 code_convert_region1 (start, end, coding_system, encodep)
4359      Lisp_Object start, end, coding_system;
4360      int encodep;
4361 {
4362   struct coding_system coding;
4363   int from, to, len;
4364
4365   CHECK_NUMBER_COERCE_MARKER (start, 0);
4366   CHECK_NUMBER_COERCE_MARKER (end, 1);
4367   CHECK_SYMBOL (coding_system, 2);
4368
4369   validate_region (&start, &end);
4370   from = XFASTINT (start);
4371   to = XFASTINT (end);
4372
4373   if (NILP (coding_system))
4374     return make_number (to - from);
4375
4376   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4377     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4378
4379   coding.mode |= CODING_MODE_LAST_BLOCK;
4380   len = code_convert_region (from, to, &coding, encodep, 1);
4381   return make_number (len);
4382 }
4383
4384 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4385        3, 3, "r\nzCoding system: ",
4386   "Decode the current region by specified coding system.\n\
4387 When called from a program, takes three arguments:\n\
4388 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4389 Return length of decoded text.")
4390   (start, end, coding_system)
4391      Lisp_Object start, end, coding_system;
4392 {
4393   return code_convert_region1 (start, end, coding_system, 0);
4394 }
4395
4396 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4397        3, 3, "r\nzCoding system: ",
4398   "Encode the current region by specified coding system.\n\
4399 When called from a program, takes three arguments:\n\
4400 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4401 Return length of encoded text.")
4402   (start, end, coding_system)
4403      Lisp_Object start, end, coding_system;
4404 {
4405   return code_convert_region1 (start, end, coding_system, 1);
4406 }
4407
4408 Lisp_Object
4409 code_convert_string1 (string, coding_system, nocopy, encodep)
4410      Lisp_Object string, coding_system, nocopy;
4411      int encodep;
4412 {
4413   struct coding_system coding;
4414
4415   CHECK_STRING (string, 0);
4416   CHECK_SYMBOL (coding_system, 1);
4417
4418   if (NILP (coding_system))
4419     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4420
4421   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4422     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4423
4424   coding.mode |= CODING_MODE_LAST_BLOCK;
4425   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4426 }
4427
4428 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4429        2, 3, 0,
4430   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4431 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4432 if the decoding operation is trivial.")
4433   (string, coding_system, nocopy)
4434      Lisp_Object string, coding_system, nocopy;
4435 {
4436   return code_convert_string1(string, coding_system, nocopy, 0);
4437 }
4438
4439 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4440        2, 3, 0,
4441   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4442 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4443 if the encoding operation is trivial.")
4444   (string, coding_system, nocopy)
4445      Lisp_Object string, coding_system, nocopy;
4446 {
4447   return code_convert_string1(string, coding_system, nocopy, 1);
4448 }
4449
4450 \f
4451 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4452   "Decode a JISX0208 character of shift-jis encoding.\n\
4453 CODE is the character code in SJIS.\n\
4454 Return the corresponding character.")
4455   (code)
4456      Lisp_Object code;
4457 {
4458   unsigned char c1, c2, s1, s2;
4459   Lisp_Object val;
4460
4461   CHECK_NUMBER (code, 0);
4462   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4463   DECODE_SJIS (s1, s2, c1, c2);
4464   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4465   return val;
4466 }
4467
4468 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4469   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4470 Return the corresponding character code in SJIS.")
4471   (ch)
4472      Lisp_Object ch;
4473 {
4474   int charset, c1, c2, s1, s2;
4475   Lisp_Object val;
4476
4477   CHECK_NUMBER (ch, 0);
4478   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4479   if (charset == charset_jisx0208)
4480     {
4481       ENCODE_SJIS (c1, c2, s1, s2);
4482       XSETFASTINT (val, (s1 << 8) | s2);
4483     }
4484   else
4485     XSETFASTINT (val, 0);
4486   return val;
4487 }
4488
4489 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4490   "Decode a Big5 character CODE of BIG5 coding system.\n\
4491 CODE is the character code in BIG5.\n\
4492 Return the corresponding character.")
4493   (code)
4494      Lisp_Object code;
4495 {
4496   int charset;
4497   unsigned char b1, b2, c1, c2;
4498   Lisp_Object val;
4499
4500   CHECK_NUMBER (code, 0);
4501   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4502   DECODE_BIG5 (b1, b2, charset, c1, c2);
4503   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4504   return val;
4505 }
4506
4507 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4508   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4509 Return the corresponding character code in Big5.")
4510   (ch)
4511      Lisp_Object ch;
4512 {
4513   int charset, c1, c2, b1, b2;
4514   Lisp_Object val;
4515
4516   CHECK_NUMBER (ch, 0);
4517   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4518   if (charset == charset_big5_1 || charset == charset_big5_2)
4519     {
4520       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4521       XSETFASTINT (val, (b1 << 8) | b2);
4522     }
4523   else
4524     XSETFASTINT (val, 0);
4525   return val;
4526 }
4527 \f
4528 DEFUN ("set-terminal-coding-system-internal",
4529        Fset_terminal_coding_system_internal,
4530        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4531   (coding_system)
4532      Lisp_Object coding_system;
4533 {
4534   CHECK_SYMBOL (coding_system, 0);
4535   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4536   /* We had better not send unsafe characters to terminal.  */
4537   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4538
4539   return Qnil;
4540 }
4541
4542 DEFUN ("set-safe-terminal-coding-system-internal",
4543        Fset_safe_terminal_coding_system_internal,
4544        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4545   (coding_system)
4546      Lisp_Object coding_system;
4547 {
4548   CHECK_SYMBOL (coding_system, 0);
4549   setup_coding_system (Fcheck_coding_system (coding_system),
4550                        &safe_terminal_coding);
4551   return Qnil;
4552 }
4553
4554 DEFUN ("terminal-coding-system",
4555        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4556   "Return coding system specified for terminal output.")
4557   ()
4558 {
4559   return terminal_coding.symbol;
4560 }
4561
4562 DEFUN ("set-keyboard-coding-system-internal",
4563        Fset_keyboard_coding_system_internal,
4564        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4565   (coding_system)
4566      Lisp_Object coding_system;
4567 {
4568   CHECK_SYMBOL (coding_system, 0);
4569   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4570   return Qnil;
4571 }
4572
4573 DEFUN ("keyboard-coding-system",
4574        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4575   "Return coding system specified for decoding keyboard input.")
4576   ()
4577 {
4578   return keyboard_coding.symbol;
4579 }
4580
4581 \f
4582 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4583        Sfind_operation_coding_system,  1, MANY, 0,
4584   "Choose a coding system for an operation based on the target name.\n\
4585 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4586 DECODING-SYSTEM is the coding system to use for decoding\n\
4587 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4588 for encoding (in case OPERATION does encoding).\n\
4589 \n\
4590 The first argument OPERATION specifies an I/O primitive:\n\
4591   For file I/O, `insert-file-contents' or `write-region'.\n\
4592   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4593   For network I/O, `open-network-stream'.\n\
4594 \n\
4595 The remaining arguments should be the same arguments that were passed\n\
4596 to the primitive.  Depending on which primitive, one of those arguments\n\
4597 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4598 whichever argument specifies the file name is TARGET.\n\
4599 \n\
4600 TARGET has a meaning which depends on OPERATION:\n\
4601   For file I/O, TARGET is a file name.\n\
4602   For process I/O, TARGET is a process name.\n\
4603   For network I/O, TARGET is a service name or a port number\n\
4604 \n\
4605 This function looks up what specified for TARGET in,\n\
4606 `file-coding-system-alist', `process-coding-system-alist',\n\
4607 or `network-coding-system-alist' depending on OPERATION.\n\
4608 They may specify a coding system, a cons of coding systems,\n\
4609 or a function symbol to call.\n\
4610 In the last case, we call the function with one argument,\n\
4611 which is a list of all the arguments given to this function.")
4612   (nargs, args)
4613      int nargs;
4614      Lisp_Object *args;
4615 {
4616   Lisp_Object operation, target_idx, target, val;
4617   register Lisp_Object chain;
4618
4619   if (nargs < 2)
4620     error ("Too few arguments");
4621   operation = args[0];
4622   if (!SYMBOLP (operation)
4623       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
4624     error ("Invalid first arguement");
4625   if (nargs < 1 + XINT (target_idx))
4626     error ("Too few arguments for operation: %s",
4627            XSYMBOL (operation)->name->data);
4628   target = args[XINT (target_idx) + 1];
4629   if (!(STRINGP (target)
4630         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
4631     error ("Invalid %dth argument", XINT (target_idx) + 1);
4632
4633   chain = ((EQ (operation, Qinsert_file_contents)
4634             || EQ (operation, Qwrite_region))
4635            ? Vfile_coding_system_alist
4636            : (EQ (operation, Qopen_network_stream)
4637               ? Vnetwork_coding_system_alist
4638               : Vprocess_coding_system_alist));
4639   if (NILP (chain))
4640     return Qnil;
4641
4642   for (; CONSP (chain); chain = XCONS (chain)->cdr)
4643     {
4644       Lisp_Object elt;
4645       elt = XCONS (chain)->car;
4646
4647       if (CONSP (elt)
4648           && ((STRINGP (target)
4649                && STRINGP (XCONS (elt)->car)
4650                && fast_string_match (XCONS (elt)->car, target) >= 0)
4651               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
4652         {
4653           val = XCONS (elt)->cdr;
4654           /* Here, if VAL is both a valid coding system and a valid
4655              function symbol, we return VAL as a coding system.  */
4656           if (CONSP (val))
4657             return val;
4658           if (! SYMBOLP (val))
4659             return Qnil;
4660           if (! NILP (Fcoding_system_p (val)))
4661             return Fcons (val, val);
4662           if (! NILP (Ffboundp (val)))
4663             {
4664               val = call1 (val, Flist (nargs, args));
4665               if (CONSP (val))
4666                 return val;
4667               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
4668                 return Fcons (val, val);
4669             }
4670           return Qnil;
4671         }
4672     }
4673   return Qnil;
4674 }
4675
4676 DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
4677        Supdate_iso_coding_systems, 0, 0, 0,
4678   "Update internal database for ISO2022 based coding systems.\n\
4679 When values of the following coding categories are changed, you must\n\
4680 call this function:\n\
4681   coding-category-iso-7, coding-category-iso-7-tight,\n\
4682   coding-category-iso-8-1, coding-category-iso-8-2,\n\
4683   coding-category-iso-7-else, coding-category-iso-8-else")
4684   ()
4685 {
4686   int i;
4687
4688   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
4689        i++)
4690     {
4691       if (! coding_system_table[i])
4692         coding_system_table[i]
4693           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
4694       setup_coding_system
4695         (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
4696          coding_system_table[i]);
4697     }
4698   return Qnil;
4699 }
4700
4701 #endif /* emacs */
4702
4703 \f
4704 /*** 8. Post-amble ***/
4705
4706 init_coding_once ()
4707 {
4708   int i;
4709
4710   /* Emacs' internal format specific initialize routine.  */
4711   for (i = 0; i <= 0x20; i++)
4712     emacs_code_class[i] = EMACS_control_code;
4713   emacs_code_class[0x0A] = EMACS_linefeed_code;
4714   emacs_code_class[0x0D] = EMACS_carriage_return_code;
4715   for (i = 0x21 ; i < 0x7F; i++)
4716     emacs_code_class[i] = EMACS_ascii_code;
4717   emacs_code_class[0x7F] = EMACS_control_code;
4718   emacs_code_class[0x80] = EMACS_leading_code_composition;
4719   for (i = 0x81; i < 0xFF; i++)
4720     emacs_code_class[i] = EMACS_invalid_code;
4721   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
4722   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
4723   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
4724   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
4725
4726   /* ISO2022 specific initialize routine.  */
4727   for (i = 0; i < 0x20; i++)
4728     iso_code_class[i] = ISO_control_code;
4729   for (i = 0x21; i < 0x7F; i++)
4730     iso_code_class[i] = ISO_graphic_plane_0;
4731   for (i = 0x80; i < 0xA0; i++)
4732     iso_code_class[i] = ISO_control_code;
4733   for (i = 0xA1; i < 0xFF; i++)
4734     iso_code_class[i] = ISO_graphic_plane_1;
4735   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
4736   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
4737   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
4738   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
4739   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
4740   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
4741   iso_code_class[ISO_CODE_ESC] = ISO_escape;
4742   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
4743   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
4744   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
4745
4746   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
4747   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
4748
4749   setup_coding_system (Qnil, &keyboard_coding);
4750   setup_coding_system (Qnil, &terminal_coding);
4751   setup_coding_system (Qnil, &safe_terminal_coding);
4752
4753   bzero (coding_system_table, sizeof coding_system_table);
4754
4755 #if defined (MSDOS) || defined (WINDOWSNT)
4756   system_eol_type = CODING_EOL_CRLF;
4757 #else
4758   system_eol_type = CODING_EOL_LF;
4759 #endif
4760 }
4761
4762 #ifdef emacs
4763
4764 syms_of_coding ()
4765 {
4766   Qtarget_idx = intern ("target-idx");
4767   staticpro (&Qtarget_idx);
4768
4769   Qcoding_system_history = intern ("coding-system-history");
4770   staticpro (&Qcoding_system_history);
4771   Fset (Qcoding_system_history, Qnil);
4772
4773   /* Target FILENAME is the first argument.  */
4774   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
4775   /* Target FILENAME is the third argument.  */
4776   Fput (Qwrite_region, Qtarget_idx, make_number (2));
4777
4778   Qcall_process = intern ("call-process");
4779   staticpro (&Qcall_process);
4780   /* Target PROGRAM is the first argument.  */
4781   Fput (Qcall_process, Qtarget_idx, make_number (0));
4782
4783   Qcall_process_region = intern ("call-process-region");
4784   staticpro (&Qcall_process_region);
4785   /* Target PROGRAM is the third argument.  */
4786   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
4787
4788   Qstart_process = intern ("start-process");
4789   staticpro (&Qstart_process);
4790   /* Target PROGRAM is the third argument.  */
4791   Fput (Qstart_process, Qtarget_idx, make_number (2));
4792
4793   Qopen_network_stream = intern ("open-network-stream");
4794   staticpro (&Qopen_network_stream);
4795   /* Target SERVICE is the fourth argument.  */
4796   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
4797
4798   Qcoding_system = intern ("coding-system");
4799   staticpro (&Qcoding_system);
4800
4801   Qeol_type = intern ("eol-type");
4802   staticpro (&Qeol_type);
4803
4804   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
4805   staticpro (&Qbuffer_file_coding_system);
4806
4807   Qpost_read_conversion = intern ("post-read-conversion");
4808   staticpro (&Qpost_read_conversion);
4809
4810   Qpre_write_conversion = intern ("pre-write-conversion");
4811   staticpro (&Qpre_write_conversion);
4812
4813   Qno_conversion = intern ("no-conversion");
4814   staticpro (&Qno_conversion);
4815
4816   Qundecided = intern ("undecided");
4817   staticpro (&Qundecided);
4818
4819   Qcoding_system_p = intern ("coding-system-p");
4820   staticpro (&Qcoding_system_p);
4821
4822   Qcoding_system_error = intern ("coding-system-error");
4823   staticpro (&Qcoding_system_error);
4824
4825   Fput (Qcoding_system_error, Qerror_conditions,
4826         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
4827   Fput (Qcoding_system_error, Qerror_message,
4828         build_string ("Invalid coding system"));
4829
4830   Qcoding_category = intern ("coding-category");
4831   staticpro (&Qcoding_category);
4832   Qcoding_category_index = intern ("coding-category-index");
4833   staticpro (&Qcoding_category_index);
4834
4835   Vcoding_category_table
4836     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
4837   staticpro (&Vcoding_category_table);
4838   {
4839     int i;
4840     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
4841       {
4842         XVECTOR (Vcoding_category_table)->contents[i]
4843           = intern (coding_category_name[i]);
4844         Fput (XVECTOR (Vcoding_category_table)->contents[i],
4845               Qcoding_category_index, make_number (i));
4846       }
4847   }
4848
4849   Qcharacter_unification_table = intern ("character-unification-table");
4850   staticpro (&Qcharacter_unification_table);
4851   Fput (Qcharacter_unification_table, Qchar_table_extra_slots,
4852         make_number (0));
4853
4854   Qcharacter_unification_table_for_decode
4855     = intern ("character-unification-table-for-decode");
4856   staticpro (&Qcharacter_unification_table_for_decode);
4857
4858   Qcharacter_unification_table_for_encode
4859     = intern ("character-unification-table-for-encode");
4860   staticpro (&Qcharacter_unification_table_for_encode);
4861
4862   Qsafe_charsets = intern ("safe-charsets");
4863   staticpro (&Qsafe_charsets);
4864
4865   Qemacs_mule = intern ("emacs-mule");
4866   staticpro (&Qemacs_mule);
4867
4868   Qraw_text = intern ("raw-text");
4869   staticpro (&Qraw_text);
4870
4871   defsubr (&Scoding_system_p);
4872   defsubr (&Sread_coding_system);
4873   defsubr (&Sread_non_nil_coding_system);
4874   defsubr (&Scheck_coding_system);
4875   defsubr (&Sdetect_coding_region);
4876   defsubr (&Sdetect_coding_string);
4877   defsubr (&Sdecode_coding_region);
4878   defsubr (&Sencode_coding_region);
4879   defsubr (&Sdecode_coding_string);
4880   defsubr (&Sencode_coding_string);
4881   defsubr (&Sdecode_sjis_char);
4882   defsubr (&Sencode_sjis_char);
4883   defsubr (&Sdecode_big5_char);
4884   defsubr (&Sencode_big5_char);
4885   defsubr (&Sset_terminal_coding_system_internal);
4886   defsubr (&Sset_safe_terminal_coding_system_internal);
4887   defsubr (&Sterminal_coding_system);
4888   defsubr (&Sset_keyboard_coding_system_internal);
4889   defsubr (&Skeyboard_coding_system);
4890   defsubr (&Sfind_operation_coding_system);
4891   defsubr (&Supdate_iso_coding_systems);
4892
4893   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
4894     "List of coding systems.\n\
4895 \n\
4896 Do not alter the value of this variable manually.  This variable should be\n\
4897 updated by the functions `make-coding-system' and\n\
4898 `define-coding-system-alias'.");
4899   Vcoding_system_list = Qnil;
4900
4901   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
4902     "Alist of coding system names.\n\
4903 Each element is one element list of coding system name.\n\
4904 This variable is given to `completing-read' as TABLE argument.\n\
4905 \n\
4906 Do not alter the value of this variable manually.  This variable should be\n\
4907 updated by the functions `make-coding-system' and\n\
4908 `define-coding-system-alias'.");
4909   Vcoding_system_alist = Qnil;
4910
4911   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
4912     "List of coding-categories (symbols) ordered by priority.");
4913   {
4914     int i;
4915
4916     Vcoding_category_list = Qnil;
4917     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
4918       Vcoding_category_list
4919         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
4920                  Vcoding_category_list);
4921   }
4922
4923   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
4924     "Specify the coding system for read operations.\n\
4925 It is useful to bind this variable with `let', but do not set it globally.\n\
4926 If the value is a coding system, it is used for decoding on read operation.\n\
4927 If not, an appropriate element is used from one of the coding system alists:\n\
4928 There are three such tables, `file-coding-system-alist',\n\
4929 `process-coding-system-alist', and `network-coding-system-alist'.");
4930   Vcoding_system_for_read = Qnil;
4931
4932   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
4933     "Specify the coding system for write operations.\n\
4934 It is useful to bind this variable with `let', but do not set it globally.\n\
4935 If the value is a coding system, it is used for encoding on write operation.\n\
4936 If not, an appropriate element is used from one of the coding system alists:\n\
4937 There are three such tables, `file-coding-system-alist',\n\
4938 `process-coding-system-alist', and `network-coding-system-alist'.");
4939   Vcoding_system_for_write = Qnil;
4940
4941   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
4942     "Coding system used in the latest file or process I/O.");
4943   Vlast_coding_system_used = Qnil;
4944
4945   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
4946     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
4947   inhibit_eol_conversion = 0;
4948
4949   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
4950     "Alist to decide a coding system to use for a file I/O operation.\n\
4951 The format is ((PATTERN . VAL) ...),\n\
4952 where PATTERN is a regular expression matching a file name,\n\
4953 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
4954 If VAL is a coding system, it is used for both decoding and encoding\n\
4955 the file contents.\n\
4956 If VAL is a cons of coding systems, the car part is used for decoding,\n\
4957 and the cdr part is used for encoding.\n\
4958 If VAL is a function symbol, the function must return a coding system\n\
4959 or a cons of coding systems which are used as above.\n\
4960 \n\
4961 See also the function `find-operation-coding-system'.");
4962   Vfile_coding_system_alist = Qnil;
4963
4964   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
4965     "Alist to decide a coding system to use for a process I/O operation.\n\
4966 The format is ((PATTERN . VAL) ...),\n\
4967 where PATTERN is a regular expression matching a program name,\n\
4968 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
4969 If VAL is a coding system, it is used for both decoding what received\n\
4970 from the program and encoding what sent to the program.\n\
4971 If VAL is a cons of coding systems, the car part is used for decoding,\n\
4972 and the cdr part is used for encoding.\n\
4973 If VAL is a function symbol, the function must return a coding system\n\
4974 or a cons of coding systems which are used as above.\n\
4975 \n\
4976 See also the function `find-operation-coding-system'.");
4977   Vprocess_coding_system_alist = Qnil;
4978
4979   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
4980     "Alist to decide a coding system to use for a network I/O operation.\n\
4981 The format is ((PATTERN . VAL) ...),\n\
4982 where PATTERN is a regular expression matching a network service name\n\
4983 or is a port number to connect to,\n\
4984 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
4985 If VAL is a coding system, it is used for both decoding what received\n\
4986 from the network stream and encoding what sent to the network stream.\n\
4987 If VAL is a cons of coding systems, the car part is used for decoding,\n\
4988 and the cdr part is used for encoding.\n\
4989 If VAL is a function symbol, the function must return a coding system\n\
4990 or a cons of coding systems which are used as above.\n\
4991 \n\
4992 See also the function `find-operation-coding-system'.");
4993   Vnetwork_coding_system_alist = Qnil;
4994
4995   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
4996     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
4997   eol_mnemonic_unix = ':';
4998
4999   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5000     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5001   eol_mnemonic_dos = '\\';
5002
5003   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5004     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5005   eol_mnemonic_mac = '/';
5006
5007   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5008     "Mnemonic character indicating end-of-line format is not yet decided.");
5009   eol_mnemonic_undecided = ':';
5010
5011   DEFVAR_LISP ("enable-character-unification", &Venable_character_unification,
5012     "Non-nil means ISO 2022 encoder/decoder do character unification.");
5013   Venable_character_unification = Qt;
5014
5015   DEFVAR_LISP ("standard-character-unification-table-for-decode",
5016     &Vstandard_character_unification_table_for_decode,
5017     "Table for unifying characters when reading.");
5018   Vstandard_character_unification_table_for_decode = Qnil;
5019
5020   DEFVAR_LISP ("standard-character-unification-table-for-encode",
5021     &Vstandard_character_unification_table_for_encode,
5022     "Table for unifying characters when writing.");
5023   Vstandard_character_unification_table_for_encode = Qnil;
5024
5025   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5026     "Alist of charsets vs revision numbers.\n\
5027 While encoding, if a charset (car part of an element) is found,\n\
5028 designate it with the escape sequence identifing revision (cdr part of the element).");
5029   Vcharset_revision_alist = Qnil;
5030
5031   DEFVAR_LISP ("default-process-coding-system",
5032                &Vdefault_process_coding_system,
5033     "Cons of coding systems used for process I/O by default.\n\
5034 The car part is used for decoding a process output,\n\
5035 the cdr part is used for encoding a text to be sent to a process.");
5036   Vdefault_process_coding_system = Qnil;
5037
5038   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5039     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5040 This is a vector of length 256.\n\
5041 If Nth element is non-nil, the existence of code N in a file\n\
5042 \(or output of subprocess) doesn't prevent it to be detected as\n\
5043 a coding system of ISO 2022 variant which has a flag\n\
5044 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5045 or reading output of a subprocess.\n\
5046 Only 128th through 159th elements has a meaning.");
5047   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5048
5049   DEFVAR_LISP ("select-safe-coding-system-function",
5050                &Vselect_safe_coding_system_function,
5051     "Function to call to select safe coding system for encoding a text.\n\
5052 \n\
5053 If set, this function is called to force a user to select a proper\n\
5054 coding system which can encode the text in the case that a default\n\
5055 coding system used in each operation can't encode the text.\n\
5056 \n\
5057 The default value is `select-safe-codign-system' (which see).");
5058   Vselect_safe_coding_system_function = Qnil;
5059
5060 }
5061
5062 #endif /* emacs */