code.delx.au - gnu-emacs/blob - src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. End-of-line handlers
  29   6. C library functions
  30   7. Emacs Lisp library functions
  31   8. Post-amble
  32
  33 */
  34
  35 /*** GENERAL NOTE on CODING SYSTEM ***
  36
  37   Coding system is an encoding mechanism of one or more character
  38   sets.  Here's a list of coding systems which Emacs can handle.  When
  39   we say "decode", it means converting some other coding system to
   40   Emacs' internal format (emacs-mule), and when we say "encode",
  41   it means converting the coding system emacs-mule to some other
  42   coding system.
  43
  44   0. Emacs' internal format (emacs-mule)
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and coding
  53   systems used in Internet communication such as ISO-2022-JP are
  54   all variants of ISO2022.  Details are described in section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in section 4.  In this file, when we write "BIG5"
  67   (all uppercase), we mean the coding system, and when we write
  68   "Big5" (capitalized), we mean the character set.
  69
  70   4. Raw text
  71
  72   A coding system for a text containing random 8-bit code.  Emacs does
  73   no code conversion on such a text except for end-of-line format.
  74
  75   5. Other
  76
  77   If a user wants to read/write a text encoded in a coding system not
  78   listed above, he can supply a decoder and an encoder for it in CCL
  79   (Code Conversion Language) programs.  Emacs executes the CCL program
  80   while reading/writing.
  81
  82   Emacs represents a coding system by a Lisp symbol that has a property
  83   `coding-system'.  But, before actually using the coding system, the
  84   information about it is set in a structure of type `struct
  85   coding_system' for rapid processing.  See section 6 for more details.
  86
  87 */
  88
  89 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  90
  91   How end-of-line of a text is encoded depends on a system.  For
  92   instance, Unix's format is just one byte of `line-feed' code,
  93   whereas DOS's format is two-byte sequence of `carriage-return' and
  94   `line-feed' codes.  MacOS's format is usually one byte of
  95   `carriage-return'.
  96
  97   Since text characters encoding and end-of-line encoding are
  98   independent, any coding system described above can take
  99   any format of end-of-line.  So, Emacs has information of format of
 100   end-of-line in each coding-system.  See section 6 for more details.
 101
 102 */
 103
 104 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 105
 106   These functions check if a text between SRC and SRC_END is encoded
 107   in the coding system category XXX.  Each returns an integer value in
  108   which appropriate flag bits for the category XXX are set.  The flag
 109   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 110   template of these functions.  */
 111 #if 0
 112 int
 113 detect_coding_emacs_mule (src, src_end)
 114      unsigned char *src, *src_end;
 115 {
 116   ...
 117 }
 118 #endif
 119
 120 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 121
 122   These functions decode SRC_BYTES length text at SOURCE encoded in
 123   CODING to Emacs' internal format (emacs-mule).  The resulting text
 124   goes to a place pointed to by DESTINATION, the length of which
 125   should not exceed DST_BYTES.  These functions set the information of
 126   original and decoded texts in the members produced, produced_char,
 127   consumed, and consumed_char of the structure *CODING.
 128
 129   The return value is an integer (CODING_FINISH_XXX) indicating how
 130   the decoding finished.
 131
 132   DST_BYTES zero means that source area and destination area are
  133   overlapped, which means that we can produce a decoded text until it
  134   reaches the head of not-yet-decoded source text.
 135
 136   Below is a template of these functions.  */
 137 #if 0
 138 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 139      struct coding_system *coding;
 140      unsigned char *source, *destination;
 141      int src_bytes, dst_bytes;
 142 {
 143   ...
 144 }
 145 #endif
 146
 147 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 148
 149   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 150   internal format (emacs-mule) to CODING.  The resulting text goes to
 151   a place pointed to by DESTINATION, the length of which should not
 152   exceed DST_BYTES.  These functions set the information of
 153   original and encoded texts in the members produced, produced_char,
 154   consumed, and consumed_char of the structure *CODING.
 155
 156   The return value is an integer (CODING_FINISH_XXX) indicating how
 157   the encoding finished.
 158
 159   DST_BYTES zero means that source area and destination area are
  160   overlapped, which means that we can produce an encoded text until it
  161   reaches the head of not-yet-encoded source text.
 162
 163   Below is a template of these functions.  */
 164 #if 0
 165 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 166      struct coding_system *coding;
 167      unsigned char *source, *destination;
 168      int src_bytes, dst_bytes;
 169 {
 170   ...
 171 }
 172 #endif
 173
 174 /*** COMMONLY USED MACROS ***/
 175
  176 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
  177    THREE_MORE_BYTES fetch one, two, and three bytes respectively from
  178    `src' into their arguments and advance `src' past them.  If fewer
  179    bytes than that remain before `src_end', nothing is read and they
  180    jump to `label_end_of_loop'; the caller provides all three names.  */
  181
  182 #define ONE_MORE_BYTE(c1)       \
  183   do {                          \
  184     if (src < src_end)          \
  185       c1 = *src++;              \
  186     else                        \
  187       goto label_end_of_loop;   \
  188   } while (0)
  189
  190 #define TWO_MORE_BYTES(c1, c2)  \
  191   do {                          \
  192     if (src + 1 < src_end)      \
  193       c1 = *src++, c2 = *src++; \
  194     else                        \
  195       goto label_end_of_loop;   \
  196   } while (0)
  197
  198 #define THREE_MORE_BYTES(c1, c2, c3)            \
  199   do {                                          \
  200     if (src + 2 < src_end)                      \
  201       c1 = *src++, c2 = *src++, c3 = *src++;    \
  202     else                                        \
  203       goto label_end_of_loop;                   \
  204   } while (0)
 205
 206 /* The following three macros DECODE_CHARACTER_ASCII,
 207    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 208    the multi-byte form of a character of each class at the place
 209    pointed by `dst'.  The caller should set the variable `dst' to
 210    point to an appropriate area and the variable `coding' to point to
 211    the coding-system of the currently decoding text in advance.  */
 212
  213 /* Decode one ASCII character C, storing it at `dst' and advancing `dst'.  */
  214 /* While composing, C is stored as the two bytes 0xA0, C|0x80 and produced_char is not incremented.  */
  215 #define DECODE_CHARACTER_ASCII(c)                               \
  216   do {                                                          \
  217     if (COMPOSING_P (coding->composing))                        \
  218       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
  219     else                                                        \
  220       {                                                         \
  221         *dst++ = (c);                                           \
  222         coding->produced_char++;                                \
  223       }                                                         \
  224   } while (0)
 225
  226 /* Decode one DIMENSION1 character of charset CHARSET with position-code C:
  227    store base leading-code (+0x20 if composing), any extended one, and C|0x80.  */
  228 /* NOTE: `=' in the second `if' is a deliberate assignment reusing leading_code.  */
  229 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
  230   do {                                                                  \
  231     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
  232     if (COMPOSING_P (coding->composing))                                \
  233       *dst++ = leading_code + 0x20;                                     \
  234     else                                                                \
  235       {                                                                 \
  236         *dst++ = leading_code;                                          \
  237         coding->produced_char++;                                        \
  238       }                                                                 \
  239     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              \
  240       *dst++ = leading_code;                                            \
  241     *dst++ = (c) | 0x80;                                                \
  242   } while (0)
 243
  244 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
  245    position-codes are C1 and C2: the DIMENSION1 form for C1 followed by C2|0x80.  */
  246
  247 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
  248   do {                                                  \
  249     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
  250     *dst++ = (c2) | 0x80;                               \
  251   } while (0)
 252
 253 \f
 254 /*** 1. Preamble ***/
 255
 256 #include <stdio.h>
 257
 258 #ifdef emacs
 259
 260 #include <config.h>
 261 #include "lisp.h"
 262 #include "buffer.h"
 263 #include "charset.h"
 264 #include "ccl.h"
 265 #include "coding.h"
 266 #include "window.h"
 267
 268 #else  /* not emacs */
 269
 270 #include "mulelib.h"
 271
 272 #endif /* not emacs */
  273 /* NOTE(review): the Q... objects below are presumably Lisp symbols and the V... one a Lisp variable, all set up outside this chunk -- confirm.  */
  274 Lisp_Object Qcoding_system, Qeol_type;
  275 Lisp_Object Qbuffer_file_coding_system;
  276 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
  277 Lisp_Object Qno_conversion, Qundecided;
  278 Lisp_Object Qcoding_system_history;
  279 Lisp_Object Qsafe_charsets;
  280
  281 extern Lisp_Object Qinsert_file_contents, Qwrite_region; /* defined in another file */
  282 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
  283 Lisp_Object Qstart_process, Qopen_network_stream;
  284 Lisp_Object Qtarget_idx;
  285
  286 Lisp_Object Vselect_safe_coding_system_function;
  287
  288 /* Mnemonic character of each format of end-of-line.  */
  289 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
  290 /* Mnemonic character to indicate format of end-of-line is not yet
  291    decided.  */
  292 int eol_mnemonic_undecided;
  293
  294 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
  295    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
  296 int system_eol_type;
 297
  298 #ifdef emacs
  299
  300 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
  301
  302 Lisp_Object Qcoding_system_p, Qcoding_system_error;
  303
  304 /* The coding systems emacs-mule and raw-text are for converting only
  305    end-of-line format.  */
  306 Lisp_Object Qemacs_mule, Qraw_text;
  307
  308 /* Coding-systems are handed between Emacs Lisp programs and C internal
  309    routines by the following three variables.  */
  310 /* Coding-system for reading files and receiving data from process.  */
  311 Lisp_Object Vcoding_system_for_read;
  312 /* Coding-system for writing files and sending data to process.  */
  313 Lisp_Object Vcoding_system_for_write;
  314 /* Coding-system actually used in the latest I/O.  */
  315 Lisp_Object Vlast_coding_system_used;
  316
  317 /* A vector of length 256 which contains information about special
  318    Latin codes (especially for dealing with Microsoft code).  */
  319 Lisp_Object Vlatin_extra_code_table;
  320
  321 /* Flag to inhibit code conversion of end-of-line format.  */
  322 int inhibit_eol_conversion;
  323
  324 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
  325 int inherit_process_coding_system;
  326
  327 /* Coding system to be used to encode text for terminal display.  */
  328 struct coding_system terminal_coding;
  329
  330 /* Coding system to be used to encode text for terminal display when
  331    terminal coding system is nil.  */
  332 struct coding_system safe_terminal_coding;
  333
  334 /* Coding system of what is sent from terminal keyboard.  */
  335 struct coding_system keyboard_coding;
  336 /* NOTE(review): presumably alists choosing a coding system for file, process, and network I/O -- confirm.  */
  337 Lisp_Object Vfile_coding_system_alist;
  338 Lisp_Object Vprocess_coding_system_alist;
  339 Lisp_Object Vnetwork_coding_system_alist;
  340
  341 #endif /* emacs */
 342
  343 Lisp_Object Qcoding_category, Qcoding_category_index;
  344
  345 /* List of symbols `coding-category-xxx' ordered by priority.  */
  346 Lisp_Object Vcoding_category_list;
  347
  348 /* Table of coding categories (Lisp symbols).  */
  349 Lisp_Object Vcoding_category_table;
  350
  351 /* Table of symbol names for each coding category.  NOTE(review): order presumably follows the CODING_CATEGORY_IDX_XXX values -- confirm.  */
  352 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
  353   "coding-category-emacs-mule",
  354   "coding-category-sjis",
  355   "coding-category-iso-7",
  356   "coding-category-iso-7-tight",
  357   "coding-category-iso-8-1",
  358   "coding-category-iso-8-2",
  359   "coding-category-iso-7-else",
  360   "coding-category-iso-8-else",
  361   "coding-category-big5",
  362   "coding-category-raw-text",
  363   "coding-category-binary"
  364 };
  365
  366 /* Table of pointers to coding systems corresponding to each coding
  367    category; CHARSET_OK indexes it with CODING_CATEGORY_IDX_XXX values.  */
  368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
  369
  370 /* Table of coding category masks.  Nth element is a mask for a coding
  371    category of which priority is Nth.  */
  372 static
  373 int coding_priorities[CODING_CATEGORY_IDX_MAX];
  374
  375 /* Flag to tell if we look up translation table on character code
  376    conversion.  */
  377 Lisp_Object Venable_character_translation;
  378 /* Standard translation table to look up on decoding (reading).  */
  379 Lisp_Object Vstandard_translation_table_for_decode;
  380 /* Standard translation table to look up on encoding (writing).  */
  381 Lisp_Object Vstandard_translation_table_for_encode;
  382
  383 Lisp_Object Qtranslation_table;
  384 Lisp_Object Qtranslation_table_id;
  385 Lisp_Object Qtranslation_table_for_decode;
  386 Lisp_Object Qtranslation_table_for_encode;
  387
  388 /* Alist of charsets vs revision number.  */
  389 Lisp_Object Vcharset_revision_alist;
  390
  391 /* Default coding systems used for process I/O.  */
  392 Lisp_Object Vdefault_process_coding_system;
 393
 394 \f
 395 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 396
 397 /* Emacs' internal format for encoding multiple character sets is a
 398    kind of multi-byte encoding, i.e. characters are encoded by
 399    variable-length sequences of one-byte codes.  ASCII characters
 400    and control characters (e.g. `tab', `newline') are represented by
 401    one-byte sequences which are their ASCII codes, in the range 0x00
 402    through 0x7F.  The other characters are represented by a sequence
 403    of `base leading-code', optional `extended leading-code', and one
 404    or two `position-code's.  The length of the sequence is determined
 405    by the base leading-code.  Leading-code takes the range 0x80
 406    through 0x9F, whereas extended leading-code and position-code take
 407    the range 0xA0 through 0xFF.  See `charset.h' for more details
 408    about leading-code and position-code.
 409
 410    There's one exception to this rule.  Special leading-code
 411    `leading-code-composition' denotes that the following several
 412    characters should be composed into one character.  Leading-codes of
  413    components (except for ASCII) have 0x20 added.  An ASCII character
 414    component is represented by a 2-byte sequence of `0xA0' and
 415    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 416    details of composite character.  Hence, we can summarize the code
 417    range as follows:
 418
 419    --- CODE RANGE of Emacs' internal format ---
 420    (character set)      (range)
 421    ASCII                0x00 .. 0x7F
 422    ELSE (1st byte)      0x80 .. 0x9F
 423         (rest bytes)    0xA0 .. 0xFF
 424    ---------------------------------------------
 425
 426   */
 427
  428 enum emacs_code_class_type emacs_code_class[256]; /* class of each byte value; indexed by a byte in detect_coding_emacs_mule */
 429
  430 /* Fall through only if a byte is available at `src' and it is >= 0xA0 (it is then consumed).
  431    A smaller byte makes the enclosing function return 0; at `src_end', jump to `label_end_of_switch'.  */
  432 #define CHECK_CODE_RANGE_A0_FF  \
  433   do {                          \
  434     if (src >= src_end)         \
  435       goto label_end_of_switch; \
  436     else if (*src++ < 0xA0)     \
  437       return 0;                 \
  438   } while (0)
 439
  440 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
  441    Scan the bytes from SRC up to (not including) SRC_END.  Return 0 as
  442    soon as a byte rules out Emacs' internal format, else CODING_CATEGORY_MASK_EMACS_MULE.  */
  443
  444 int
  445 detect_coding_emacs_mule (src, src_end)
  446      unsigned char *src, *src_end;
  447 {
  448   unsigned char c;
  449   int composing = 0;            /* nonzero while inside a composition sequence */
  450
  451   while (src < src_end)
  452     {
  453       c = *src++;
  454
  455       if (composing)
  456         {
  457           if (c < 0xA0)
  458             composing = 0;      /* a byte below 0xA0 ends the composition */
  459           else
  460             c -= 0x20;          /* component leading-codes carry an extra 0x20 */
  461         }
  462
  463       switch (emacs_code_class[c])
  464         {
  465         case EMACS_ascii_code:
  466         case EMACS_linefeed_code:
  467           break;
  468
  469         case EMACS_control_code:
  470           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
  471             return 0;           /* presumably because these indicate ISO2022 text -- confirm */
  472           break;
  473
  474         case EMACS_invalid_code:
  475           return 0;
  476
  477         case EMACS_leading_code_composition: /* c == 0x80 */
  478           if (composing)
  479             CHECK_CODE_RANGE_A0_FF;
  480           else
  481             composing = 1;
  482           break;
  483
  484         case EMACS_leading_code_4:
  485           CHECK_CODE_RANGE_A0_FF;
  486           /* fall down to check it two more times ...  */
  487
  488         case EMACS_leading_code_3:
  489           CHECK_CODE_RANGE_A0_FF;
  490           /* fall down to check it one more time ...  */
  491
  492         case EMACS_leading_code_2:
  493           CHECK_CODE_RANGE_A0_FF;
  494           break;
  495
  496         default:
  497         label_end_of_switch:    /* CHECK_CODE_RANGE_A0_FF jumps here when the text ends mid-sequence */
  498           break;
  499         }
  500     }
  501   return CODING_CATEGORY_MASK_EMACS_MULE;
  502 }
 503
 504 \f
 505 /*** 3. ISO2022 handlers ***/
 506
 507 /* The following note describes the coding system ISO2022 briefly.
 508    Since the intention of this note is to help in understanding of
 509    the programs in this file, some parts are NOT ACCURATE or OVERLY
 510    SIMPLIFIED.  For the thorough understanding, please refer to the
 511    original document of ISO2022.
 512
 513    ISO2022 provides many mechanisms to encode several character sets
  514    in 7-bit and 8-bit environment.  If one chooses the 7-bit environment,
 515    all text is encoded by codes of less than 128.  This may make the
 516    encoded text a little bit longer, but the text gets more stability
 517    to pass through several gateways (some of them strip off the MSB).
 518
 519    There are two kinds of character set: control character set and
 520    graphic character set.  The former contains control characters such
 521    as `newline' and `escape' to provide control functions (control
 522    functions are provided also by escape sequences).  The latter
  523    contains graphic characters such as 'A' and '-'.  Emacs recognizes
 524    two control character sets and many graphic character sets.
 525
 526    Graphic character sets are classified into one of the following
 527    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 528    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 529    bytes (DIMENSION) and the number of characters in one dimension
 530    (CHARS) of the set.  In addition, each character set is assigned an
 531    identification tag (called "final character" and denoted as <F>
 532    here after) which is unique in each class.  <F> of each character
 533    set is decided by ECMA(*) when it is registered in ISO.  Code range
 534    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 535
 536    Note (*): ECMA = European Computer Manufacturers Association
 537
 538    Here are examples of graphic character set [NAME(<F>)]:
 539         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 540         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 541         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 542         o DIMENSION2_CHARS96 -- none for the moment
 543
 544    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 545         C0 [0x00..0x1F] -- control character plane 0
 546         GL [0x20..0x7F] -- graphic character plane 0
 547         C1 [0x80..0x9F] -- control character plane 1
 548         GR [0xA0..0xFF] -- graphic character plane 1
 549
 550    A control character set is directly designated and invoked to C0 or
 551    C1 by an escape sequence.  The most common case is that ISO646's
 552    control character set is designated/invoked to C0 and ISO6429's
 553    control character set is designated/invoked to C1, and usually
 554    these designations/invocations are omitted in a coded text.  With
 555    7-bit environment, only C0 can be used, and a control character for
 556    C1 is encoded by an appropriate escape sequence to fit in the
  557    environment.  Every control character for C1 has a corresponding
  558    escape sequence defined.
 559
 560    A graphic character set is at first designated to one of four
 561    graphic registers (G0 through G3), then these graphic registers are
 562    invoked to GL or GR.  These designations and invocations can be
 563    done independently.  The most common case is that G0 is invoked to
 564    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 565    these invocations and designations are omitted in a coded text.
 566    With 7-bit environment, only GL can be used.
 567
 568    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 569    and 0x7F of GL area work as control characters SPACE and DEL
 570    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 571
 572    There are two ways of invocation: locking-shift and single-shift.
 573    With locking-shift, the invocation lasts until the next different
 574    invocation, whereas with single-shift, the invocation works only
 575    for the following character and doesn't affect locking-shift.
 576    Invocations are done by the following control characters or escape
 577    sequences.
 578
 579    ----------------------------------------------------------------------
 580    function             control char    escape sequence description
 581    ----------------------------------------------------------------------
 582    SI  (shift-in)               0x0F    none            invoke G0 to GL
 583    SO  (shift-out)              0x0E    none            invoke G1 to GL
 584    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 585    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 586    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 587    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 588    ----------------------------------------------------------------------
 589    The first four are for locking-shift.  Control characters for these
 590    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 591
 592    Designations are done by the following escape sequences.
 593    ----------------------------------------------------------------------
 594    escape sequence      description
 595    ----------------------------------------------------------------------
 596    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 597    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 598    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 599    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 600    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 601    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 602    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 603    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 604    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 605    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 606    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 607    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 608    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 609    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 610    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 611    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 612    ----------------------------------------------------------------------
 613
 614    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 615    of dimension 1, chars 94, and final character <F>, and etc.
 616
 617    Note (*): Although these designations are not allowed in ISO2022,
 618    Emacs accepts them on decoding, and produces them on encoding
 619    CHARS96 character set in a coding system which is characterized as
 620    7-bit environment, non-locking-shift, and non-single-shift.
 621
 622    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 623    '(' can be omitted.  We call this as "short-form" here after.
 624
 625    Now you may notice that there are a lot of ways for encoding the
  626    same multilingual text in ISO2022.  Actually, there exist many
  627    coding systems such as Compound Text (used in X's inter-client
  628    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 629    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 630    localized platforms), and all of these are variants of ISO2022.
 631
 632    In addition to the above, Emacs handles two more kinds of escape
 633    sequences: ISO6429's direction specification and Emacs' private
 634    sequence for specifying character composition.
 635
 636    ISO6429's direction specification takes the following format:
 637         o CSI ']'      -- end of the current direction
 638         o CSI '0' ']'  -- end of the current direction
 639         o CSI '1' ']'  -- start of left-to-right text
 640         o CSI '2' ']'  -- start of right-to-left text
 641    The control character CSI (0x9B: control sequence introducer) is
 642    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 643
 644    Character composition specification takes the following format:
 645         o ESC '0' -- start character composition
 646         o ESC '1' -- end character composition
 647    Since these are not standard escape sequences of any ISO, the use
 648    of them for these meaning is restricted to Emacs only.  */
 649
  650 enum iso_code_class_type iso_code_class[256]; /* NOTE(review): presumably the ISO2022 class of each byte value -- confirm */
  651 /* Nonzero if coding_system_table[IDX] marks CHARSET safe or requests a designation for it.  */
  652 #define CHARSET_OK(idx, charset)                        \
  653   (coding_system_table[idx]->safe_charsets[charset]     \
  654    || (CODING_SPEC_ISO_REQUESTED_DESIGNATION            \
  655        (coding_system_table[idx], charset)              \
  656        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)) /* NOTE(review): entry used without a NULL check -- confirm */
  657 /* Nonzero if coding_system_table[IDX] has an initial designation (value >= 0) for register 1 (presumably G1).  */
  658 #define SHIFT_OUT_OK(idx) \
  659   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 660
 661 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
  662    Check if a text is encoded in ISO2022.  If it is, return an
  663    integer in which the appropriate flag bits among:
 664         CODING_CATEGORY_MASK_ISO_7
 665         CODING_CATEGORY_MASK_ISO_7_TIGHT
 666         CODING_CATEGORY_MASK_ISO_8_1
 667         CODING_CATEGORY_MASK_ISO_8_2
 668         CODING_CATEGORY_MASK_ISO_7_ELSE
 669         CODING_CATEGORY_MASK_ISO_8_ELSE
 670    are set.  If a code which should never appear in ISO2022 is found,
 671    returns 0.  */
 672
 673 int
 674 detect_coding_iso2022 (src, src_end)
 675      unsigned char *src, *src_end;
 676 {
 677   int mask = CODING_CATEGORY_MASK_ISO;
 678   int mask_found = 0;
 679   int reg[4], shift_out = 0;
 680   int c, c1, i, charset;
 681
 682   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 683   while (mask && src < src_end)
 684     {
 685       c = *src++;
 686       switch (c)
 687         {
 688         case ISO_CODE_ESC:
 689           if (src >= src_end)
 690             break;
 691           c = *src++;
 692           if (c >= '(' && c <= '/')
 693             {
 694               /* Designation sequence for a charset of dimension 1.  */
 695               if (src >= src_end)
 696                 break;
 697               c1 = *src++;
 698               if (c1 < ' ' || c1 >= 0x80
 699                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 700                 /* Invalid designation sequence.  Just ignore.  */
 701                 break;
 702               reg[(c - '(') % 4] = charset;
 703             }
 704           else if (c == '$')
 705             {
 706               /* Designation sequence for a charset of dimension 2.  */
 707               if (src >= src_end)
 708                 break;
 709               c = *src++;
 710               if (c >= '@' && c <= 'B')
 711                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 712                 reg[0] = charset = iso_charset_table[1][0][c];
 713               else if (c >= '(' && c <= '/')
 714                 {
 715                   if (src >= src_end)
 716                     break;
 717                   c1 = *src++;
 718                   if (c1 < ' ' || c1 >= 0x80
 719                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 720                     /* Invalid designation sequence.  Just ignore.  */
 721                     break;
 722                   reg[(c - '(') % 4] = charset;
 723                 }
 724               else
 725                 /* Invalid designation sequence.  Just ignore.  */
 726                 break;
 727             }
 728           else if (c == 'N' || c == 'n')
 729             {
 730               if (shift_out == 0
 731                   && (reg[1] >= 0
 732                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 733                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 734                 {
 735                   /* Locking shift out.  */
 736                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 737                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 738                   shift_out = 1;
 739                 }
 740               break;
 741             }
 742           else if (c == 'O' || c == 'o')
 743             {
 744               if (shift_out == 1)
 745                 {
 746                   /* Locking shift in.  */
 747                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 748                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 749                   shift_out = 0;
 750                 }
 751               break;
 752             }
 753           else if (c == '0' || c == '1' || c == '2')
 754             /* Start/end composition.  Just ignore.  */
 755             break;
 756           else
 757             /* Invalid escape sequence.  Just ignore.  */
 758             break;
 759
 760           /* We found a valid designation sequence for CHARSET.  */
 761           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 762           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 763             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 764           else
 765             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 766           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 767             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 768           else
 769             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 770           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 771             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 772           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 773             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 774           break;
 775
 776         case ISO_CODE_SO:
 777           if (shift_out == 0
 778               && (reg[1] >= 0
 779                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 780                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 781             {
 782               /* Locking shift out.  */
 783               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 784               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 785             }
 786           break;
 787
 788         case ISO_CODE_SI:
 789           if (shift_out == 1)
 790             {
 791               /* Locking shift in.  */
 792               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 793               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 794             }
 795           break;
 796
 797         case ISO_CODE_CSI:
 798         case ISO_CODE_SS2:
 799         case ISO_CODE_SS3:
 800           {
 801             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 802
 803             if (c != ISO_CODE_CSI)
 804               {
 805                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 806                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 807                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 808                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 809                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 810                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 811               }
 812             if (VECTORP (Vlatin_extra_code_table)
 813                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 814               {
 815                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 816                     & CODING_FLAG_ISO_LATIN_EXTRA)
 817                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 818                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 819                     & CODING_FLAG_ISO_LATIN_EXTRA)
 820                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 821               }
 822             mask &= newmask;
 823             mask_found |= newmask;
 824           }
 825           break;
 826
 827         default:
 828           if (c < 0x80)
 829             break;
 830           else if (c < 0xA0)
 831             {
 832               if (VECTORP (Vlatin_extra_code_table)
 833                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 834                 {
 835                   int newmask = 0;
 836
 837                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 838                       & CODING_FLAG_ISO_LATIN_EXTRA)
 839                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 840                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 841                       & CODING_FLAG_ISO_LATIN_EXTRA)
 842                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 843                   mask &= newmask;
 844                   mask_found |= newmask;
 845                 }
 846               else
 847                 return 0;
 848             }
 849           else
 850             {
 851               unsigned char *src_begin = src;
 852
 853               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 854                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 855               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 856               while (src < src_end && *src >= 0xA0)
 857                 src++;
 858               if ((src - src_begin - 1) & 1 && src < src_end)
 859                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 860               else
 861                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 862             }
 863           break;
 864         }
 865     }
 866
 867   return (mask & mask_found);
 868 }
 869
/* Decode a character of which charset is CHARSET and the 1st position
   code is C1.  If dimension of CHARSET is 2, the 2nd position code is
   fetched from SRC and set to C2.  If CHARSET is negative, it means
   that we are decoding ill formed text, and what we can do is just to
   read C1 as is.

   This macro is not self-contained: its expansion refers to the
   variables `coding', `src', `dst', `c2' and `translation_table' of
   the place where it is used, and it fetches the 2nd byte with
   ONE_MORE_BYTE.

   The expansion works in this order:
   1. If a composition is at its head (COMPOSING_HEAD_P), emit
      LEADING_CODE_COMPOSITION, followed by 0xFF when the composition
      carries rules, and add 2 to `coding->composing'.
   2. For a CHARSET of dimension 2, read C2.  If the code class of C2
      (with the 8th bit masked off) is neither ISO_0x20_or_0x7F nor
      ISO_graphic_plane_0, put that byte back and use ' ' as C2.
   3. If TRANSLATION_TABLE is non-nil and translate_char returns a
      non-negative value, pass it to SPLIT_CHAR together with
      CHARSET_ALT, C1 and C2 (presumably this replaces them with the
      components of the translated character; confirm against the
      definition of SPLIT_CHAR).  Hence C1 has to be a variable.
   4. Emit the character with DECODE_CHARACTER_ASCII,
      DECODE_CHARACTER_DIMENSION1 or DECODE_CHARACTER_DIMENSION2
      according to CHARSET_ALT.
   5. While composing with rules, set `coding->composing' to
      COMPOSING_WITH_RULE_RULE to tell a composition rule follows.  */

#define DECODE_ISO_CHARACTER(charset, c1)                               \
  do {                                                                  \
    int c_alt, charset_alt = (charset);                                 \
    if (COMPOSING_HEAD_P (coding->composing))                           \
      {                                                                 \
        *dst++ = LEADING_CODE_COMPOSITION;                              \
        if (COMPOSING_WITH_RULE_P (coding->composing))                  \
          /* To tell composition rules are embedded.  */                \
          *dst++ = 0xFF;                                                \
        coding->composing += 2;                                         \
      }                                                                 \
    if ((charset) >= 0)                                                 \
      {                                                                 \
        if (CHARSET_DIMENSION (charset) == 2)                           \
          {                                                             \
            ONE_MORE_BYTE (c2);                                         \
            if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
                && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
              {                                                         \
                src--;                                                  \
                c2 = ' ';                                               \
              }                                                         \
          }                                                             \
        if (!NILP (translation_table)                                   \
            && ((c_alt = translate_char (translation_table,             \
                                         -1, (charset), c1, c2)) >= 0)) \
          SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
      }                                                                 \
    if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
      DECODE_CHARACTER_ASCII (c1);                                      \
    else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
      DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
    else                                                                \
      DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
    if (COMPOSING_WITH_RULE_P (coding->composing))                      \
      /* To tell a composition rule follows.  */                        \
      coding->composing = COMPOSING_WITH_RULE_RULE;                     \
  } while (0)
 914
/* Set designation state into CODING.

   REG is the graphic register (0..3) being designated; DIMENSION,
   CHARS and FINAL_CHAR identify the charset through
   ISO_CHARSET_TABLE.  The expansion refers to the variable `coding'
   and to the label `label_invalid_code' of the place where it is
   used.

   The designation is accepted when the charset is known (>= 0) and
   either REG is the register CODING requests for that charset or the
   charset is marked in `coding->safe_charsets'.  In that case the
   charset (or its reverse charset, when CODING_MODE_DIRECTION is set
   and a reverse charset exists) is stored as the designation of REG.

   Otherwise REG is remembered in
   `coding->spec.iso2022.last_invalid_designation_register' and
   control jumps to `label_invalid_code'.  An accepted designation of
   ASCII to register 0 that follows an invalid designation to
   register 0 also jumps there, after resetting the remembered
   register to -1.  */
#define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
  do {                                                                     \
    int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
                                     make_number (chars),                  \
                                     make_number (final_char));            \
    if (charset >= 0                                                       \
        && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == reg \
            || coding->safe_charsets[charset]))                            \
      {                                                                    \
        if (coding->spec.iso2022.last_invalid_designation_register == 0    \
            && reg == 0                                                    \
            && charset == CHARSET_ASCII)                                   \
          {                                                                \
            /* We should insert this designation sequence as is so         \
               that it is surely written back to a file.  */               \
            coding->spec.iso2022.last_invalid_designation_register = -1;   \
            goto label_invalid_code;                                       \
          }                                                                \
        coding->spec.iso2022.last_invalid_designation_register = -1;       \
        if ((coding->mode & CODING_MODE_DIRECTION)                         \
            && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
          charset = CHARSET_REVERSE_CHARSET (charset);                     \
        CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
      }                                                                    \
    else                                                                   \
      {                                                                    \
        coding->spec.iso2022.last_invalid_designation_register = reg;      \
        goto label_invalid_code;                                           \
      }                                                                    \
  } while (0)
 946
 947 /* Check if the current composing sequence contains only valid codes.
 948    If the composing sequence doesn't end before SRC_END, return -1.
 949    Else, if it contains only valid codes, return 0.
 950    Else return the length of the composing sequence.  */
 951
 952 int
 953 check_composing_code (coding, src, src_end)
 954      struct coding_system *coding;
 955      unsigned char *src, *src_end;
 956 {
 957   unsigned char *src_start = src;
 958   int invalid_code_found = 0;
 959   int charset, c, c1, dim;
 960
 961   while (src < src_end)
 962     {
 963       if (*src++ != ISO_CODE_ESC) continue;
 964       if (src >= src_end) break;
 965       if ((c = *src++) == '1') /* end of compsition */
 966         return (invalid_code_found ? src - src_start : 0);
 967       if (src + 2 >= src_end) break;
 968       if (!coding->flags & CODING_FLAG_ISO_DESIGNATION)
 969         invalid_code_found = 1;
 970       else
 971         {
 972           dim = 0;
 973           if (c == '$')
 974             {
 975               dim = 1;
 976               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 977             }
 978           if (c >= '(' && c <= '/')
 979             {
 980               c1 = *src++;
 981               if ((c1 < ' ' || c1 >= 0x80)
 982                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 983                   || ! coding->safe_charsets[charset]
 984                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 985                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 986                 invalid_code_found = 1;
 987             }
 988           else
 989             invalid_code_found = 1;
 990         }
 991     }
 992   return (invalid_code_found
 993           ? src - src_start
 994           : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
 995 }
 996
 997 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".  */
 998
 999 int
1000 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1001      struct coding_system *coding;
1002      unsigned char *source, *destination;
1003      int src_bytes, dst_bytes;
1004 {
1005   unsigned char *src = source;
1006   unsigned char *src_end = source + src_bytes;
1007   unsigned char *dst = destination;
1008   unsigned char *dst_end = destination + dst_bytes;
1009   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1010      from DST_END to assure that overflow checking is necessary only
1011      at the head of loop.  */
1012   unsigned char *adjusted_dst_end = dst_end - 6;
1013   int charset;
1014   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1015   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1016   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1017   Lisp_Object translation_table
1018     = coding->translation_table_for_decode;
1019   int result = CODING_FINISH_NORMAL;
1020
1021   if (!NILP (Venable_character_translation) && NILP (translation_table))
1022     translation_table = Vstandard_translation_table_for_decode;
1023
1024   coding->produced_char = 0;
1025   coding->fake_multibyte = 0;
1026   while (src < src_end && (dst_bytes
1027                            ? (dst < adjusted_dst_end)
1028                            : (dst < src - 6)))
1029     {
1030       /* SRC_BASE remembers the start position in source in each loop.
1031          The loop will be exited when there's not enough source text
1032          to analyze long escape sequence or 2-byte code (within macros
1033          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1034          to SRC_BASE before exiting.  */
1035       unsigned char *src_base = src;
1036       int c1 = *src++, c2;
1037
1038       switch (iso_code_class [c1])
1039         {
1040         case ISO_0x20_or_0x7F:
1041           if (!coding->composing
1042               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1043             {
1044               /* This is SPACE or DEL.  */
1045               *dst++ = c1;
1046               coding->produced_char++;
1047               break;
1048             }
1049           /* This is a graphic character, we fall down ...  */
1050
1051         case ISO_graphic_plane_0:
1052           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1053             {
1054               /* This is a composition rule.  */
1055               *dst++ = c1 | 0x80;
1056               coding->composing = COMPOSING_WITH_RULE_TAIL;
1057             }
1058           else
1059             DECODE_ISO_CHARACTER (charset0, c1);
1060           break;
1061
1062         case ISO_0xA0_or_0xFF:
1063           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1064               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1065             goto label_invalid_code;
1066           /* This is a graphic character, we fall down ... */
1067
1068         case ISO_graphic_plane_1:
1069           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1070             goto label_invalid_code;
1071           else
1072             DECODE_ISO_CHARACTER (charset1, c1);
1073           break;
1074
1075         case ISO_control_code:
1076           /* All ISO2022 control characters in this class have the
1077              same representation in Emacs internal format.  */
1078           if (c1 == '\n'
1079               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1080               && (coding->eol_type == CODING_EOL_CR
1081                   || coding->eol_type == CODING_EOL_CRLF))
1082             {
1083               result = CODING_FINISH_INCONSISTENT_EOL;
1084               goto label_end_of_loop_2;
1085             }
1086           *dst++ = c1;
1087           coding->produced_char++;
1088           break;
1089
1090         case ISO_carriage_return:
1091           if (coding->eol_type == CODING_EOL_CR)
1092             *dst++ = '\n';
1093           else if (coding->eol_type == CODING_EOL_CRLF)
1094             {
1095               ONE_MORE_BYTE (c1);
1096               if (c1 == ISO_CODE_LF)
1097                 *dst++ = '\n';
1098               else
1099                 {
1100                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1101                     {
1102                       result = CODING_FINISH_INCONSISTENT_EOL;
1103                       goto label_end_of_loop_2;
1104                     }
1105                   src--;
1106                   *dst++ = '\r';
1107                 }
1108             }
1109           else
1110             *dst++ = c1;
1111           coding->produced_char++;
1112           break;
1113
1114         case ISO_shift_out:
1115           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1116               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1117             goto label_invalid_code;
1118           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1119           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1120           break;
1121
1122         case ISO_shift_in:
1123           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1124             goto label_invalid_code;
1125           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1126           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1127           break;
1128
1129         case ISO_single_shift_2_7:
1130         case ISO_single_shift_2:
1131           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1132             goto label_invalid_code;
1133           /* SS2 is handled as an escape sequence of ESC 'N' */
1134           c1 = 'N';
1135           goto label_escape_sequence;
1136
1137         case ISO_single_shift_3:
1138           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1139             goto label_invalid_code;
1140           /* SS2 is handled as an escape sequence of ESC 'O' */
1141           c1 = 'O';
1142           goto label_escape_sequence;
1143
1144         case ISO_control_sequence_introducer:
1145           /* CSI is handled as an escape sequence of ESC '[' ...  */
1146           c1 = '[';
1147           goto label_escape_sequence;
1148
1149         case ISO_escape:
1150           ONE_MORE_BYTE (c1);
1151         label_escape_sequence:
1152           /* Escape sequences handled by Emacs are invocation,
1153              designation, direction specification, and character
1154              composition specification.  */
1155           switch (c1)
1156             {
1157             case '&':           /* revision of following character set */
1158               ONE_MORE_BYTE (c1);
1159               if (!(c1 >= '@' && c1 <= '~'))
1160                 goto label_invalid_code;
1161               ONE_MORE_BYTE (c1);
1162               if (c1 != ISO_CODE_ESC)
1163                 goto label_invalid_code;
1164               ONE_MORE_BYTE (c1);
1165               goto label_escape_sequence;
1166
1167             case '$':           /* designation of 2-byte character set */
1168               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1169                 goto label_invalid_code;
1170               ONE_MORE_BYTE (c1);
1171               if (c1 >= '@' && c1 <= 'B')
1172                 {       /* designation of JISX0208.1978, GB2312.1980,
1173                                    or JISX0208.1980 */
1174                   DECODE_DESIGNATION (0, 2, 94, c1);
1175                 }
1176               else if (c1 >= 0x28 && c1 <= 0x2B)
1177                 {       /* designation of DIMENSION2_CHARS94 character set */
1178                   ONE_MORE_BYTE (c2);
1179                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1180                 }
1181               else if (c1 >= 0x2C && c1 <= 0x2F)
1182                 {       /* designation of DIMENSION2_CHARS96 character set */
1183                   ONE_MORE_BYTE (c2);
1184                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1185                 }
1186               else
1187                 goto label_invalid_code;
1188               break;
1189
1190             case 'n':           /* invocation of locking-shift-2 */
1191               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1192                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1193                 goto label_invalid_code;
1194               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1195               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1196               break;
1197
1198             case 'o':           /* invocation of locking-shift-3 */
1199               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1200                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1201                 goto label_invalid_code;
1202               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1203               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1204               break;
1205
1206             case 'N':           /* invocation of single-shift-2 */
1207               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1208                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1209                 goto label_invalid_code;
1210               ONE_MORE_BYTE (c1);
1211               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1212               DECODE_ISO_CHARACTER (charset, c1);
1213               break;
1214
1215             case 'O':           /* invocation of single-shift-3 */
1216               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1217                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1218                 goto label_invalid_code;
1219               ONE_MORE_BYTE (c1);
1220               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1221               DECODE_ISO_CHARACTER (charset, c1);
1222               break;
1223
1224             case '0': case '2': /* start composing */
1225               /* Before processing composing, we must be sure that all
1226                  characters being composed are supported by CODING.
1227                  If not, we must give up composing and insert the
1228                  bunch of codes for composing as is without decoding.  */
1229               {
1230                 int result1;
1231
1232                 result1 = check_composing_code (coding, src, src_end);
1233                 if (result1 == 0)
1234                   {
1235                     coding->composing = (c1 == '0'
1236                                          ? COMPOSING_NO_RULE_HEAD
1237                                          : COMPOSING_WITH_RULE_HEAD);
1238                     coding->produced_char++;
1239                   }
1240                 else if (result1 > 0)
1241                   {
1242                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1243                       {
1244                         bcopy (src_base, dst, result1 + 2);
1245                         src += result1;
1246                         dst += result1 + 2;
1247                         coding->produced_char += result1 + 2;
1248                       }
1249                     else
1250                       {
1251                         result = CODING_FINISH_INSUFFICIENT_DST;
1252                         goto label_end_of_loop_2;
1253                       }
1254                   }
1255                 else
1256                   goto label_end_of_loop;
1257               }
1258               break;
1259
1260             case '1':           /* end composing */
1261               coding->composing = COMPOSING_NO;
1262               break;
1263
1264             case '[':           /* specification of direction */
1265               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1266                 goto label_invalid_code;
1267               /* For the moment, nested direction is not supported.
1268                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1269                  left-to-right, and nozero means right-to-left.  */
1270               ONE_MORE_BYTE (c1);
1271               switch (c1)
1272                 {
1273                 case ']':       /* end of the current direction */
1274                   coding->mode &= ~CODING_MODE_DIRECTION;
1275
1276                 case '0':       /* end of the current direction */
1277                 case '1':       /* start of left-to-right direction */
1278                   ONE_MORE_BYTE (c1);
1279                   if (c1 == ']')
1280                     coding->mode &= ~CODING_MODE_DIRECTION;
1281                   else
1282                     goto label_invalid_code;
1283                   break;
1284
1285                 case '2':       /* start of right-to-left direction */
1286                   ONE_MORE_BYTE (c1);
1287                   if (c1 == ']')
1288                     coding->mode |= CODING_MODE_DIRECTION;
1289                   else
1290                     goto label_invalid_code;
1291                   break;
1292
1293                 default:
1294                   goto label_invalid_code;
1295                 }
1296               break;
1297
1298             default:
1299               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1300                 goto label_invalid_code;
1301               if (c1 >= 0x28 && c1 <= 0x2B)
1302                 {       /* designation of DIMENSION1_CHARS94 character set */
1303                   ONE_MORE_BYTE (c2);
1304                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1305                 }
1306               else if (c1 >= 0x2C && c1 <= 0x2F)
1307                 {       /* designation of DIMENSION1_CHARS96 character set */
1308                   ONE_MORE_BYTE (c2);
1309                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1310                 }
1311               else
1312                 {
1313                   goto label_invalid_code;
1314                 }
1315             }
1316           /* We must update these variables now.  */
1317           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1318           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1319           break;
1320
1321         label_invalid_code:
1322           while (src_base < src)
1323             *dst++ = *src_base++;
1324           coding->fake_multibyte = 1;
1325         }
1326       continue;
1327
1328     label_end_of_loop:
1329       result = CODING_FINISH_INSUFFICIENT_SRC;
1330     label_end_of_loop_2:
1331       src = src_base;
1332       break;
1333     }
1334
1335   if (src < src_end)
1336     {
1337       if (result == CODING_FINISH_NORMAL)
1338         result = CODING_FINISH_INSUFFICIENT_DST;
1339       else if (result != CODING_FINISH_INCONSISTENT_EOL
1340                && coding->mode & CODING_MODE_LAST_BLOCK)
1341         {
1342           /* This is the last block of the text to be decoded.  We had
1343              better just flush out all remaining codes in the text
1344              although they are not valid characters.  */
1345           src_bytes = src_end - src;
1346           if (dst_bytes && (dst_end - dst < src_bytes))
1347             src_bytes = dst_end - dst;
1348           bcopy (src, dst, src_bytes);
1349           dst += src_bytes;
1350           src += src_bytes;
1351           coding->fake_multibyte = 1;
1352         }
1353     }
1354
1355   coding->consumed = coding->consumed_char = src - source;
1356   coding->produced = dst - destination;
1357   return result;
1358 }
1359
1360 /* ISO2022 encoding stuff.  */
1361
1362 /*
1363    It is not enough to say just "ISO2022" on encoding, we have to
1364    specify more details.  In Emacs, each coding system of ISO2022
1365    variant has the following specifications:
1366         1. Initial designation to G0 thru G3.
1367         2. Allows short-form designation?
1368         3. ASCII should be designated to G0 before control characters?
1369         4. ASCII should be designated to G0 at end of line?
1370         5. 7-bit environment or 8-bit environment?
1371         6. Use locking-shift?
1372         7. Use Single-shift?
1373    And the following two are only for Japanese:
1374         8. Use ASCII in place of JIS0201-1976-Roman?
1375         9. Use JISX0208-1983 in place of JISX0208-1978?
1376    These specifications are encoded in `coding->flags' as flag bits
1377    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1378    details.
1379 */
1380
/* Produce codes (escape sequence) for designating CHARSET to graphic
   register REG.  If <final-char> of CHARSET is '@', 'A', or 'B' and
   the coding system CODING allows, produce designation sequence of
   short-form.

   The codes are stored through the variable `dst' of the place where
   this macro is used, and `dst' is advanced past them.  The sequence
   is built as follows:
     - If the revision number CODING keeps for CHARSET is less than
       255, ESC '&' and the byte '@' + revision come first.
     - Then ESC, followed for a charset of dimension 2 by '$'.
     - Then the intermediate character selected by REG: one of "()*+"
       for a 94-character set, one of ",-./" for a 96-character set.
       It is omitted only in the short-form case: dimension 2, 94
       characters, REG 0, CODING_FLAG_ISO_SHORT_FORM set, and a final
       character in the range '@'..'B'.
     - Then the final character of CHARSET.
   At last CHARSET is recorded in CODING as the designation of REG.  */

#define ENCODE_DESIGNATION(charset, reg, coding)                        \
  do {                                                                  \
    unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
    char *intermediate_char_94 = "()*+";                                \
    char *intermediate_char_96 = ",-./";                                \
    int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
    if (revision < 255)                                                 \
      {                                                                 \
        *dst++ = ISO_CODE_ESC;                                          \
        *dst++ = '&';                                                   \
        *dst++ = '@' + revision;                                        \
      }                                                                 \
    *dst++ = ISO_CODE_ESC;                                              \
    if (CHARSET_DIMENSION (charset) == 1)                               \
      {                                                                 \
        if (CHARSET_CHARS (charset) == 94)                              \
          *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
      }                                                                 \
    else                                                                \
      {                                                                 \
        *dst++ = '$';                                                   \
        if (CHARSET_CHARS (charset) == 94)                              \
          {                                                             \
            if (! (coding->flags & CODING_FLAG_ISO_SHORT_FORM)          \
                || reg != 0                                             \
                || final_char < '@' || final_char > 'B')                \
              *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
          }                                                             \
        else                                                            \
          *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
      }                                                                 \
    *dst++ = final_char;                                                \
    CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;                \
  } while (0)
1422
1423 /* The two macros below emit the ISO2022 single-shift functions SS2 and
1424    SS3: the escape sequence form when CODING is limited to seven bits,
1425    and the single control code (flagging fake multibyte) otherwise.  */
1426
1427 #define ENCODE_SINGLE_SHIFT_2                           \
1428   do {                                                  \
1429     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
1430       {                                                 \
1431         *dst++ = ISO_CODE_SS2;                          \
1432         coding->fake_multibyte = 1;                     \
1433       }                                                 \
1434     else /* seven bits only: ESC N */                   \
1435       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1436     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1437   } while (0)
1438
1439 #define ENCODE_SINGLE_SHIFT_3                           \
1440   do {                                                  \
1441     if (! (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)) \
1442       {                                                 \
1443         *dst++ = ISO_CODE_SS3;                          \
1444         coding->fake_multibyte = 1;                     \
1445       }                                                 \
1446     else /* seven bits only: ESC O */                   \
1447       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1448     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1449   } while (0)
1450
1451 /* The four macros below emit the ISO2022 locking-shift functions
1452    (shift-in, shift-out, locking-shift-2, and locking-shift-3) and store
1453    the number of the shifted-to graphic register in CODING for plane 0.  */
1454
1455 #define ENCODE_SHIFT_IN                         \
1456   do {                                          \
1457     dst[0] = ISO_CODE_SI, dst += 1;             \
1458     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1459   } while (0)
1460
1461 #define ENCODE_SHIFT_OUT                        \
1462   do {                                          \
1463     dst[0] = ISO_CODE_SO, dst += 1;             \
1464     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1465   } while (0)
1466
1467 #define ENCODE_LOCKING_SHIFT_2                          \
1468   do {                                                  \
1469     dst[0] = ISO_CODE_ESC, dst[1] = 'n', dst += 2;      \
1470     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;         \
1471   } while (0)
1472
1473 #define ENCODE_LOCKING_SHIFT_3                          \
1474   do {                                                  \
1475     dst[0] = ISO_CODE_ESC, dst[1] = 'o', dst += 2;      \
1476     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;         \
1477   } while (0)
1478
1479 /* Produce codes for a DIMENSION1 character whose character set is
1480    CHARSET and whose position-code is C1, designating and invoking CHARSET
1481    first if necessary.  Uses CODING and DST of the expansion site.  */
1482
1483 /* Each `break' below leaves the loop after output; the last branch loops again.  */
1484 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1485   do {                                                                  \
1486     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1487       { /* Pending single shift covers just this char.  */              \
1488         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1489           *dst++ = c1 & 0x7F;                                           \
1490         else                                                            \
1491           *dst++ = c1 | 0x80;                                           \
1492         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1493         break;                                                          \
1494       }                                                                 \
1495     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1496       { /* CHARSET is invoked to graphic plane 0.  */                   \
1497         *dst++ = c1 & 0x7F;                                             \
1498         break;                                                          \
1499       }                                                                 \
1500     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1501       { /* CHARSET is invoked to graphic plane 1.  */                   \
1502         *dst++ = c1 | 0x80;                                             \
1503         break;                                                          \
1504       }                                                                 \
1505     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1506              && !coding->safe_charsets[charset])                        \
1507       {                                                                 \
1508         /* We should not encode this character, instead produce one or  \
1509            two `?'s.  */                                                \
1510         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1511         if (CHARSET_WIDTH (charset) == 2)                               \
1512           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1513         break;                                                          \
1514       }                                                                 \
1515     else                                                                \
1516       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1517          must invoke it, or, at first, designate it to some graphic     \
1518          register.  Then repeat the loop to actually produce the        \
1519          character.  */                                                 \
1520       dst = encode_invocation_designation (charset, coding, dst);       \
1521   } while (1)
1522
1523 /* Produce codes for a DIMENSION2 character whose character set is
1524    CHARSET and whose position-codes are C1 and C2, designating and invoking
1525    CHARSET first if necessary.  Uses CODING and DST of the expansion site.  */
1526
1527 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1528   do { /* `break' leaves the loop after output; the last branch loops.  */ \
1529     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1530       { /* Pending single shift covers just this char.  */              \
1531         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1532           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1533         else                                                            \
1534           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1535         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1536         break;                                                          \
1537       }                                                                 \
1538     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1539       { /* CHARSET is invoked to graphic plane 0.  */                   \
1540         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1541         break;                                                          \
1542       }                                                                 \
1543     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1544       { /* CHARSET is invoked to graphic plane 1.  */                   \
1545         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1546         break;                                                          \
1547       }                                                                 \
1548     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1549              && !coding->safe_charsets[charset])                        \
1550       {                                                                 \
1551         /* We should not encode this character, instead produce one or  \
1552            two `?'s.  */                                                \
1553         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1554         if (CHARSET_WIDTH (charset) == 2)                               \
1555           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1556         break;                                                          \
1557       }                                                                 \
1558     else                                                                \
1559       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1560          must invoke it, or, at first, designate it to some graphic     \
1561          register.  Then repeat the loop to actually produce the        \
1562          character.  */                                                 \
1563       dst = encode_invocation_designation (charset, coding, dst);       \
1564   } while (1)
1565 /* Encode one character (CHARSET, C1, C2), translated first by TRANSLATION_TABLE if that is non-nil.  Uses CODING and DST too.  */
1566 #define ENCODE_ISO_CHARACTER(charset, c1, c2)                   \
1567   do {                                                          \
1568     int c_alt, charset_alt;                                     \
1569     if (!NILP (translation_table)                               \
1570         && ((c_alt = translate_char (translation_table, -1,     \
1571                                      charset, c1, c2))          \
1572             >= 0))                                              \
1573       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
1574     else                                                        \
1575       charset_alt = charset;                                    \
1576     if (CHARSET_DIMENSION (charset_alt) == 1)                   \
1577       { /* Test CHARSET_ALT so a translated char keeps its set.  */ \
1578         if (charset_alt == CHARSET_ASCII                        \
1579             && (coding->flags & CODING_FLAG_ISO_USE_ROMAN))     \
1580           charset_alt = charset_latin_jisx0201;                 \
1581         ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);      \
1582       }                                                         \
1583     else                                                        \
1584       { /* Likewise for the JISX0208 -> JISX0208-1978 swap.  */ \
1585         if (charset_alt == charset_jisx0208                     \
1586             && (coding->flags & CODING_FLAG_ISO_USE_OLDJIS))    \
1587           charset_alt = charset_jisx0208_1978;                  \
1588         ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);  \
1589       }                                                         \
1590     if (! COMPOSING_P (coding->composing)) /* else not counted */ \
1591       coding->consumed_char++;                                  \
1592   } while (0)
1593
1594 /* Make CHARSET usable for the next character, writing at DST whatever
1595    designation and invocation codes that takes.  Nothing is written
1596    when CHARSET is already designated to a graphic register which is
1597    invoked to graphic plane 0 or 1.  The element `spec.iso2022' of
1598    *CODING is updated.  Return new DST.  */
1599
1600 unsigned char *
1601 encode_invocation_designation (charset, coding, dst)
1602      int charset;
1603      struct coding_system *coding;
1604      unsigned char *dst;
1605 {
1606   int reg = 0;                  /* graphic register number */
1607
1608   /* Step 1: find the graphic register CHARSET is designated to, if
1609      there is one.  REG ends up as 4 when there is none.  */
1610   while (reg < 4
1611          && charset != CODING_SPEC_ISO_DESIGNATION (coding, reg))
1612     reg++;
1613
1614   if (reg == 4)
1615     {
1616       /* Step 2: designate CHARSET now.  Use the register requested
1617          for CHARSET, or graphic register 0 when CHARSET requests no
1618          special designation.  */
1619       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1620       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1621         reg = 0;
1622
1623       ENCODE_DESIGNATION (charset, reg, coding);
1624     }
1625
1626   /* Step 3: if REG is already invoked to graphic plane 0 or 1, no
1627      shift function is needed.  */
1628   if (CODING_SPEC_ISO_INVOCATION (coding, 0) == reg
1629       || CODING_SPEC_ISO_INVOCATION (coding, 1) == reg)
1630     return dst;
1631
1632   /* Step 4: invoke REG.  Registers 0 and 1 take shift-in and
1633      shift-out; registers 2 and 3 take a single shift when CODING
1634      asks for it and a locking shift otherwise.  */
1635   if (reg == 0)
1636     ENCODE_SHIFT_IN;
1637   else if (reg == 1)
1638     ENCODE_SHIFT_OUT;
1639   else if (reg == 2)
1640     {
1641       if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1642         ENCODE_SINGLE_SHIFT_2;
1643       else
1644         ENCODE_LOCKING_SHIFT_2;
1645     }
1646   else if (reg == 3)
1647     {
1648       if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1649         ENCODE_SINGLE_SHIFT_3;
1650       else
1651         ENCODE_LOCKING_SHIFT_3;
1652     }
1653
1654   return dst;
1655 }
1656
1657 /* The following three macros produce codes for indicating composition (ESC 0, ESC 2, and ESC 1); each is one parenthesized expression.  */
1658 #define ENCODE_COMPOSITION_NO_RULE_START  (*dst++ = ISO_CODE_ESC, *dst++ = '0')
1659 #define ENCODE_COMPOSITION_WITH_RULE_START  (*dst++ = ISO_CODE_ESC, *dst++ = '2')
1660 #define ENCODE_COMPOSITION_END    (*dst++ = ISO_CODE_ESC, *dst++ = '1')
1661
1662 /* The following three macros produce codes for indicating direction
1663    of text.  Each expands to one statement; with seven bits, CSI is sent as ESC [.  */
1664 #define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
1665   do {                                                  \
1666     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1667       *dst++ = ISO_CODE_ESC, *dst++ = '[';              \
1668     else                                                \
1669       *dst++ = ISO_CODE_CSI;                            \
1670   } while (0)
1671
1672 #define ENCODE_DIRECTION_R2L    \
1673   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '2'; *dst++ = ']'; } while (0)
1674
1675 #define ENCODE_DIRECTION_L2R    \
1676   do { ENCODE_CONTROL_SEQUENCE_INTRODUCER; *dst++ = '0'; *dst++ = ']'; } while (0)
1677
1678 /* Produce codes for designation and invocation to reset the graphic
1679    planes and registers to initial state.  Uses CODING and DST of the expansion site.  */
1680 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1681   do {                                                                      \
1682     int reg; /* graphic register number */                                  \
1683     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0) /* shift-in first */   \
1684       ENCODE_SHIFT_IN;                                                      \
1685     for (reg = 0; reg < 4; reg++) /* re-designate what has changed */       \
1686       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1687           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1688               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1689         ENCODE_DESIGNATION                                                  \
1690           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1691   } while (0)
1692
1693 /* Scan the line starting at SRC (up to SRC_END or a '\n') for charsets
1694    that request a graphic register, and produce at *DSTP the designation
1695    sequences not already in effect.  *DSTP is updated only if such a
1696    charset was found.  TABLE, if non-nil, translates each character first.
1697    If the block ends before end-of-line, some designations may be missed.  */
1698
1699 void
1700 encode_designation_at_bol (coding, table, src, src_end, dstp)
1701      struct coding_system *coding;
1702      Lisp_Object table;
1703      unsigned char *src, *src_end, **dstp;
1704 {
1705   int charset, c, found = 0, reg;
1706   /* Table of charsets to be designated to each graphic register.  */
1707   int r[4];
1708   unsigned char *dst = *dstp;
1709
1710   for (reg = 0; reg < 4; reg++)
1711     r[reg] = -1;                /* -1: nothing found for REG yet */
1712
1713   while (src < src_end && *src != '\n' && found < 4)
1714     {
1715       int bytes = BYTES_BY_CHAR_HEAD (*src);
1716 /* NOTE(review): nothing visible here checks that SRC + BYTES stays within SRC_END.  */
1717       if (NILP (table))
1718         charset = CHARSET_AT (src);
1719       else
1720         {
1721           int c_alt;
1722           unsigned char c1, c2;
1723
1724           SPLIT_STRING(src, bytes, charset, c1, c2);
1725           if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
1726             charset = CHAR_CHARSET (c_alt);
1727         }
1728
1729       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1730       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1731         {                       /* the first charset seen for REG wins */
1732           found++;
1733           r[reg] = charset;
1734         }
1735
1736       src += bytes;
1737     }
1738
1739   if (found)
1740     {
1741       for (reg = 0; reg < 4; reg++)
1742         if (r[reg] >= 0
1743             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1744           ENCODE_DESIGNATION (r[reg], reg, coding);
1745       *dstp = dst;
1746     }
1747 }
1748
1749 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
1750 /* Encodes SOURCE into DESTINATION; returns a CODING_FINISH_* code and sets CODING's consumed/produced counts.  */
1751 int
1752 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1753      struct coding_system *coding;
1754      unsigned char *source, *destination;
1755      int src_bytes, dst_bytes;
1756 {
1757   unsigned char *src = source;
1758   unsigned char *src_end = source + src_bytes;
1759   unsigned char *dst = destination;
1760   unsigned char *dst_end = destination + dst_bytes;
1761   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1762      from DST_END to assure overflow checking is necessary only at the
1763      head of loop.  */
1764   unsigned char *adjusted_dst_end = dst_end - 19;
1765   Lisp_Object translation_table
1766       = coding->translation_table_for_encode;
1767   int result = CODING_FINISH_NORMAL;
1768
1769   if (!NILP (Venable_character_translation) && NILP (translation_table))
1770     translation_table = Vstandard_translation_table_for_encode;
1771
1772   coding->consumed_char = 0;
1773   coding->fake_multibyte = 0;
1774   while (src < src_end && (dst_bytes
1775                            ? (dst < adjusted_dst_end)
1776                            : (dst < src - 19))) /* DST_BYTES == 0: DST must trail SRC (presumably in-place use -- confirm) */
1777     {
1778       /* SRC_BASE remembers the start position in source in each loop.
1779          The loop will be exited when there's not enough source text
1780          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1781          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1782          reset to SRC_BASE before exiting.  */
1783       unsigned char *src_base = src;
1784       int charset, c1, c2, c3, c4;
1785
1786       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1787           && CODING_SPEC_ISO_BOL (coding))
1788         {
1789           /* We have to produce designation sequences if any now.  */
1790           encode_designation_at_bol (coding, translation_table,
1791                                      src, src_end, &dst);
1792           CODING_SPEC_ISO_BOL (coding) = 0;
1793         }
1794
1795       c1 = *src++;
1796       /* If we are seeing a component of a composite character, we are
1797          seeing a leading-code encoded irregularly for composition, or
1798          a composition rule if composing with rule.  We must set C1 to
1799          a normal leading-code or an ASCII code.  If we are not seeing
1800          a composite character, we must reset composition,
1801          designation, and invocation states.  */
1802       if (COMPOSING_P (coding->composing))
1803         {
1804           if (c1 < 0xA0)
1805             {
1806               /* We are not in a composite character any longer.  */
1807               coding->composing = COMPOSING_NO;
1808               ENCODE_RESET_PLANE_AND_REGISTER;
1809               ENCODE_COMPOSITION_END;
1810             }
1811           else
1812             {
1813               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1814                 {               /* C1 is a composition rule: emit it as is */
1815                   *dst++ = c1 & 0x7F;
1816                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1817                   continue;
1818                 }
1819               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1820                 coding->composing = COMPOSING_WITH_RULE_RULE;
1821               if (c1 == 0xA0)
1822                 {
1823                   /* This is an ASCII component.  */
1824                   ONE_MORE_BYTE (c1);
1825                   c1 &= 0x7F;
1826                 }
1827               else
1828                 /* This is a leading-code of non ASCII component.  */
1829                 c1 -= 0x20;
1830             }
1831         }
1832
1833       /* Now encode one character.  C1 is a control character, an
1834          ASCII character, or a leading-code of multi-byte character.  */
1835       switch (emacs_code_class[c1])
1836         {
1837         case EMACS_ascii_code:
1838           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1839           break;
1840
1841         case EMACS_control_code:
1842           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1843             ENCODE_RESET_PLANE_AND_REGISTER;
1844           *dst++ = c1;
1845           coding->consumed_char++;
1846           break;
1847
1848         case EMACS_carriage_return_code:
1849           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1850             {                   /* an ordinary control code here */
1851               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1852                 ENCODE_RESET_PLANE_AND_REGISTER;
1853               *dst++ = c1;
1854               coding->consumed_char++;
1855               break;
1856             }
1857           /* fall down to treat '\r' as '\n' ...  */
1858
1859         case EMACS_linefeed_code:
1860           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1861             ENCODE_RESET_PLANE_AND_REGISTER;
1862           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1863             bcopy (coding->spec.iso2022.initial_designation,
1864                    coding->spec.iso2022.current_designation,
1865                    sizeof coding->spec.iso2022.initial_designation);
1866           if (coding->eol_type == CODING_EOL_LF
1867               || coding->eol_type == CODING_EOL_UNDECIDED)
1868             *dst++ = ISO_CODE_LF;
1869           else if (coding->eol_type == CODING_EOL_CRLF)
1870             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1871           else
1872             *dst++ = ISO_CODE_CR;
1873           CODING_SPEC_ISO_BOL (coding) = 1;
1874           coding->consumed_char++;
1875           break;
1876
1877         case EMACS_leading_code_2:
1878           ONE_MORE_BYTE (c2);
1879           if (c2 < 0xA0)
1880             {
1881               /* invalid sequence */
1882               *dst++ = c1;
1883               src--;
1884               coding->consumed_char++;
1885             }
1886           else
1887             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1888           break;
1889
1890         case EMACS_leading_code_3:
1891           TWO_MORE_BYTES (c2, c3);
1892           if (c2 < 0xA0 || c3 < 0xA0)
1893             {
1894               /* invalid sequence */
1895               *dst++ = c1;
1896               src -= 2;
1897               coding->consumed_char++;
1898             }
1899           else if (c1 < LEADING_CODE_PRIVATE_11) /* C1 itself is passed as the charset */
1900             ENCODE_ISO_CHARACTER (c1, c2, c3);
1901           else                  /* otherwise C2 is passed as the charset */
1902             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1903           break;
1904
1905         case EMACS_leading_code_4:
1906           THREE_MORE_BYTES (c2, c3, c4);
1907           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1908             {
1909               /* invalid sequence */
1910               *dst++ = c1;
1911               src -= 3;
1912               coding->consumed_char++;
1913             }
1914           else
1915             ENCODE_ISO_CHARACTER (c2, c3, c4);
1916           break;
1917
1918         case EMACS_leading_code_composition:
1919           ONE_MORE_BYTE (c2);
1920           if (c2 < 0xA0)
1921             {
1922               /* invalid sequence */
1923               *dst++ = c1;
1924               src--;
1925               coding->consumed_char++;
1926             }
1927           else if (c2 == 0xFF)  /* start of a composition with rule */
1928             {
1929               ENCODE_RESET_PLANE_AND_REGISTER;
1930               coding->composing = COMPOSING_WITH_RULE_HEAD;
1931               ENCODE_COMPOSITION_WITH_RULE_START;
1932               coding->consumed_char++;
1933             }
1934           else
1935             {
1936               ENCODE_RESET_PLANE_AND_REGISTER;
1937               /* Rewind one byte because it is a character code of
1938                  composition elements.  */
1939               src--;
1940               coding->composing = COMPOSING_NO_RULE_HEAD;
1941               ENCODE_COMPOSITION_NO_RULE_START;
1942               coding->consumed_char++;
1943             }
1944           break;
1945
1946         case EMACS_invalid_code:
1947           *dst++ = c1;          /* passed through unchanged */
1948           coding->consumed_char++;
1949           break;
1950         }
1951       continue;
1952     label_end_of_loop:          /* NOTE(review): presumably the goto target of the *_MORE_BYTE(S) macros */
1953       result = CODING_FINISH_INSUFFICIENT_SRC;
1954       src = src_base;
1955       break;
1956     }
1957 /* Source left over without a short-source exit means the loop stopped for lack of output space.  */
1958   if (src < src_end && result == CODING_FINISH_NORMAL)
1959     result = CODING_FINISH_INSUFFICIENT_DST;
1960
1961   /* If this is the last block of the text to be encoded, we must
1962      reset graphic planes and registers to the initial state, and
1963      flush out the carryover if any.  */
1964   if (coding->mode & CODING_MODE_LAST_BLOCK)
1965     { /* NOTE(review): these codes are appended with no visible check against DST_END.  */
1966       ENCODE_RESET_PLANE_AND_REGISTER;
1967       if (COMPOSING_P (coding->composing))
1968         ENCODE_COMPOSITION_END;
1969     }
1970   coding->consumed = src - source;
1971   coding->produced = coding->produced_char = dst - destination;
1972   return result;
1973 }
1974
1975 \f
1976 /*** 4. SJIS and BIG5 handlers ***/
1977
1978 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1979    quite widely.  So, for the moment, Emacs supports them in the bare
1980    C code.  But, in the future, they may be supported only by CCL.  */
1981
1982 /* SJIS is a coding system encoding three character sets: ASCII, right
1983    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1984    as is.  A character of charset katakana-jisx0201 is encoded by
1985    "position-code + 0x80".  A character of charset japanese-jisx0208
1986    is encoded in two bytes, but the two position-codes are divided and
1987    shifted so that they fit in the ranges below.
1988
1989    --- CODE RANGE of SJIS ---
1990    (character set)      (range)
1991    ASCII                0x00 .. 0x7F
1992    KATAKANA-JISX0201    0xA1 .. 0xDF
1993    JISX0208 (1st byte)  0x81 .. 0x9F and 0xE0 .. 0xEF
1994             (2nd byte)  0x40 .. 0x7E and 0x80 .. 0xFC
1995    -------------------------------
1996
1997 */
1998
1999 /* BIG5 is a coding system encoding two character sets: ASCII and
2000    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2001    character set and is encoded in two bytes.
2002
2003    --- CODE RANGE of BIG5 ---
2004    (character set)      (range)
2005    ASCII                0x00 .. 0x7F
2006    Big5 (1st byte)      0xA1 .. 0xFE
2007         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2008    --------------------------
2009
2010    Since the number of characters in Big5 is larger than maximum
2011    characters in Emacs' charset (96x96), it can't be handled as one
2012    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2013    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2014    contains frequently used characters and the latter contains less
2015    frequently used characters.  */
2016
2017 /* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
2018    are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
2019    C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
2020    format.  CHARSET is `charset_big5_1' or `charset_big5_2'.  */
2021
2022 /* Count of Big5 characters sharing one 1st byte: 94 with 2nd byte 0xA1..0xFE plus 63 with 2nd byte 0x40..0x7E.  */
2023 #define BIG5_SAME_ROW ((0xFF - 0xA1) + (0x7F - 0x40))
2024 /* Convert Big5 bytes B1, B2 into CHARSET (charset_big5_1 if B1 < 0xC9, else charset_big5_2) and position-codes C1, C2.  */
2025 #define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
2026   do {                                                                  \
2027     unsigned int temp /* index of the character in the whole of Big5 */ \
2028       = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
2029          + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
2030     if ((b1) < 0xC9)                                                    \
2031       (charset) = charset_big5_1;                                       \
2032     else /* rebase the index to the start of the second charset */      \
2033       (charset) = charset_big5_2, temp -= (0xC9 - 0xA1) * BIG5_SAME_ROW; \
2034                                                                         \
2035                                                                         \
2036     (c1) = temp / (0xFF - 0xA1) + 0x21;                                 \
2037     (c2) = temp % (0xFF - 0xA1) + 0x21;                                 \
2038   } while (0)
2039 /* Convert position-codes C1, C2 of CHARSET (charset_big5_1 or charset_big5_2) into Big5 bytes B1, B2.  */
2040 #define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
2041   do {                                                                  \
2042     unsigned int temp = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);  \
2043     if ((charset) == charset_big5_2) /* second charset starts at 0xC9 */ \
2044       temp += BIG5_SAME_ROW * (0xC9 - 0xA1);                            \
2045     (b1) = temp / BIG5_SAME_ROW + 0xA1;                                 \
2046     (b2) = temp % BIG5_SAME_ROW;                                        \
2047     (b2) += (b2) < 0x3F ? 0x40 : 0x62; /* skip the gap 0x7F..0xA0 */    \
2048   } while (0)
2049
2050 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
2051   do { /* Decode (CHARSET, C1, C2) after translating it if possible.  */ \
2052     int c_alt, charset_alt = (charset);                                 \
2053     if (!NILP (translation_table)                                       \
2054         && ((c_alt = translate_char (translation_table,                 \
2055                                      -1, (charset), c1, c2)) >= 0))     \
2056       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                          \
2057     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
2058       DECODE_CHARACTER_ASCII (c1);                                      \
2059     else if (CHARSET_DIMENSION (charset_alt) != 1)                      \
2060       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
2061     else                                                                \
2062       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
2063   } while (0)
2064
/* Encode one character of CHARSET, with position codes C1 and C2, and
   append the result at DST.

   When `translation_table' is non-nil the character is first looked
   up with translate_char; a non-negative result replaces the
   character (SPLIT_CHAR rewrites C1 and C2, which must be lvalues).

   Bytes written, depending on the resulting charset:
     - `charset_ascii': C1 alone.
     - one-dimensional: C1 alone for `charset_katakana_jisx0201' when
       SJIS_P is nonzero, otherwise the charset followed by C1.
     - two-dimensional: the two SJIS bytes for `charset_jisx0208' when
       SJIS_P is nonzero, the two BIG5 bytes for `charset_big5_1' or
       `charset_big5_2' when SJIS_P is zero, otherwise the charset
       followed by C1 and C2 with their high bits cleared.  This last
       case writes three bytes, the maximum for one expansion.

   `coding->fake_multibyte' is set on every branch except the ASCII,
   SJIS katakana and BIG5 ones.  `coding->consumed_char' is always
   incremented.

   Expects `coding', `dst', `sjis_p' and `translation_table' to be in
   scope.  The expansion deliberately does not end in a semicolon, so
   the macro behaves like a single statement (e.g. before `else').  */
#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)             \
  do {                                                          \
    int c_alt, charset_alt;                                     \
    if (!NILP (translation_table)                               \
        && ((c_alt = translate_char (translation_table, -1,     \
                                     (charset), c1, c2))        \
            >= 0))                                              \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
    else                                                        \
      charset_alt = (charset);                                  \
    if (charset_alt == charset_ascii)                           \
      *dst++ = c1;                                              \
    else if (CHARSET_DIMENSION (charset_alt) == 1)              \
      {                                                         \
        if (sjis_p && charset_alt == charset_katakana_jisx0201) \
          *dst++ = c1;                                          \
        else                                                    \
          {                                                     \
            *dst++ = charset_alt, *dst++ = c1;                  \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        c1 &= 0x7F, c2 &= 0x7F;                                 \
        if (sjis_p && charset_alt == charset_jisx0208)          \
          {                                                     \
            unsigned char s1, s2;                               \
                                                                \
            ENCODE_SJIS (c1, c2, s1, s2);                       \
            *dst++ = s1, *dst++ = s2;                           \
            coding->fake_multibyte = 1;                         \
          }                                                     \
        else if (!sjis_p                                        \
                 && (charset_alt == charset_big5_1              \
                     || charset_alt == charset_big5_2))         \
          {                                                     \
            unsigned char b1, b2;                               \
                                                                \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
            *dst++ = b1, *dst++ = b2;                           \
          }                                                     \
        else                                                    \
          {                                                     \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    coding->consumed_char++;                                    \
  } while (0)
2115
2116 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2117    Check if a text is encoded in SJIS.  If it is, return
2118    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2119
2120 int
2121 detect_coding_sjis (src, src_end)
2122      unsigned char *src, *src_end;
2123 {
2124   unsigned char c;
2125
2126   while (src < src_end)
2127     {
2128       c = *src++;
2129       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2130         {
2131           if (src < src_end && *src++ < 0x40)
2132             return 0;
2133         }
2134     }
2135   return CODING_CATEGORY_MASK_SJIS;
2136 }
2137
2138 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2139    Check if a text is encoded in BIG5.  If it is, return
2140    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2141
2142 int
2143 detect_coding_big5 (src, src_end)
2144      unsigned char *src, *src_end;
2145 {
2146   unsigned char c;
2147
2148   while (src < src_end)
2149     {
2150       c = *src++;
2151       if (c >= 0xA1)
2152         {
2153           if (src >= src_end)
2154             break;
2155           c = *src++;
2156           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2157             return 0;
2158         }
2159     }
2160   return CODING_CATEGORY_MASK_BIG5;
2161 }
2162
2163 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2164    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2165
2166 int
2167 decode_coding_sjis_big5 (coding, source, destination,
2168                          src_bytes, dst_bytes, sjis_p)
2169      struct coding_system *coding;
2170      unsigned char *source, *destination;
2171      int src_bytes, dst_bytes;
2172      int sjis_p;
2173 {
2174   unsigned char *src = source;
2175   unsigned char *src_end = source + src_bytes;
2176   unsigned char *dst = destination;
2177   unsigned char *dst_end = destination + dst_bytes;
2178   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2179      from DST_END to assure overflow checking is necessary only at the
2180      head of loop.  */
2181   unsigned char *adjusted_dst_end = dst_end - 3;
2182   Lisp_Object translation_table
2183       = coding->translation_table_for_decode;
2184   int result = CODING_FINISH_NORMAL;
2185
2186   if (!NILP (Venable_character_translation) && NILP (translation_table))
2187     translation_table = Vstandard_translation_table_for_decode;
2188
2189   coding->produced_char = 0;
2190   coding->fake_multibyte = 0;
2191   while (src < src_end && (dst_bytes
2192                            ? (dst < adjusted_dst_end)
2193                            : (dst < src - 3)))
2194     {
2195       /* SRC_BASE remembers the start position in source in each loop.
2196          The loop will be exited when there's not enough source text
2197          to analyze two-byte character (within macro ONE_MORE_BYTE).
2198          In that case, SRC is reset to SRC_BASE before exiting.  */
2199       unsigned char *src_base = src;
2200       unsigned char c1 = *src++, c2, c3, c4;
2201
2202       if (c1 < 0x20)
2203         {
2204           if (c1 == '\r')
2205             {
2206               if (coding->eol_type == CODING_EOL_CRLF)
2207                 {
2208                   ONE_MORE_BYTE (c2);
2209                   if (c2 == '\n')
2210                     *dst++ = c2;
2211                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2212                     {
2213                       result = CODING_FINISH_INCONSISTENT_EOL;
2214                       goto label_end_of_loop_2;
2215                     }
2216                   else
2217                     /* To process C2 again, SRC is subtracted by 1.  */
2218                     *dst++ = c1, src--;
2219                 }
2220               else if (coding->eol_type == CODING_EOL_CR)
2221                 *dst++ = '\n';
2222               else
2223                 *dst++ = c1;
2224             }
2225           else if (c1 == '\n'
2226                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2227                    && (coding->eol_type == CODING_EOL_CR
2228                        || coding->eol_type == CODING_EOL_CRLF))
2229             {
2230               result = CODING_FINISH_INCONSISTENT_EOL;
2231               goto label_end_of_loop_2;
2232             }
2233           else
2234             *dst++ = c1;
2235           coding->produced_char++;
2236         }
2237       else if (c1 < 0x80)
2238         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2239       else if (c1 < 0xA0)
2240         {
2241           /* SJIS -> JISX0208 */
2242           if (sjis_p)
2243             {
2244               ONE_MORE_BYTE (c2);
2245               if (c2 >= 0x40)
2246                 {
2247                   DECODE_SJIS (c1, c2, c3, c4);
2248                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2249                 }
2250               else
2251                 goto label_invalid_code_2;
2252             }
2253           else
2254             goto label_invalid_code_1;
2255         }
2256       else if (c1 < 0xE0)
2257         {
2258           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
2259           if (sjis_p)
2260             DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2261                                         /* dummy */ c2);
2262           else
2263             {
2264               int charset;
2265
2266               ONE_MORE_BYTE (c2);
2267               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2268                 {
2269                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2270                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2271                 }
2272               else
2273                 goto label_invalid_code_2;
2274             }
2275         }
2276       else                      /* C1 >= 0xE0 */
2277         {
2278           /* SJIS -> JISX0208, BIG5 -> Big5 */
2279           if (sjis_p)
2280             {
2281               ONE_MORE_BYTE (c2);
2282               if (c2 >= 0x40)
2283                 {
2284                   DECODE_SJIS (c1, c2, c3, c4);
2285                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2286                 }
2287               else
2288                 goto label_invalid_code_2;
2289             }
2290           else
2291             {
2292               int charset;
2293
2294               ONE_MORE_BYTE (c2);
2295               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2296                 {
2297                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2298                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2299                 }
2300               else
2301                 goto label_invalid_code_2;
2302             }
2303         }
2304       continue;
2305
2306     label_invalid_code_1:
2307       *dst++ = c1;
2308       coding->produced_char++;
2309       coding->fake_multibyte = 1;
2310       continue;
2311
2312     label_invalid_code_2:
2313       *dst++ = c1; *dst++= c2;
2314       coding->produced_char += 2;
2315       coding->fake_multibyte = 1;
2316       continue;
2317
2318     label_end_of_loop:
2319       result = CODING_FINISH_INSUFFICIENT_SRC;
2320     label_end_of_loop_2:
2321       src = src_base;
2322       break;
2323     }
2324
2325   if (src < src_end)
2326     {
2327       if (result == CODING_FINISH_NORMAL)
2328         result = CODING_FINISH_INSUFFICIENT_DST;
2329       else if (result != CODING_FINISH_INCONSISTENT_EOL
2330                && coding->mode & CODING_MODE_LAST_BLOCK)
2331         {
2332           src_bytes = src_end - src;
2333           if (dst_bytes && (dst_end - dst < src_bytes))
2334             src_bytes = dst_end - dst;
2335           bcopy (dst, src, src_bytes);
2336           src += src_bytes;
2337           dst += src_bytes;
2338           coding->fake_multibyte = 1;
2339         }
2340     }
2341
2342   coding->consumed = coding->consumed_char = src - source;
2343   coding->produced = dst - destination;
2344   return result;
2345 }
2346
2347 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2348    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2349    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2350    sure that all these charsets are registered as official charset
2351    (i.e. do not have extended leading-codes).  Characters of other
2352    charsets are produced without any encoding.  If SJIS_P is 1, encode
2353    SJIS text, else encode BIG5 text.  */
2354
2355 int
2356 encode_coding_sjis_big5 (coding, source, destination,
2357                          src_bytes, dst_bytes, sjis_p)
2358      struct coding_system *coding;
2359      unsigned char *source, *destination;
2360      int src_bytes, dst_bytes;
2361      int sjis_p;
2362 {
2363   unsigned char *src = source;
2364   unsigned char *src_end = source + src_bytes;
2365   unsigned char *dst = destination;
2366   unsigned char *dst_end = destination + dst_bytes;
2367   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2368      from DST_END to assure overflow checking is necessary only at the
2369      head of loop.  */
2370   unsigned char *adjusted_dst_end = dst_end - 1;
2371   Lisp_Object translation_table
2372       = coding->translation_table_for_encode;
2373   int result = CODING_FINISH_NORMAL;
2374
2375   if (!NILP (Venable_character_translation) && NILP (translation_table))
2376     translation_table = Vstandard_translation_table_for_encode;
2377
2378   coding->consumed_char = 0;
2379   coding->fake_multibyte = 0;
2380   while (src < src_end && (dst_bytes
2381                            ? (dst < adjusted_dst_end)
2382                            : (dst < src - 1)))
2383     {
2384       /* SRC_BASE remembers the start position in source in each loop.
2385          The loop will be exited when there's not enough source text
2386          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2387          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2388          before exiting.  */
2389       unsigned char *src_base = src;
2390       unsigned char c1 = *src++, c2, c3, c4;
2391
2392       if (coding->composing)
2393         {
2394           if (c1 == 0xA0)
2395             {
2396               ONE_MORE_BYTE (c1);
2397               c1 &= 0x7F;
2398             }
2399           else if (c1 >= 0xA0)
2400             c1 -= 0x20;
2401           else
2402             coding->composing = 0;
2403         }
2404
2405       switch (emacs_code_class[c1])
2406         {
2407         case EMACS_ascii_code:
2408           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2409           break;
2410
2411         case EMACS_control_code:
2412           *dst++ = c1;
2413           coding->consumed_char++;
2414           break;
2415
2416         case EMACS_carriage_return_code:
2417           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2418             {
2419               *dst++ = c1;
2420               coding->consumed_char++;
2421               break;
2422             }
2423           /* fall down to treat '\r' as '\n' ...  */
2424
2425         case EMACS_linefeed_code:
2426           if (coding->eol_type == CODING_EOL_LF
2427               || coding->eol_type == CODING_EOL_UNDECIDED)
2428             *dst++ = '\n';
2429           else if (coding->eol_type == CODING_EOL_CRLF)
2430             *dst++ = '\r', *dst++ = '\n';
2431           else
2432             *dst++ = '\r';
2433           coding->consumed_char++;
2434           break;
2435
2436         case EMACS_leading_code_2:
2437           ONE_MORE_BYTE (c2);
2438           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2439           break;
2440
2441         case EMACS_leading_code_3:
2442           TWO_MORE_BYTES (c2, c3);
2443           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2444           break;
2445
2446         case EMACS_leading_code_4:
2447           THREE_MORE_BYTES (c2, c3, c4);
2448           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2449           break;
2450
2451         case EMACS_leading_code_composition:
2452           coding->composing = 1;
2453           break;
2454
2455         default:                /* i.e. case EMACS_invalid_code: */
2456           *dst++ = c1;
2457           coding->consumed_char++;
2458         }
2459       continue;
2460
2461     label_end_of_loop:
2462       result = CODING_FINISH_INSUFFICIENT_SRC;
2463       src = src_base;
2464       break;
2465     }
2466
2467   if (result == CODING_FINISH_NORMAL
2468       && src < src_end)
2469     result = CODING_FINISH_INSUFFICIENT_DST;
2470   coding->consumed = src - source;
2471   coding->produced = coding->produced_char = dst - destination;
2472   return result;
2473 }
2474
2475 \f
2476 /*** 5. End-of-line handlers ***/
2477
2478 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2479    This function is called only when `coding->eol_type' is
2480    CODING_EOL_CRLF or CODING_EOL_CR.  */
2481
2482 int
2483 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2484      struct coding_system *coding;
2485      unsigned char *source, *destination;
2486      int src_bytes, dst_bytes;
2487 {
2488   unsigned char *src = source;
2489   unsigned char *src_end = source + src_bytes;
2490   unsigned char *dst = destination;
2491   unsigned char *dst_end = destination + dst_bytes;
2492   unsigned char c;
2493   int result = CODING_FINISH_NORMAL;
2494
2495   coding->fake_multibyte = 0;
2496
2497   if (src_bytes <= 0)
2498     return result;
2499
2500   switch (coding->eol_type)
2501     {
2502     case CODING_EOL_CRLF:
2503       {
2504         /* Since the maximum bytes produced by each loop is 2, we
2505            subtract 1 from DST_END to assure overflow checking is
2506            necessary only at the head of loop.  */
2507         unsigned char *adjusted_dst_end = dst_end - 1;
2508
2509         while (src < src_end && (dst_bytes
2510                                  ? (dst < adjusted_dst_end)
2511                                  : (dst < src - 1)))
2512           {
2513             unsigned char *src_base = src;
2514
2515             c = *src++;
2516             if (c == '\r')
2517               {
2518                 ONE_MORE_BYTE (c);
2519                 if (c != '\n')
2520                   {
2521                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2522                       {
2523                         result = CODING_FINISH_INCONSISTENT_EOL;
2524                         goto label_end_of_loop_2;
2525                       }
2526                     *dst++ = '\r';
2527                     if (BASE_LEADING_CODE_P (c))
2528                       coding->fake_multibyte = 1;
2529                   }
2530                 *dst++ = c;
2531               }
2532             else if (c == '\n'
2533                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2534               {
2535                 result = CODING_FINISH_INCONSISTENT_EOL;
2536                 goto label_end_of_loop_2;
2537               }
2538             else
2539               {
2540                 *dst++ = c;
2541                 if (BASE_LEADING_CODE_P (c))
2542                   coding->fake_multibyte = 1;
2543               }
2544             continue;
2545
2546           label_end_of_loop:
2547             result = CODING_FINISH_INSUFFICIENT_SRC;
2548           label_end_of_loop_2:
2549             src = src_base;
2550             break;
2551           }
2552         if (result == CODING_FINISH_NORMAL
2553             && src < src_end)
2554           result = CODING_FINISH_INSUFFICIENT_DST;
2555       }
2556       break;
2557
2558     case CODING_EOL_CR:
2559       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2560         {
2561           while (src < src_end)
2562             {
2563               if ((c = *src++) == '\n')
2564                 break;
2565               if (BASE_LEADING_CODE_P (c))
2566                 coding->fake_multibyte = 1;
2567             }
2568           if (*--src == '\n')
2569             {
2570               src_bytes = src - source;
2571               result = CODING_FINISH_INCONSISTENT_EOL;
2572             }
2573         }
2574       if (dst_bytes && src_bytes > dst_bytes)
2575         {
2576           result = CODING_FINISH_INSUFFICIENT_DST;
2577           src_bytes = dst_bytes;
2578         }
2579       if (dst_bytes)
2580         bcopy (source, destination, src_bytes);
2581       else
2582         safe_bcopy (source, destination, src_bytes);
2583       src = source + src_bytes;
2584       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2585       break;
2586
2587     default:                    /* i.e. case: CODING_EOL_LF */
2588       if (dst_bytes && src_bytes > dst_bytes)
2589         {
2590           result = CODING_FINISH_INSUFFICIENT_DST;
2591           src_bytes = dst_bytes;
2592         }
2593       if (dst_bytes)
2594         bcopy (source, destination, src_bytes);
2595       else
2596         safe_bcopy (source, destination, src_bytes);
2597       src += src_bytes;
2598       dst += dst_bytes;
2599       coding->fake_multibyte = 1;
2600       break;
2601     }
2602
2603   coding->consumed = coding->consumed_char = src - source;
2604   coding->produced = coding->produced_char = dst - destination;
2605   return result;
2606 }
2607
2608 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2609    format of end-of-line according to `coding->eol_type'.  If
2610    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2611    '\r' in source text also means end-of-line.  */
2612
2613 int
2614 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2615      struct coding_system *coding;
2616      unsigned char *source, *destination;
2617      int src_bytes, dst_bytes;
2618 {
2619   unsigned char *src = source;
2620   unsigned char *dst = destination;
2621   int result = CODING_FINISH_NORMAL;
2622
2623   coding->fake_multibyte = 0;
2624
2625   if (coding->eol_type == CODING_EOL_CRLF)
2626     {
2627       unsigned char c;
2628       unsigned char *src_end = source + src_bytes;
2629       unsigned char *dst_end = destination + dst_bytes;
2630       /* Since the maximum bytes produced by each loop is 2, we
2631          subtract 1 from DST_END to assure overflow checking is
2632          necessary only at the head of loop.  */
2633       unsigned char *adjusted_dst_end = dst_end - 1;
2634
2635       while (src < src_end && (dst_bytes
2636                                ? (dst < adjusted_dst_end)
2637                                : (dst < src - 1)))
2638         {
2639           c = *src++;
2640           if (c == '\n'
2641               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2642             *dst++ = '\r', *dst++ = '\n';
2643           else
2644             {
2645               *dst++ = c;
2646               if (BASE_LEADING_CODE_P (c))
2647                 coding->fake_multibyte = 1;
2648             }
2649         }
2650       if (src < src_end)
2651         result = CODING_FINISH_INSUFFICIENT_DST;
2652     }
2653   else
2654     {
2655       unsigned char c;
2656
2657       if (dst_bytes && src_bytes > dst_bytes)
2658         {
2659           src_bytes = dst_bytes;
2660           result = CODING_FINISH_INSUFFICIENT_DST;
2661         }
2662       if (dst_bytes)
2663         bcopy (source, destination, src_bytes);
2664       else
2665         {
2666           safe_bcopy (source, destination, src_bytes);
2667           dst_bytes = src_bytes;
2668         }
2669       if (coding->eol_type == CODING_EOL_CRLF)
2670         {
2671           while (src_bytes--)
2672             {
2673               if ((c = *dst++) == '\n')
2674                 dst[-1] = '\r';
2675               else if (BASE_LEADING_CODE_P (c))
2676                   coding->fake_multibyte = 1;
2677             }
2678         }
2679       else
2680         {
2681           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2682             {
2683               while (src_bytes--)
2684                 if (*dst++ == '\r') dst[-1] = '\n';
2685             }
2686           coding->fake_multibyte = 1;
2687         }
2688       src = source + dst_bytes;
2689       dst = destination + dst_bytes;
2690     }
2691
2692   coding->consumed = coding->consumed_char = src - source;
2693   coding->produced = coding->produced_char = dst - destination;
2694   return result;
2695 }
2696
2697 \f
2698 /*** 6. C library functions ***/
2699
2700 /* In Emacs Lisp, coding system is represented by a Lisp symbol which
2701    has a property `coding-system'.  The value of this property is a
2702    vector of length 5 (called as coding-vector).  Among elements of
2703    this vector, the first (element[0]) and the fifth (element[4])
2704    carry important information for decoding/encoding.  Before
2705    decoding/encoding, this information should be set in fields of a
2706    structure of type `coding_system'.
2707
2708    A value of property `coding-system' can be a symbol of another
2709    subsidiary coding-system.  In that case, Emacs gets coding-vector
2710    from that symbol.
2711
2712    `element[0]' contains information to be set in `coding->type'.  The
2713    value and its meaning is as follows:
2714
2715    0 -- coding_type_emacs_mule
2716    1 -- coding_type_sjis
2717    2 -- coding_type_iso2022
2718    3 -- coding_type_big5
2719    4 -- coding_type_ccl encoder/decoder written in CCL
2720    nil -- coding_type_no_conversion
2721    t -- coding_type_undecided (automatic conversion on decoding,
2722                                no-conversion on encoding)
2723
2724    `element[4]' contains information to be set in `coding->flags' and
2725    `coding->spec'.  The meaning varies by `coding->type'.
2726
2727    If `coding->type' is `coding_type_iso2022', element[4] is a vector
2728    of length 32 (of which the first 17 sub-elements are used now).
2729    Meanings of these sub-elements are:
2730
2731    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2732         If the value is an integer of valid charset, the charset is
2733         assumed to be designated to graphic register N initially.
2734
2735         If the value is minus, it is a minus value of charset which
2736         reserves graphic register N, which means that the charset is
2737         not designated initially but should be designated to graphic
2738         register N just before encoding a character in that charset.
2739
2740         If the value is nil, graphic register N is never used on
2741         encoding.
2742
2743    sub-element[N] where N is 4 through 16: to be set in `coding->flags'
2744         Each value takes t or nil.  See the section ISO2022 of
2745         `coding.h' for more information.
2746
2747    If `coding->type' is `coding_type_big5', element[4] is t to denote
2748    BIG5-ETen or nil to denote BIG5-HKU.
2749
2750    If `coding->type' takes the other value, element[4] is ignored.
2751
2752    Emacs Lisp's coding system also carries information about format of
2753    end-of-line in a value of property `eol-type'.  If the value is
2754    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2755    means CODING_EOL_CR.  If it is not integer, it should be a vector
2756    of subsidiary coding systems of which property `eol-type' has one
2757    of above values.
2758
2759 */
2760
2761 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2762    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2763    is setup so that no conversion is necessary and return -1, else
2764    return 0.  */
2765
2766 int
2767 setup_coding_system (coding_system, coding)
2768      Lisp_Object coding_system;
2769      struct coding_system *coding;
2770 {
2771   Lisp_Object coding_spec, coding_type, eol_type, plist;
2772   Lisp_Object val;
2773   int i;
2774
2775   /* Initialize some fields required for all kinds of coding systems.  */
2776   coding->symbol = coding_system;
2777   coding->common_flags = 0;
2778   coding->mode = 0;
2779   coding->heading_ascii = -1;
2780   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2781   coding_spec = Fget (coding_system, Qcoding_system);
2782   if (!VECTORP (coding_spec)
2783       || XVECTOR (coding_spec)->size != 5
2784       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2785     goto label_invalid_coding_system;
2786
2787   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2788   if (VECTORP (eol_type))
2789     {
2790       coding->eol_type = CODING_EOL_UNDECIDED;
2791       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2792     }
2793   else if (XFASTINT (eol_type) == 1)
2794     {
2795       coding->eol_type = CODING_EOL_CRLF;
2796       coding->common_flags
2797         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2798     }
2799   else if (XFASTINT (eol_type) == 2)
2800     {
2801       coding->eol_type = CODING_EOL_CR;
2802       coding->common_flags
2803         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2804     }
2805   else
2806     coding->eol_type = CODING_EOL_LF;
2807
2808   coding_type = XVECTOR (coding_spec)->contents[0];
2809   /* Try short cut.  */
2810   if (SYMBOLP (coding_type))
2811     {
2812       if (EQ (coding_type, Qt))
2813         {
2814           coding->type = coding_type_undecided;
2815           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2816         }
2817       else
2818         coding->type = coding_type_no_conversion;
2819       return 0;
2820     }
2821
2822   /* Initialize remaining fields.  */
2823   coding->composing = 0;
2824   coding->translation_table_for_decode = Qnil;
2825   coding->translation_table_for_encode = Qnil;
2826
2827   /* Get values of coding system properties:
2828      `post-read-conversion', `pre-write-conversion',
2829      `translation-table-for-decode', `translation-table-for-encode'.  */
2830   plist = XVECTOR (coding_spec)->contents[3];
2831   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2832   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2833   val = Fplist_get (plist, Qtranslation_table_for_decode);
2834   if (SYMBOLP (val))
2835     val = Fget (val, Qtranslation_table_for_decode);
2836   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2837   val = Fplist_get (plist, Qtranslation_table_for_encode);
2838   if (SYMBOLP (val))
2839     val = Fget (val, Qtranslation_table_for_encode);
2840   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
2841   val = Fplist_get (plist, Qcoding_category);
2842   if (!NILP (val))
2843     {
2844       val = Fget (val, Qcoding_category_index);
2845       if (INTEGERP (val))
2846         coding->category_idx = XINT (val);
2847       else
2848         goto label_invalid_coding_system;
2849     }
2850   else
2851     goto label_invalid_coding_system;
2852
2853   val = Fplist_get (plist, Qsafe_charsets);
2854   if (EQ (val, Qt))
2855     {
2856       for (i = 0; i <= MAX_CHARSET; i++)
2857         coding->safe_charsets[i] = 1;
2858     }
2859   else
2860     {
2861       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2862       while (CONSP (val))
2863         {
2864           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2865             coding->safe_charsets[i] = 1;
2866           val = XCONS (val)->cdr;
2867         }
2868     }
2869
2870   switch (XFASTINT (coding_type))
2871     {
2872     case 0:
2873       coding->type = coding_type_emacs_mule;
2874       if (!NILP (coding->post_read_conversion))
2875         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2876       if (!NILP (coding->pre_write_conversion))
2877         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2878       break;
2879
2880     case 1:
2881       coding->type = coding_type_sjis;
2882       coding->common_flags
2883         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2884       break;
2885
2886     case 2:
2887       coding->type = coding_type_iso2022;
2888       coding->common_flags
2889         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2890       {
2891         Lisp_Object val, temp;
2892         Lisp_Object *flags;
2893         int i, charset, reg_bits = 0;
2894
2895         val = XVECTOR (coding_spec)->contents[4];
2896
2897         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2898           goto label_invalid_coding_system;
2899
2900         flags = XVECTOR (val)->contents;
2901         coding->flags
2902           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2903              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2904              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2905              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2906              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2907              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2908              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2909              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2910              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2911              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2912              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2913              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2914              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2915              );
2916
2917         /* Invoke graphic register 0 to plane 0.  */
2918         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2919         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2920         CODING_SPEC_ISO_INVOCATION (coding, 1)
2921           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2922         /* Not single shifting at first.  */
2923         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2924         /* Beginning of buffer should also be regarded as bol. */
2925         CODING_SPEC_ISO_BOL (coding) = 1;
2926
2927         for (charset = 0; charset <= MAX_CHARSET; charset++)
2928           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2929         val = Vcharset_revision_alist;
2930         while (CONSP (val))
2931           {
2932             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2933             if (charset >= 0
2934                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2935                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2936               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2937             val = XCONS (val)->cdr;
2938           }
2939
2940         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2941            FLAGS[REG] can be one of below:
2942                 integer CHARSET: CHARSET occupies register I,
2943                 t: designate nothing to REG initially, but can be used
2944                   by any charsets,
2945                 list of integer, nil, or t: designate the first
2946                   element (if integer) to REG initially, the remaining
2947                   elements (if integer) is designated to REG on request,
2948                   if an element is t, REG can be used by any charsets,
2949                 nil: REG is never used.  */
2950         for (charset = 0; charset <= MAX_CHARSET; charset++)
2951           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2952             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2953         for (i = 0; i < 4; i++)
2954           {
2955             if (INTEGERP (flags[i])
2956                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2957                 || (charset = get_charset_id (flags[i])) >= 0)
2958               {
2959                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2960                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2961               }
2962             else if (EQ (flags[i], Qt))
2963               {
2964                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2965                 reg_bits |= 1 << i;
2966                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2967               }
2968             else if (CONSP (flags[i]))
2969               {
2970                 Lisp_Object tail = flags[i];
2971
2972                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2973                 if (INTEGERP (XCONS (tail)->car)
2974                     && (charset = XINT (XCONS (tail)->car),
2975                         CHARSET_VALID_P (charset))
2976                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2977                   {
2978                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2979                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2980                   }
2981                 else
2982                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2983                 tail = XCONS (tail)->cdr;
2984                 while (CONSP (tail))
2985                   {
2986                     if (INTEGERP (XCONS (tail)->car)
2987                         && (charset = XINT (XCONS (tail)->car),
2988                             CHARSET_VALID_P (charset))
2989                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2990                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2991                         = i;
2992                     else if (EQ (XCONS (tail)->car, Qt))
2993                       reg_bits |= 1 << i;
2994                     tail = XCONS (tail)->cdr;
2995                   }
2996               }
2997             else
2998               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2999
3000             CODING_SPEC_ISO_DESIGNATION (coding, i)
3001               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3002           }
3003
3004         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3005           {
3006             /* REG 1 can be used only by locking shift in 7-bit env.  */
3007             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3008               reg_bits &= ~2;
3009             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3010               /* Without any shifting, only REG 0 and 1 can be used.  */
3011               reg_bits &= 3;
3012           }
3013
3014         if (reg_bits)
3015           for (charset = 0; charset <= MAX_CHARSET; charset++)
3016             {
3017               if (CHARSET_VALID_P (charset))
3018                 {
3019                   /* There exist some default graphic registers to be
3020                      used CHARSET.  */
3021
3022                   /* We had better avoid designating a charset of
3023                      CHARS96 to REG 0 as far as possible.  */
3024                   if (CHARSET_CHARS (charset) == 96)
3025                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3026                       = (reg_bits & 2
3027                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3028                   else
3029                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3030                       = (reg_bits & 1
3031                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3032                 }
3033             }
3034       }
3035       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3036       coding->spec.iso2022.last_invalid_designation_register = -1;
3037       break;
3038
3039     case 3:
3040       coding->type = coding_type_big5;
3041       coding->common_flags
3042         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3043       coding->flags
3044         = (NILP (XVECTOR (coding_spec)->contents[4])
3045            ? CODING_FLAG_BIG5_HKU
3046            : CODING_FLAG_BIG5_ETEN);
3047       break;
3048
3049     case 4:
3050       coding->type = coding_type_ccl;
3051       coding->common_flags
3052         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3053       {
3054         Lisp_Object val = XVECTOR (coding_spec)->contents[4];
3055         Lisp_Object decoder, encoder;
3056
3057         if (CONSP  (val)
3058             && SYMBOLP (XCONS (val)->car)
3059             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3060             && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3061             && SYMBOLP (XCONS (val)->cdr)
3062             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3063             && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3064           {
3065             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3066             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3067           }
3068         else
3069           goto label_invalid_coding_system;
3070       }
3071       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3072       break;
3073
3074     case 5:
3075       coding->type = coding_type_raw_text;
3076       break;
3077
3078     default:
3079       goto label_invalid_coding_system;
3080     }
3081   return 0;
3082
3083  label_invalid_coding_system:
3084   coding->type = coding_type_no_conversion;
3085   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3086   coding->common_flags = 0;
3087   coding->eol_type = CODING_EOL_LF;
3088   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3089   return -1;
3090 }
3091
3092 /* Emacs has a mechanism to automatically detect a coding system if it
3093    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3094    it's impossible to distinguish some coding systems accurately
3095    because they use the same range of codes.  So, at first, coding
3096    systems are categorized into the following categories:
3097
3098    o coding-category-emacs-mule
3099
3100         The category for a coding system which has the same code range
3101         as Emacs' internal format.  Assigned the coding-system (Lisp
3102         symbol) `emacs-mule' by default.
3103
3104    o coding-category-sjis
3105
3106         The category for a coding system which has the same code range
3107         as SJIS.  Assigned the coding-system (Lisp
3108         symbol) `japanese-shift-jis' by default.
3109
3110    o coding-category-iso-7
3111
3112         The category for a coding system which has the same code range
3113         as ISO2022 of 7-bit environment.  This doesn't use any locking
3114         shift and single shift functions.  This can encode/decode all
3115         charsets.  Assigned the coding-system (Lisp symbol)
3116         `iso-2022-7bit' by default.
3117
3118    o coding-category-iso-7-tight
3119
3120         Same as coding-category-iso-7 except that this can
3121         encode/decode only the specified charsets.
3122
3123    o coding-category-iso-8-1
3124
3125         The category for a coding system which has the same code range
3126         as ISO2022 of 8-bit environment and graphic plane 1 used only
3127         for DIMENSION1 charset.  This doesn't use any locking shift
3128         and single shift functions.  Assigned the coding-system (Lisp
3129         symbol) `iso-latin-1' by default.
3130
3131    o coding-category-iso-8-2
3132
3133         The category for a coding system which has the same code range
3134         as ISO2022 of 8-bit environment and graphic plane 1 used only
3135         for DIMENSION2 charset.  This doesn't use any locking shift
3136         and single shift functions.  Assigned the coding-system (Lisp
3137         symbol) `japanese-iso-8bit' by default.
3138
3139    o coding-category-iso-7-else
3140
3141         The category for a coding system which has the same code range
3142         as ISO2022 of 7-bit environment but uses locking shift or
3143         single shift functions.  Assigned the coding-system (Lisp
3144         symbol) `iso-2022-7bit-lock' by default.
3145
3146    o coding-category-iso-8-else
3147
3148         The category for a coding system which has the same code range
3149         as ISO2022 of 8-bit environment but uses locking shift or
3150         single shift functions.  Assigned the coding-system (Lisp
3151         symbol) `iso-2022-8bit-ss2' by default.
3152
3153    o coding-category-big5
3154
3155         The category for a coding system which has the same code range
3156         as BIG5.  Assigned the coding-system (Lisp symbol)
3157         `cn-big5' by default.
3158
3159    o coding-category-binary
3160
3161         The category for a coding system not categorized in any of the
3162         above.  Assigned the coding-system (Lisp symbol)
3163         `no-conversion' by default.
3164
3165    Each of them is a Lisp symbol and the value is an actual
3166    `coding-system's (this is also a Lisp symbol) assigned by a user.
3167    What Emacs does actually is to detect a category of coding system.
3168    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3169    decide only one possible category, it selects a category of the
3170    highest priority.  Priorities of categories are also specified by a
3171    user in a Lisp variable `coding-category-list'.
3172
3173 */
3174
3175 static
3176 int ascii_skip_code[256];
3177
3178 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3179    If it detects possible coding systems, return an integer in which
3180    appropriate flag bits are set.  Flag bits are defined by macros
3181    CODING_CATEGORY_MASK_XXX in `coding.h'.
3182
3183    How many ASCII characters are at the head is returned as *SKIP.  */
3184
3185 static int
3186 detect_coding_mask (source, src_bytes, priorities, skip)
3187      unsigned char *source;
3188      int src_bytes, *priorities, *skip;
3189 {
3190   register unsigned char c;
3191   unsigned char *src = source, *src_end = source + src_bytes;
3192   unsigned int mask;
3193   int i;
3194
3195   /* At first, skip all ASCII characters and control characters except
3196      for three ISO2022 specific control characters.  */
3197   ascii_skip_code[ISO_CODE_SO] = 0;
3198   ascii_skip_code[ISO_CODE_SI] = 0;
3199   ascii_skip_code[ISO_CODE_ESC] = 0;
3200
3201  label_loop_detect_coding:
3202   while (src < src_end && ascii_skip_code[*src]) src++;
3203   *skip = src - source;
3204
3205   if (src >= src_end)
3206     /* We found nothing other than ASCII.  There's nothing to do.  */
3207     return 0;
3208
3209   /* The text seems to be encoded in some multilingual coding system.
3210      Now, try to find in which coding system the text is encoded.  */
3211   if (c < 0x80)
3212     {
3213       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3214       /* C is an ISO2022 specific control code of C0.  */
3215       mask = detect_coding_iso2022 (src, src_end);
3216       if (mask == 0)
3217         {
3218           /* No valid ISO2022 code follows C.  Try again.  */
3219           src++;
3220           if (c == ISO_CODE_ESC)
3221             ascii_skip_code[ISO_CODE_ESC] = 1;
3222           else
3223             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3224           goto label_loop_detect_coding;
3225         }
3226       if (priorities)
3227         goto label_return_highest_only;
3228     }
3229   else
3230     {
3231       int try;
3232
3233       if (c < 0xA0)
3234         {
3235           /* C is the first byte of SJIS character code,
3236              or a leading-code of Emacs' internal format (emacs-mule).  */
3237           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3238
3239           /* Or, if C is a special latin extra code,
3240              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3241              or is an ISO2022 control-sequence-introducer (CSI),
3242              we should also consider the possibility of ISO2022 codings.  */
3243           if ((VECTORP (Vlatin_extra_code_table)
3244                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3245               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3246               || (c == ISO_CODE_CSI
3247                   && (src < src_end
3248                       && (*src == ']'
3249                           || ((*src == '0' || *src == '1' || *src == '2')
3250                               && src + 1 < src_end
3251                               && src[1] == ']')))))
3252             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3253                      | CODING_CATEGORY_MASK_ISO_8BIT);
3254         }
3255       else
3256         /* C is a character of ISO2022 in graphic plane right,
3257            or a SJIS's 1-byte character code (i.e. JISX0201),
3258            or the first byte of BIG5's 2-byte code.  */
3259         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3260                 | CODING_CATEGORY_MASK_ISO_8BIT
3261                 | CODING_CATEGORY_MASK_SJIS
3262                 | CODING_CATEGORY_MASK_BIG5);
3263
3264       mask = 0;
3265       if (priorities)
3266         {
3267           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3268             {
3269               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3270                 mask = detect_coding_iso2022 (src, src_end);
3271               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3272                 mask = detect_coding_sjis (src, src_end);
3273               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3274                 mask = detect_coding_big5 (src, src_end);
3275               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3276                 mask = detect_coding_emacs_mule (src, src_end);
3277               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3278                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3279               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3280                 mask = CODING_CATEGORY_MASK_BINARY;
3281               if (mask)
3282                 goto label_return_highest_only;
3283             }
3284           return CODING_CATEGORY_MASK_RAW_TEXT;
3285         }
3286       if (try & CODING_CATEGORY_MASK_ISO)
3287         mask |= detect_coding_iso2022 (src, src_end);
3288       if (try & CODING_CATEGORY_MASK_SJIS)
3289         mask |= detect_coding_sjis (src, src_end);
3290       if (try & CODING_CATEGORY_MASK_BIG5)
3291         mask |= detect_coding_big5 (src, src_end);
3292       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3293         mask |= detect_coding_emacs_mule (src, src_end);
3294     }
3295   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3296
3297  label_return_highest_only:
3298   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3299     {
3300       if (mask & priorities[i])
3301         return priorities[i];
3302     }
3303   return CODING_CATEGORY_MASK_RAW_TEXT;
3304 }
3305
3306 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3307    The information of the detected coding system is set in CODING.  */
3308
3309 void
3310 detect_coding (coding, src, src_bytes)
3311      struct coding_system *coding;
3312      unsigned char *src;
3313      int src_bytes;
3314 {
3315   unsigned int idx;
3316   int skip, mask, i;
3317   Lisp_Object val = Vcoding_category_list;
3318
3319   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3320   coding->heading_ascii = skip;
3321
3322   if (!mask) return;
3323
3324   /* We found a single coding system of the highest priority in MASK.  */
3325   idx = 0;
3326   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3327   if (! mask)
3328     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3329
3330   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3331
3332   if (coding->eol_type != CODING_EOL_UNDECIDED)
3333     {
3334       Lisp_Object tmp = Fget (val, Qeol_type);
3335
3336       if (VECTORP (tmp))
3337         val = XVECTOR (tmp)->contents[coding->eol_type];
3338     }
3339   setup_coding_system (val, coding);
3340   /* Set this again because setup_coding_system reset this member.  */
3341   coding->heading_ascii = skip;
3342 }
3343
3344 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3345    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3346    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3347
3348    How many non-eol characters are at the head is returned as *SKIP.  */
3349
3350 #define MAX_EOL_CHECK_COUNT 3
3351
3352 static int
3353 detect_eol_type (source, src_bytes, skip)
3354      unsigned char *source;
3355      int src_bytes, *skip;
3356 {
3357   unsigned char *src = source, *src_end = src + src_bytes;
3358   unsigned char c;
3359   int total = 0;                /* How many end-of-lines are found so far.  */
3360   int eol_type = CODING_EOL_UNDECIDED;
3361   int this_eol_type;
3362
3363   *skip = 0;
3364
3365   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3366     {
3367       c = *src++;
3368       if (c == '\n' || c == '\r')
3369         {
3370           if (*skip == 0)
3371             *skip = src - 1 - source;
3372           total++;
3373           if (c == '\n')
3374             this_eol_type = CODING_EOL_LF;
3375           else if (src >= src_end || *src != '\n')
3376             this_eol_type = CODING_EOL_CR;
3377           else
3378             this_eol_type = CODING_EOL_CRLF, src++;
3379
3380           if (eol_type == CODING_EOL_UNDECIDED)
3381             /* This is the first end-of-line.  */
3382             eol_type = this_eol_type;
3383           else if (eol_type != this_eol_type)
3384             {
3385               /* The found type is different from what found before.  */
3386               eol_type = CODING_EOL_INCONSISTENT;
3387               break;
3388             }
3389         }
3390     }
3391
3392   if (*skip == 0)
3393     *skip = src_end - source;
3394   return eol_type;
3395 }
3396
3397 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3398    is encoded.  If it detects an appropriate format of end-of-line, it
3399    sets the information in *CODING.  */
3400
3401 void
3402 detect_eol (coding, src, src_bytes)
3403      struct coding_system *coding;
3404      unsigned char *src;
3405      int src_bytes;
3406 {
3407   Lisp_Object val;
3408   int skip;
3409   int eol_type = detect_eol_type (src, src_bytes, &skip);
3410
3411   if (coding->heading_ascii > skip)
3412     coding->heading_ascii = skip;
3413   else
3414     skip = coding->heading_ascii;
3415
3416   if (eol_type == CODING_EOL_UNDECIDED)
3417     return;
3418   if (eol_type == CODING_EOL_INCONSISTENT)
3419     {
3420 #if 0
3421       /* This code is suppressed until we find a better way to
3422          distinguish raw text file and binary file.  */
3423
3424       /* If we have already detected that the coding is raw-text, the
3425          coding should actually be no-conversion.  */
3426       if (coding->type == coding_type_raw_text)
3427         {
3428           setup_coding_system (Qno_conversion, coding);
3429           return;
3430         }
3431       /* Else, let's decode only text code anyway.  */
3432 #endif /* 0 */
3433       eol_type = CODING_EOL_LF;
3434     }
3435
3436   val = Fget (coding->symbol, Qeol_type);
3437   if (VECTORP (val) && XVECTOR (val)->size == 3)
3438     {
3439       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3440       coding->heading_ascii = skip;
3441     }
3442 }
3443
3444 #define CONVERSION_BUFFER_EXTRA_ROOM 256
3445
3446 #define DECODING_BUFFER_MAG(coding)                                          \
3447   (coding->type == coding_type_iso2022                                       \
3448    ? 3                                                                       \
3449    : ((coding->type == coding_type_sjis || coding->type == coding_type_big5) \
3450       ? 2                                                                    \
3451       : (coding->type == coding_type_raw_text                                \
3452          ? 1                                                                 \
3453          : (coding->type == coding_type_ccl                                  \
3454             ? coding->spec.ccl.decoder.buf_magnification                     \
3455             : 2))))
3456
3457 /* Return maximum size (bytes) of a buffer enough for decoding
3458    SRC_BYTES of text encoded in CODING.  */
3459
3460 int
3461 decoding_buffer_size (coding, src_bytes)
3462      struct coding_system *coding;
3463      int src_bytes;
3464 {
3465   return (src_bytes * DECODING_BUFFER_MAG (coding)
3466           + CONVERSION_BUFFER_EXTRA_ROOM);
3467 }
3468
3469 /* Return maximum size (bytes) of a buffer enough for encoding
3470    SRC_BYTES of text to CODING.  */
3471
3472 int
3473 encoding_buffer_size (coding, src_bytes)
3474      struct coding_system *coding;
3475      int src_bytes;
3476 {
3477   int magnification;
3478
3479   if (coding->type == coding_type_ccl)
3480     magnification = coding->spec.ccl.encoder.buf_magnification;
3481   else
3482     magnification = 3;
3483
3484   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3485 }
3486
/* Smallest size (bytes) get_conversion_buffer will ever allocate.  */
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The shared work buffer handed out by get_conversion_buffer, and its
   current size in bytes.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a SIZE bytes of buffer to be used for encoding
   or decoding.  Sufficient memory is allocated automatically: when
   SIZE exceeds conversion_buffer_size, the buffer is replaced by a
   new one whose size is the current size doubled until it reaches
   SIZE.  The old contents are not preserved, and pointers returned
   by earlier calls become invalid.

   NOTE(review): the result on memory exhaustion depends on what
   xmalloc does in that case; this function performs no check of its
   own -- confirm against xmalloc.  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Doubling can never get anywhere from zero, so start from the
         minimum size if the buffer has not been set up yet.  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;

      while (real_size < size) real_size *= 2;
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3515
3516 int
3517 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3518      struct coding_system *coding;
3519      unsigned char *source, *destination;
3520      int src_bytes, dst_bytes, encodep;
3521 {
3522   struct ccl_program *ccl
3523     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3524   int result;
3525
3526   coding->produced = ccl_driver (ccl, source, destination,
3527                                  src_bytes, dst_bytes, &(coding->consumed));
3528   if (encodep)
3529     {
3530       coding->produced_char = coding->produced;
3531       coding->consumed_char
3532         = multibyte_chars_in_text (source, coding->consumed);
3533     }
3534   else
3535     {
3536       coding->produced_char
3537         = multibyte_chars_in_text (destination, coding->produced);
3538       coding->consumed_char = coding->consumed;
3539     }
3540   switch (ccl->status)
3541     {
3542     case CCL_STAT_SUSPEND_BY_SRC:
3543       result = CODING_FINISH_INSUFFICIENT_SRC;
3544       break;
3545     case CCL_STAT_SUSPEND_BY_DST:
3546       result = CODING_FINISH_INSUFFICIENT_DST;
3547       break;
3548     default:
3549       result = CODING_FINISH_NORMAL;
3550       break;
3551     }
3552   return result;
3553 }
3554
3555 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3556    decoding, it may detect coding system and format of end-of-line if
3557    those are not yet decided.  */
3558
3559 int
3560 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3561      struct coding_system *coding;
3562      unsigned char *source, *destination;
3563      int src_bytes, dst_bytes;
3564 {
3565   int result;
3566
3567   if (src_bytes <= 0)
3568     {
3569       coding->produced = coding->produced_char = 0;
3570       coding->consumed = coding->consumed_char = 0;
3571       coding->fake_multibyte = 0;
3572       return CODING_FINISH_NORMAL;
3573     }
3574
3575   if (coding->type == coding_type_undecided)
3576     detect_coding (coding, source, src_bytes);
3577
3578   if (coding->eol_type == CODING_EOL_UNDECIDED)
3579     detect_eol (coding, source, src_bytes);
3580
3581   switch (coding->type)
3582     {
3583     case coding_type_emacs_mule:
3584     case coding_type_undecided:
3585     case coding_type_raw_text:
3586       if (coding->eol_type == CODING_EOL_LF
3587           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3588         goto label_no_conversion;
3589       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3590       break;
3591
3592     case coding_type_sjis:
3593       result = decode_coding_sjis_big5 (coding, source, destination,
3594                                         src_bytes, dst_bytes, 1);
3595       break;
3596
3597     case coding_type_iso2022:
3598       result = decode_coding_iso2022 (coding, source, destination,
3599                                       src_bytes, dst_bytes);
3600       break;
3601
3602     case coding_type_big5:
3603       result = decode_coding_sjis_big5 (coding, source, destination,
3604                                         src_bytes, dst_bytes, 0);
3605       break;
3606
3607     case coding_type_ccl:
3608       result = ccl_coding_driver (coding, source, destination,
3609                                   src_bytes, dst_bytes, 0);
3610       break;
3611
3612     default:                    /* i.e. case coding_type_no_conversion: */
3613     label_no_conversion:
3614       if (dst_bytes && src_bytes > dst_bytes)
3615         {
3616           coding->produced = dst_bytes;
3617           result = CODING_FINISH_INSUFFICIENT_DST;
3618         }
3619       else
3620         {
3621           coding->produced = src_bytes;
3622           result = CODING_FINISH_NORMAL;
3623         }
3624       if (dst_bytes)
3625         bcopy (source, destination, coding->produced);
3626       else
3627         safe_bcopy (source, destination, coding->produced);
3628       coding->fake_multibyte = 1;
3629       coding->consumed
3630         = coding->consumed_char = coding->produced_char = coding->produced;
3631       break;
3632     }
3633
3634   return result;
3635 }
3636
3637 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3638
3639 int
3640 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3641      struct coding_system *coding;
3642      unsigned char *source, *destination;
3643      int src_bytes, dst_bytes;
3644 {
3645   int result;
3646
3647   if (src_bytes <= 0)
3648     {
3649       coding->produced = coding->produced_char = 0;
3650       coding->consumed = coding->consumed_char = 0;
3651       coding->fake_multibyte = 0;
3652       return CODING_FINISH_NORMAL;
3653     }
3654
3655   switch (coding->type)
3656     {
3657     case coding_type_emacs_mule:
3658     case coding_type_undecided:
3659     case coding_type_raw_text:
3660       if (coding->eol_type == CODING_EOL_LF
3661           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3662         goto label_no_conversion;
3663       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3664       break;
3665
3666     case coding_type_sjis:
3667       result = encode_coding_sjis_big5 (coding, source, destination,
3668                                         src_bytes, dst_bytes, 1);
3669       break;
3670
3671     case coding_type_iso2022:
3672       result = encode_coding_iso2022 (coding, source, destination,
3673                                       src_bytes, dst_bytes);
3674       break;
3675
3676     case coding_type_big5:
3677       result = encode_coding_sjis_big5 (coding, source, destination,
3678                                         src_bytes, dst_bytes, 0);
3679       break;
3680
3681     case coding_type_ccl:
3682       result = ccl_coding_driver (coding, source, destination,
3683                                   src_bytes, dst_bytes, 1);
3684       break;
3685
3686     default:                    /* i.e. case coding_type_no_conversion: */
3687     label_no_conversion:
3688       if (dst_bytes && src_bytes > dst_bytes)
3689         {
3690           coding->produced = dst_bytes;
3691           result = CODING_FINISH_INSUFFICIENT_DST;
3692         }
3693       else
3694         {
3695           coding->produced = src_bytes;
3696           result = CODING_FINISH_NORMAL;
3697         }
3698       if (dst_bytes)
3699         bcopy (source, destination, coding->produced);
3700       else
3701         safe_bcopy (source, destination, coding->produced);
3702       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3703         {
3704           unsigned char *p = destination, *pend = p + coding->produced;
3705           while (p < pend)
3706             if (*p++ == '\015') p[-1] = '\n';
3707         }
3708       coding->fake_multibyte = 1;
3709       coding->consumed
3710         = coding->consumed_char = coding->produced_char = coding->produced;
3711       break;
3712     }
3713
3714   return result;
3715 }
3716
3717 /* Scan text in the region between *BEG and *END (byte positions),
3718    skip characters which we don't have to decode by coding system
3719    CODING at the head and tail, then set *BEG and *END to the region
3720    of the text we actually have to convert.  The caller should move
3721    the gap out of the region in advance.
3722
3723    If STR is not NULL, *BEG and *END are indices into STR.  */
3724
3725 static void
3726 shrink_decoding_region (beg, end, coding, str)
3727      int *beg, *end;
3728      struct coding_system *coding;
3729      unsigned char *str;
3730 {
3731   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3732   int eol_conversion;
3733
3734   if (coding->type == coding_type_ccl
3735       || coding->type == coding_type_undecided
3736       || !NILP (coding->post_read_conversion))
3737     {
3738       /* We can't skip any data.  */
3739       return;
3740     }
3741   else if (coding->type == coding_type_no_conversion)
3742     {
3743       /* We need no conversion, but don't have to skip any data here.
3744          Decoding routine handles them effectively anyway.  */
3745       return;
3746     }
3747
3748   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3749
3750   if ((! eol_conversion) && (coding->heading_ascii >= 0))
3751     /* Detection routine has already found how much we can skip at the
3752        head.  */
3753     *beg += coding->heading_ascii;
3754
3755   if (str)
3756     {
3757       begp_orig = begp = str + *beg;
3758       endp_orig = endp = str + *end;
3759     }
3760   else
3761     {
3762       begp_orig = begp = BYTE_POS_ADDR (*beg);
3763       endp_orig = endp = begp + *end - *beg;
3764     }
3765
3766   switch (coding->type)
3767     {
3768     case coding_type_emacs_mule:
3769     case coding_type_raw_text:
3770       if (eol_conversion)
3771         {
3772           if (coding->heading_ascii < 0)
3773             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3774           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
3775             endp--;
3776           /* Do not consider LF as ascii if preceded by CR, since that
3777              confuses eol decoding. */
3778           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3779             endp++;
3780         }
3781       else
3782         begp = endp;
3783       break;
3784
3785     case coding_type_sjis:
3786     case coding_type_big5:
3787       /* We can skip all ASCII characters at the head.  */
3788       if (coding->heading_ascii < 0)
3789         {
3790           if (eol_conversion)
3791             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3792           else
3793             while (begp < endp && *begp < 0x80) begp++;
3794         }
3795       /* We can skip all ASCII characters at the tail except for the
3796          second byte of SJIS or BIG5 code.  */
3797       if (eol_conversion)
3798         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3799       else
3800         while (begp < endp && endp[-1] < 0x80) endp--;
3801       /* Do not consider LF as ascii if preceded by CR, since that
3802          confuses eol decoding. */
3803       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3804         endp++;
3805       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3806         endp++;
3807       break;
3808
3809     default:            /* i.e. case coding_type_iso2022: */
3810       if (coding->heading_ascii < 0)
3811         {
3812           /* We can skip all ASCII characters at the head except for a
3813              few control codes.  */
3814           while (begp < endp && (c = *begp) < 0x80
3815                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3816                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3817                  && (!eol_conversion || c != ISO_CODE_LF))
3818             begp++;
3819         }
3820       switch (coding->category_idx)
3821         {
3822         case CODING_CATEGORY_IDX_ISO_8_1:
3823         case CODING_CATEGORY_IDX_ISO_8_2:
3824           /* We can skip all ASCII characters at the tail.  */
3825           if (eol_conversion)
3826             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3827           else
3828             while (begp < endp && endp[-1] < 0x80) endp--;
3829           /* Do not consider LF as ascii if preceded by CR, since that
3830              confuses eol decoding. */
3831           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3832             endp++;
3833           break;
3834
3835         case CODING_CATEGORY_IDX_ISO_7:
3836         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3837           /* We can skip all charactes at the tail except for ESC and
3838              the following 2-byte at the tail.  */
3839           if (eol_conversion)
3840             while (begp < endp
3841                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3842               endp--;
3843           else
3844             while (begp < endp
3845                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3846               endp--;
3847           /* Do not consider LF as ascii if preceded by CR, since that
3848              confuses eol decoding. */
3849           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3850             endp++;
3851           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3852             {
3853               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3854                 /* This is an ASCII designation sequence.  We can
3855                     surely skip the tail.  */
3856                 endp += 2;
3857               else
3858                 /* Hmmm, we can't skip the tail.  */
3859                 endp = endp_orig;
3860             }
3861         }
3862     }
3863   *beg += begp - begp_orig;
3864   *end += endp - endp_orig;
3865   return;
3866 }
3867
3868 /* Like shrink_decoding_region but for encoding.  */
3869
3870 static void
3871 shrink_encoding_region (beg, end, coding, str)
3872      int *beg, *end;
3873      struct coding_system *coding;
3874      unsigned char *str;
3875 {
3876   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3877   int eol_conversion;
3878
3879   if (coding->type == coding_type_ccl)
3880     /* We can't skip any data.  */
3881     return;
3882   else if (coding->type == coding_type_no_conversion)
3883     {
3884       /* We need no conversion.  */
3885       *beg = *end;
3886       return;
3887     }
3888
3889   if (str)
3890     {
3891       begp_orig = begp = str + *beg;
3892       endp_orig = endp = str + *end;
3893     }
3894   else
3895     {
3896       begp_orig = begp = BYTE_POS_ADDR (*beg);
3897       endp_orig = endp = begp + *end - *beg;
3898     }
3899
3900   eol_conversion = (coding->eol_type == CODING_EOL_CR
3901                     || coding->eol_type == CODING_EOL_CRLF);
3902
3903   /* Here, we don't have to check coding->pre_write_conversion because
3904      the caller is expected to have handled it already.  */
3905   switch (coding->type)
3906     {
3907     case coding_type_undecided:
3908     case coding_type_emacs_mule:
3909     case coding_type_raw_text:
3910       if (eol_conversion)
3911         {
3912           while (begp < endp && *begp != '\n') begp++;
3913           while (begp < endp && endp[-1] != '\n') endp--;
3914         }
3915       else
3916         begp = endp;
3917       break;
3918
3919     case coding_type_iso2022:
3920       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3921         {
3922           unsigned char *bol = begp;
3923           while (begp < endp && *begp < 0x80)
3924             {
3925               begp++;
3926               if (begp[-1] == '\n')
3927                 bol = begp;
3928             }
3929           begp = bol;
3930           goto label_skip_tail;
3931         }
3932       /* fall down ... */
3933
3934     default:
3935       /* We can skip all ASCII characters at the head and tail.  */
3936       if (eol_conversion)
3937         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
3938       else
3939         while (begp < endp && *begp < 0x80) begp++;
3940     label_skip_tail:
3941       if (eol_conversion)
3942         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3943       else
3944         while (begp < endp && *(endp - 1) < 0x80) endp--;
3945       break;
3946     }
3947
3948   *beg += begp - begp_orig;
3949   *end += endp - endp_orig;
3950   return;
3951 }
3952
3953 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3954    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
3955    coding system CODING, and return the status code of code conversion
3956    (currently, this value has no meaning).
3957
3958    How many characters (and bytes) are converted to how many
3959    characters (and bytes) are recorded in members of the structure
3960    CODING.
3961
3962    If REPLACE is nonzero, we do various things as if the original text
3963    is deleted and a new text is inserted.  See the comments in
3964    replace_range (insdel.c) to know what we are doing.  */
3965
3966 int
3967 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
3968      int from, from_byte, to, to_byte, encodep, replace;
3969      struct coding_system *coding;
3970 {
3971   int len = to - from, len_byte = to_byte - from_byte;
3972   int require, inserted, inserted_byte;
3973   int head_skip, tail_skip, total_skip;
3974   Lisp_Object saved_coding_symbol = Qnil;
3975   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
3976   int first = 1;
3977   int fake_multibyte = 0;
3978   unsigned char *src, *dst;
3979   Lisp_Object deletion = Qnil;
3980
3981   if (from < PT && PT < to)
3982     SET_PT_BOTH (from, from_byte);
3983
3984   if (replace)
3985     {
3986       int saved_from = from;
3987
3988       prepare_to_modify_buffer (from, to, &from);
3989       if (saved_from != from)
3990         {
3991           to = from + len;
3992           if (multibyte)
3993             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
3994           else
3995             from_byte = from, to_byte = to;
3996           len_byte = to_byte - from_byte;
3997         }
3998     }
3999
4000   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4001     {
4002       /* We must detect encoding of text and eol format.  */
4003
4004       if (from < GPT && to > GPT)
4005         move_gap_both (from, from_byte);
4006       if (coding->type == coding_type_undecided)
4007         {
4008           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4009           if (coding->type == coding_type_undecided)
4010             /* It seems that the text contains only ASCII, but we
4011                should not left it undecided because the deeper
4012                decoding routine (decode_coding) tries to detect the
4013                encodings again in vain.  */
4014             coding->type = coding_type_emacs_mule;
4015         }
4016       if (coding->eol_type == CODING_EOL_UNDECIDED)
4017         {
4018           saved_coding_symbol = coding->symbol;
4019           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4020           if (coding->eol_type == CODING_EOL_UNDECIDED)
4021             coding->eol_type = CODING_EOL_LF;
4022           /* We had better recover the original eol format if we
4023              encounter an inconsitent eol format while decoding.  */
4024           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4025         }
4026     }
4027
4028   coding->consumed_char = len, coding->consumed = len_byte;
4029
4030   if (encodep
4031       ? ! CODING_REQUIRE_ENCODING (coding)
4032       : ! CODING_REQUIRE_DECODING (coding))
4033     {
4034       coding->produced = len_byte;
4035       if (multibyte
4036           && ! replace
4037           /* See the comment of the member heading_ascii in coding.h.  */
4038           && coding->heading_ascii < len_byte)
4039         {
4040           /* We still may have to combine byte at the head and the
4041              tail of the text in the region.  */
4042           if (from < GPT && GPT < to)
4043             move_gap_both (to, to_byte);
4044           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4045           adjust_after_insert (from, from_byte, to, to_byte, len);
4046           coding->produced_char = len;
4047         }
4048       else
4049         {
4050           if (!replace)
4051             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4052           coding->produced_char = len_byte;
4053         }
4054       return 0;
4055     }
4056
4057   /* Now we convert the text.  */
4058
4059   /* For encoding, we must process pre-write-conversion in advance.  */
4060   if (encodep
4061       && ! NILP (coding->pre_write_conversion)
4062       && SYMBOLP (coding->pre_write_conversion)
4063       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4064     {
4065       /* The function in pre-write-conversion may put a new text in a
4066          new buffer.  */
4067       struct buffer *prev = current_buffer, *new;
4068
4069       call2 (coding->pre_write_conversion,
4070              make_number (from), make_number (to));
4071       if (current_buffer != prev)
4072         {
4073           len = ZV - BEGV;
4074           new = current_buffer;
4075           set_buffer_internal_1 (prev);
4076           del_range_2 (from, from_byte, to, to_byte);
4077           insert_from_buffer (new, BEG, len, 0);
4078           to = from + len;
4079           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4080           len_byte = to_byte - from_byte;
4081         }
4082     }
4083
4084   if (replace)
4085     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4086
4087   /* Try to skip the heading and tailing ASCIIs.  */
4088   {
4089     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4090
4091     if (from < GPT && GPT < to)
4092       move_gap_both (from, from_byte);
4093     if (encodep)
4094       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4095     else
4096       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4097     if (from_byte == to_byte)
4098       {
4099         coding->produced = len_byte;
4100         coding->produced_char = multibyte ? len : len_byte;
4101         if (!replace)
4102           /* We must record and adjust for this new text now.  */
4103           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4104         return 0;
4105       }
4106
4107     head_skip = from_byte - from_byte_orig;
4108     tail_skip = to_byte_orig - to_byte;
4109     total_skip = head_skip + tail_skip;
4110     from += head_skip;
4111     to -= tail_skip;
4112     len -= total_skip; len_byte -= total_skip;
4113   }
4114
4115   /* For converion, we must put the gap before the text in addition to
4116      making the gap larger for efficient decoding.  The required gap
4117      size starts from 2000 which is the magic number used in make_gap.
4118      But, after one batch of conversion, it will be incremented if we
4119      find that it is not enough .  */
4120   require = 2000;
4121
4122   if (GAP_SIZE  < require)
4123     make_gap (require - GAP_SIZE);
4124   move_gap_both (from, from_byte);
4125
4126   if (GPT - BEG < beg_unchanged)
4127     beg_unchanged = GPT - BEG;
4128   if (Z - GPT < end_unchanged)
4129     end_unchanged = Z - GPT;
4130
4131   inserted = inserted_byte = 0;
4132   src = GAP_END_ADDR, dst = GPT_ADDR;
4133
4134   GAP_SIZE += len_byte;
4135   ZV -= len;
4136   Z -= len;
4137   ZV_BYTE -= len_byte;
4138   Z_BYTE -= len_byte;
4139
4140   for (;;)
4141     {
4142       int result;
4143
4144       /* The buffer memory is changed from:
4145          +--------+converted-text+---------+-------original-text------+---+
4146          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4147                   |<------------------- GAP_SIZE -------------------->|  */
4148       if (encodep)
4149         result = encode_coding (coding, src, dst, len_byte, 0);
4150       else
4151         result = decode_coding (coding, src, dst, len_byte, 0);
4152       /* to:
4153          +--------+-------converted-text--------+--+---original-text--+---+
4154          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4155                   |<------------------- GAP_SIZE -------------------->|  */
4156       if (coding->fake_multibyte)
4157         fake_multibyte = 1;
4158
4159       if (!encodep && !multibyte)
4160         coding->produced_char = coding->produced;
4161       inserted += coding->produced_char;
4162       inserted_byte += coding->produced;
4163       len_byte -= coding->consumed;
4164       src += coding->consumed;
4165       dst += inserted_byte;
4166
4167       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4168         {
4169           unsigned char *pend = dst, *p = pend - inserted_byte;
4170
4171           /* Encode LFs back to the original eol format (CR or CRLF).  */
4172           if (coding->eol_type == CODING_EOL_CR)
4173             {
4174               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4175             }
4176           else
4177             {
4178               int count = 0;
4179
4180               while (p < pend) if (*p++ == '\n') count++;
4181               if (src - dst < count)
4182                 {
4183                   /* We don't have sufficient room for putting LFs
4184                      back to CRLF.  We must record converted and
4185                      not-yet-converted text back to the buffer
4186                      content, enlarge the gap, then record them out of
4187                      the buffer contents again.  */
4188                   int add = len_byte + inserted_byte;
4189
4190                   GAP_SIZE -= add;
4191                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4192                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4193                   make_gap (count - GAP_SIZE);
4194                   GAP_SIZE += add;
4195                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4196                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4197                   /* Don't forget to update SRC, DST, and PEND.  */
4198                   src = GAP_END_ADDR - len_byte;
4199                   dst = GPT_ADDR + inserted_byte;
4200                   pend = dst;
4201                 }
4202               inserted += count;
4203               inserted_byte += count;
4204               coding->produced += count;
4205               p = dst = pend + count;
4206               while (count)
4207                 {
4208                   *--p = *--pend;
4209                   if (*p == '\n') count--, *--p = '\r';
4210                 }
4211             }
4212
4213           /* Suppress eol-format conversion in the further conversion.  */
4214           coding->eol_type = CODING_EOL_LF;
4215
4216           /* Restore the original symbol.  */
4217           coding->symbol = saved_coding_symbol;
4218
4219           continue;
4220         }
4221       if (len_byte <= 0)
4222         break;
4223       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4224         {
4225           /* The source text ends in invalid codes.  Let's just
4226              make them valid buffer contents, and finish conversion.  */
4227           inserted += len_byte;
4228           inserted_byte += len_byte;
4229           while (len_byte--)
4230             *dst++ = *src++;
4231           fake_multibyte = 1;
4232           break;
4233         }
4234       if (first)
4235         {
4236           /* We have just done the first batch of conversion which was
4237              stoped because of insufficient gap.  Let's reconsider the
4238              required gap size (i.e. SRT - DST) now.
4239
4240              We have converted ORIG bytes (== coding->consumed) into
4241              NEW bytes (coding->produced).  To convert the remaining
4242              LEN bytes, we may need REQUIRE bytes of gap, where:
4243                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4244                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4245              Here, we are sure that NEW >= ORIG.  */
4246           float ratio = coding->produced - coding->consumed;
4247           ratio /= coding->consumed;
4248           require = len_byte * ratio;
4249           first = 0;
4250         }
4251       if ((src - dst) < (require + 2000))
4252         {
4253           /* See the comment above the previous call of make_gap.  */
4254           int add = len_byte + inserted_byte;
4255
4256           GAP_SIZE -= add;
4257           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4258           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4259           make_gap (require + 2000);
4260           GAP_SIZE += add;
4261           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4262           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4263           /* Don't forget to update SRC, DST.  */
4264           src = GAP_END_ADDR - len_byte;
4265           dst = GPT_ADDR + inserted_byte;
4266         }
4267     }
4268   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4269
4270   if (multibyte
4271       && (fake_multibyte
4272           || !encodep && (to - from) != (to_byte - from_byte)))
4273     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4274
4275   /* If we have shrinked the conversion area, adjust it now.  */
4276   if (total_skip > 0)
4277     {
4278       if (tail_skip > 0)
4279         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4280       inserted += total_skip; inserted_byte += total_skip;
4281       GAP_SIZE += total_skip;
4282       GPT -= head_skip; GPT_BYTE -= head_skip;
4283       ZV -= total_skip; ZV_BYTE -= total_skip;
4284       Z -= total_skip; Z_BYTE -= total_skip;
4285       from -= head_skip; from_byte -= head_skip;
4286       to += tail_skip; to_byte += tail_skip;
4287     }
4288
4289   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4290
4291   if (! encodep && ! NILP (coding->post_read_conversion))
4292     {
4293       Lisp_Object val;
4294       int orig_inserted = inserted, pos = PT;
4295
4296       if (from != pos)
4297         temp_set_point_both (current_buffer, from, from_byte);
4298       val = call1 (coding->post_read_conversion, make_number (inserted));
4299       if (! NILP (val))
4300         {
4301           CHECK_NUMBER (val, 0);
4302           inserted = XFASTINT (val);
4303         }
4304       if (pos >= from + orig_inserted)
4305         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4306     }
4307
4308   signal_after_change (from, to - from, inserted);
4309
4310   {
4311     coding->consumed = to_byte - from_byte;
4312     coding->consumed_char = to - from;
4313     coding->produced = inserted_byte;
4314     coding->produced_char = inserted;
4315   }
4316
4317   return 0;
4318 }
4319
4320 Lisp_Object
4321 code_convert_string (str, coding, encodep, nocopy)
4322      Lisp_Object str;
4323      struct coding_system *coding;
4324      int encodep, nocopy;
4325 {
4326   int len;
4327   char *buf;
4328   int from = 0, to = XSTRING (str)->size;
4329   int to_byte = STRING_BYTES (XSTRING (str));
4330   struct gcpro gcpro1;
4331   Lisp_Object saved_coding_symbol = Qnil;
4332   int result;
4333
4334   if (encodep && !NILP (coding->pre_write_conversion)
4335       || !encodep && !NILP (coding->post_read_conversion))
4336     {
4337       /* Since we have to call Lisp functions which assume target text
4338          is in a buffer, after setting a temporary buffer, call
4339          code_convert_region.  */
4340       int count = specpdl_ptr - specpdl;
4341       struct buffer *prev = current_buffer;
4342
4343       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4344       temp_output_buffer_setup (" *code-converting-work*");
4345       set_buffer_internal (XBUFFER (Vstandard_output));
4346       if (encodep)
4347         insert_from_string (str, 0, 0, to, to_byte, 0);
4348       else
4349         {
4350           /* We must insert the contents of STR as is without
4351              unibyte<->multibyte conversion.  */
4352           current_buffer->enable_multibyte_characters = Qnil;
4353           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4354           current_buffer->enable_multibyte_characters = Qt;
4355         }
4356       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4357       if (encodep)
4358         /* We must return the buffer contents as unibyte string.  */
4359         current_buffer->enable_multibyte_characters = Qnil;
4360       str = make_buffer_string (BEGV, ZV, 0);
4361       set_buffer_internal (prev);
4362       return unbind_to (count, str);
4363     }
4364
4365   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4366     {
4367       /* See the comments in code_convert_region.  */
4368       if (coding->type == coding_type_undecided)
4369         {
4370           detect_coding (coding, XSTRING (str)->data, to_byte);
4371           if (coding->type == coding_type_undecided)
4372             coding->type = coding_type_emacs_mule;
4373         }
4374       if (coding->eol_type == CODING_EOL_UNDECIDED)
4375         {
4376           saved_coding_symbol = coding->symbol;
4377           detect_eol (coding, XSTRING (str)->data, to_byte);
4378           if (coding->eol_type == CODING_EOL_UNDECIDED)
4379             coding->eol_type = CODING_EOL_LF;
4380           /* We had better recover the original eol format if we
4381              encounter an inconsitent eol format while decoding.  */
4382           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4383         }
4384     }
4385
4386   if (encodep
4387       ? ! CODING_REQUIRE_ENCODING (coding)
4388       : ! CODING_REQUIRE_DECODING (coding))
4389     from = to_byte;
4390   else
4391     {
4392       /* Try to skip the heading and tailing ASCIIs.  */
4393       if (encodep)
4394         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4395       else
4396         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4397     }
4398   if (from == to_byte)
4399     return (nocopy ? str : Fcopy_sequence (str));
4400
4401   if (encodep)
4402     len = encoding_buffer_size (coding, to_byte - from);
4403   else
4404     len = decoding_buffer_size (coding, to_byte - from);
4405   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4406   GCPRO1 (str);
4407   buf = get_conversion_buffer (len);
4408   UNGCPRO;
4409
4410   if (from > 0)
4411     bcopy (XSTRING (str)->data, buf, from);
4412   result = (encodep
4413             ? encode_coding (coding, XSTRING (str)->data + from,
4414                              buf + from, to_byte - from, len)
4415             : decode_coding (coding, XSTRING (str)->data + from,
4416                              buf + from, to_byte - from, len));
4417   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4418     {
4419       /* We simple try to decode the whole string again but without
4420          eol-conversion this time.  */
4421       coding->eol_type = CODING_EOL_LF;
4422       coding->symbol = saved_coding_symbol;
4423       return code_convert_string (str, coding, encodep, nocopy);
4424     }
4425
4426   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4427          STRING_BYTES (XSTRING (str)) - to_byte);
4428
4429   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4430   if (encodep)
4431     str = make_unibyte_string (buf, len + coding->produced);
4432   else
4433     str = make_string_from_bytes (buf, len + coding->produced_char,
4434                                   len + coding->produced);
4435   return str;
4436 }
4437
4438 \f
4439 #ifdef emacs
4440 /*** 7. Emacs Lisp library functions ***/
4441
4442 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4443   "Return t if OBJECT is nil or a coding-system.\n\
4444 See the documentation of `make-coding-system' for information\n\
4445 about coding-system objects.")
4446   (obj)
4447      Lisp_Object obj;
4448 {
4449   if (NILP (obj))
4450     return Qt;
4451   if (!SYMBOLP (obj))
4452     return Qnil;
4453   /* Get coding-spec vector for OBJ.  */
4454   obj = Fget (obj, Qcoding_system);
4455   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4456           ? Qt : Qnil);
4457 }
4458
4459 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4460        Sread_non_nil_coding_system, 1, 1, 0,
4461   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4462   (prompt)
4463      Lisp_Object prompt;
4464 {
4465   Lisp_Object val;
4466   do
4467     {
4468       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4469                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4470     }
4471   while (XSTRING (val)->size == 0);
4472   return (Fintern (val, Qnil));
4473 }
4474
4475 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4476   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4477 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4478   (prompt, default_coding_system)
4479      Lisp_Object prompt, default_coding_system;
4480 {
4481   Lisp_Object val;
4482   if (SYMBOLP (default_coding_system))
4483     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4484   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4485                           Qt, Qnil, Qcoding_system_history,
4486                           default_coding_system, Qnil);
4487   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4488 }
4489
4490 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4491        1, 1, 0,
4492   "Check validity of CODING-SYSTEM.\n\
4493 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4494 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4495 The value of property should be a vector of length 5.")
4496   (coding_system)
4497      Lisp_Object coding_system;
4498 {
4499   CHECK_SYMBOL (coding_system, 0);
4500   if (!NILP (Fcoding_system_p (coding_system)))
4501     return coding_system;
4502   while (1)
4503     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4504 }
4505 \f
4506 Lisp_Object
4507 detect_coding_system (src, src_bytes, highest)
4508      unsigned char *src;
4509      int src_bytes, highest;
4510 {
4511   int coding_mask, eol_type;
4512   Lisp_Object val, tmp;
4513   int dummy;
4514
4515   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4516   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4517   if (eol_type == CODING_EOL_INCONSISTENT)
4518     eol_type == CODING_EOL_UNDECIDED;
4519
4520   if (!coding_mask)
4521     {
4522       val = Qundecided;
4523       if (eol_type != CODING_EOL_UNDECIDED)
4524         {
4525           Lisp_Object val2;
4526           val2 = Fget (Qundecided, Qeol_type);
4527           if (VECTORP (val2))
4528             val = XVECTOR (val2)->contents[eol_type];
4529         }
4530       return val;
4531     }
4532
4533   /* At first, gather possible coding systems in VAL.  */
4534   val = Qnil;
4535   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4536     {
4537       int idx
4538         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4539       if (coding_mask & (1 << idx))
4540         {
4541           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4542           if (highest)
4543             break;
4544         }
4545     }
4546   if (!highest)
4547     val = Fnreverse (val);
4548
4549   /* Then, substitute the elements by subsidiary coding systems.  */
4550   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4551     {
4552       if (eol_type != CODING_EOL_UNDECIDED)
4553         {
4554           Lisp_Object eol;
4555           eol = Fget (XCONS (tmp)->car, Qeol_type);
4556           if (VECTORP (eol))
4557             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4558         }
4559     }
4560   return (highest ? XCONS (val)->car : val);
4561 }
4562
4563 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4564        2, 3, 0,
4565   "Detect coding system of the text in the region between START and END.\n\
4566 Return a list of possible coding systems ordered by priority.\n\
4567 \n\
4568 If only ASCII characters are found, it returns `undecided'\n\
4569 or its subsidiary coding system according to a detected end-of-line format.\n\
4570 \n\
4571 If optional argument HIGHEST is non-nil, return the coding system of\n\
4572 highest priority.")
4573   (start, end, highest)
4574      Lisp_Object start, end, highest;
4575 {
4576   int from, to;
4577   int from_byte, to_byte;
4578
4579   CHECK_NUMBER_COERCE_MARKER (start, 0);
4580   CHECK_NUMBER_COERCE_MARKER (end, 1);
4581
4582   validate_region (&start, &end);
4583   from = XINT (start), to = XINT (end);
4584   from_byte = CHAR_TO_BYTE (from);
4585   to_byte = CHAR_TO_BYTE (to);
4586
4587   if (from < GPT && to >= GPT)
4588     move_gap_both (to, to_byte);
4589
4590   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4591                                to_byte - from_byte,
4592                                !NILP (highest));
4593 }
4594
4595 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4596        1, 2, 0,
4597   "Detect coding system of the text in STRING.\n\
4598 Return a list of possible coding systems ordered by priority.\n\
4599 \n\
4600 If only ASCII characters are found, it returns `undecided'\n\
4601 or its subsidiary coding system according to a detected end-of-line format.\n\
4602 \n\
4603 If optional argument HIGHEST is non-nil, return the coding system of\n\
4604 highest priority.")
4605   (string, highest)
4606      Lisp_Object string, highest;
4607 {
4608   CHECK_STRING (string, 0);
4609
4610   return detect_coding_system (XSTRING (string)->data,
4611                                STRING_BYTES (XSTRING (string)),
4612                                !NILP (highest));
4613 }
4614
4615 Lisp_Object
4616 code_convert_region1 (start, end, coding_system, encodep)
4617      Lisp_Object start, end, coding_system;
4618      int encodep;
4619 {
4620   struct coding_system coding;
4621   int from, to, len;
4622
4623   CHECK_NUMBER_COERCE_MARKER (start, 0);
4624   CHECK_NUMBER_COERCE_MARKER (end, 1);
4625   CHECK_SYMBOL (coding_system, 2);
4626
4627   validate_region (&start, &end);
4628   from = XFASTINT (start);
4629   to = XFASTINT (end);
4630
4631   if (NILP (coding_system))
4632     return make_number (to - from);
4633
4634   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4635     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4636
4637   coding.mode |= CODING_MODE_LAST_BLOCK;
4638   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4639                        &coding, encodep, 1);
4640   Vlast_coding_system_used = coding.symbol;
4641   return make_number (coding.produced_char);
4642 }
4643
4644 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4645        3, 3, "r\nzCoding system: ",
4646   "Decode the current region by specified coding system.\n\
4647 When called from a program, takes three arguments:\n\
4648 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4649 This function sets `last-coding-system-used' to the precise coding system\n\
4650 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4651 not fully specified.)\n\
4652 It returns the length of the decoded text.")
4653   (start, end, coding_system)
4654      Lisp_Object start, end, coding_system;
4655 {
4656   return code_convert_region1 (start, end, coding_system, 0);
4657 }
4658
4659 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4660        3, 3, "r\nzCoding system: ",
4661   "Encode the current region by specified coding system.\n\
4662 When called from a program, takes three arguments:\n\
4663 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4664 This function sets `last-coding-system-used' to the precise coding system\n\
4665 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4666 not fully specified.)\n\
4667 It returns the length of the encoded text.")
4668   (start, end, coding_system)
4669      Lisp_Object start, end, coding_system;
4670 {
4671   return code_convert_region1 (start, end, coding_system, 1);
4672 }
4673
4674 Lisp_Object
4675 code_convert_string1 (string, coding_system, nocopy, encodep)
4676      Lisp_Object string, coding_system, nocopy;
4677      int encodep;
4678 {
4679   struct coding_system coding;
4680
4681   CHECK_STRING (string, 0);
4682   CHECK_SYMBOL (coding_system, 1);
4683
4684   if (NILP (coding_system))
4685     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4686
4687   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4688     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4689
4690   coding.mode |= CODING_MODE_LAST_BLOCK;
4691   Vlast_coding_system_used = coding.symbol;
4692   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4693 }
4694
4695 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4696        2, 3, 0,
4697   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4698 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4699 if the decoding operation is trivial.\n\
4700 This function sets `last-coding-system-used' to the precise coding system\n\
4701 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4702 not fully specified.)")
4703   (string, coding_system, nocopy)
4704      Lisp_Object string, coding_system, nocopy;
4705 {
4706   return code_convert_string1 (string, coding_system, nocopy, 0);
4707 }
4708
4709 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4710        2, 3, 0,
4711   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4712 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4713 if the encoding operation is trivial.\n\
4714 This function sets `last-coding-system-used' to the precise coding system\n\
4715 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4716 not fully specified.)")
4717   (string, coding_system, nocopy)
4718      Lisp_Object string, coding_system, nocopy;
4719 {
4720   return code_convert_string1 (string, coding_system, nocopy, 1);
4721 }
4722
4723 \f
4724 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4725   "Decode a JISX0208 character of shift-jis encoding.\n\
4726 CODE is the character code in SJIS.\n\
4727 Return the corresponding character.")
4728   (code)
4729      Lisp_Object code;
4730 {
4731   unsigned char c1, c2, s1, s2;
4732   Lisp_Object val;
4733
4734   CHECK_NUMBER (code, 0);
4735   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4736   DECODE_SJIS (s1, s2, c1, c2);
4737   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4738   return val;
4739 }
4740
4741 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4742   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4743 Return the corresponding character code in SJIS.")
4744   (ch)
4745      Lisp_Object ch;
4746 {
4747   int charset, c1, c2, s1, s2;
4748   Lisp_Object val;
4749
4750   CHECK_NUMBER (ch, 0);
4751   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4752   if (charset == charset_jisx0208)
4753     {
4754       ENCODE_SJIS (c1, c2, s1, s2);
4755       XSETFASTINT (val, (s1 << 8) | s2);
4756     }
4757   else
4758     XSETFASTINT (val, 0);
4759   return val;
4760 }
4761
4762 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4763   "Decode a Big5 character CODE of BIG5 coding system.\n\
4764 CODE is the character code in BIG5.\n\
4765 Return the corresponding character.")
4766   (code)
4767      Lisp_Object code;
4768 {
4769   int charset;
4770   unsigned char b1, b2, c1, c2;
4771   Lisp_Object val;
4772
4773   CHECK_NUMBER (code, 0);
4774   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4775   DECODE_BIG5 (b1, b2, charset, c1, c2);
4776   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4777   return val;
4778 }
4779
4780 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4781   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4782 Return the corresponding character code in Big5.")
4783   (ch)
4784      Lisp_Object ch;
4785 {
4786   int charset, c1, c2, b1, b2;
4787   Lisp_Object val;
4788
4789   CHECK_NUMBER (ch, 0);
4790   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4791   if (charset == charset_big5_1 || charset == charset_big5_2)
4792     {
4793       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4794       XSETFASTINT (val, (b1 << 8) | b2);
4795     }
4796   else
4797     XSETFASTINT (val, 0);
4798   return val;
4799 }
4800 \f
4801 DEFUN ("set-terminal-coding-system-internal",
4802        Fset_terminal_coding_system_internal,
4803        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4804   (coding_system)
4805      Lisp_Object coding_system;
4806 {
4807   CHECK_SYMBOL (coding_system, 0);
4808   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4809   /* We had better not send unsafe characters to terminal.  */
4810   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4811
4812   return Qnil;
4813 }
4814
4815 DEFUN ("set-safe-terminal-coding-system-internal",
4816        Fset_safe_terminal_coding_system_internal,
4817        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4818   (coding_system)
4819      Lisp_Object coding_system;
4820 {
4821   CHECK_SYMBOL (coding_system, 0);
4822   setup_coding_system (Fcheck_coding_system (coding_system),
4823                        &safe_terminal_coding);
4824   return Qnil;
4825 }
4826
4827 DEFUN ("terminal-coding-system",
4828        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4829   "Return coding system specified for terminal output.")
4830   ()
4831 {
4832   return terminal_coding.symbol;
4833 }
4834
4835 DEFUN ("set-keyboard-coding-system-internal",
4836        Fset_keyboard_coding_system_internal,
4837        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4838   (coding_system)
4839      Lisp_Object coding_system;
4840 {
4841   CHECK_SYMBOL (coding_system, 0);
4842   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4843   return Qnil;
4844 }
4845
4846 DEFUN ("keyboard-coding-system",
4847        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4848   "Return coding system specified for decoding keyboard input.")
4849   ()
4850 {
4851   return keyboard_coding.symbol;
4852 }
4853
4854 \f
4855 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4856        Sfind_operation_coding_system,  1, MANY, 0,
4857   "Choose a coding system for an operation based on the target name.\n\
4858 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4859 DECODING-SYSTEM is the coding system to use for decoding\n\
4860 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4861 for encoding (in case OPERATION does encoding).\n\
4862 \n\
4863 The first argument OPERATION specifies an I/O primitive:\n\
4864   For file I/O, `insert-file-contents' or `write-region'.\n\
4865   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4866   For network I/O, `open-network-stream'.\n\
4867 \n\
4868 The remaining arguments should be the same arguments that were passed\n\
4869 to the primitive.  Depending on which primitive, one of those arguments\n\
4870 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4871 whichever argument specifies the file name is TARGET.\n\
4872 \n\
4873 TARGET has a meaning which depends on OPERATION:\n\
4874   For file I/O, TARGET is a file name.\n\
4875   For process I/O, TARGET is a process name.\n\
4876   For network I/O, TARGET is a service name or a port number\n\
4877 \n\
4878 This function looks up what specified for TARGET in,\n\
4879 `file-coding-system-alist', `process-coding-system-alist',\n\
4880 or `network-coding-system-alist' depending on OPERATION.\n\
4881 They may specify a coding system, a cons of coding systems,\n\
4882 or a function symbol to call.\n\
4883 In the last case, we call the function with one argument,\n\
4884 which is a list of all the arguments given to this function.")
4885   (nargs, args)
4886      int nargs;
4887      Lisp_Object *args;
4888 {
4889   Lisp_Object operation, target_idx, target, val;
4890   register Lisp_Object chain;
4891
4892   if (nargs < 2)
4893     error ("Too few arguments");
4894   operation = args[0];
4895   if (!SYMBOLP (operation)
4896       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
4897     error ("Invalid first arguement");
4898   if (nargs < 1 + XINT (target_idx))
4899     error ("Too few arguments for operation: %s",
4900            XSYMBOL (operation)->name->data);
4901   target = args[XINT (target_idx) + 1];
4902   if (!(STRINGP (target)
4903         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
4904     error ("Invalid %dth argument", XINT (target_idx) + 1);
4905
4906   chain = ((EQ (operation, Qinsert_file_contents)
4907             || EQ (operation, Qwrite_region))
4908            ? Vfile_coding_system_alist
4909            : (EQ (operation, Qopen_network_stream)
4910               ? Vnetwork_coding_system_alist
4911               : Vprocess_coding_system_alist));
4912   if (NILP (chain))
4913     return Qnil;
4914
4915   for (; CONSP (chain); chain = XCONS (chain)->cdr)
4916     {
4917       Lisp_Object elt;
4918       elt = XCONS (chain)->car;
4919
4920       if (CONSP (elt)
4921           && ((STRINGP (target)
4922                && STRINGP (XCONS (elt)->car)
4923                && fast_string_match (XCONS (elt)->car, target) >= 0)
4924               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
4925         {
4926           val = XCONS (elt)->cdr;
4927           /* Here, if VAL is both a valid coding system and a valid
4928              function symbol, we return VAL as a coding system.  */
4929           if (CONSP (val))
4930             return val;
4931           if (! SYMBOLP (val))
4932             return Qnil;
4933           if (! NILP (Fcoding_system_p (val)))
4934             return Fcons (val, val);
4935           if (! NILP (Ffboundp (val)))
4936             {
4937               val = call1 (val, Flist (nargs, args));
4938               if (CONSP (val))
4939                 return val;
4940               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
4941                 return Fcons (val, val);
4942             }
4943           return Qnil;
4944         }
4945     }
4946   return Qnil;
4947 }
4948
4949 DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
4950        Supdate_iso_coding_systems, 0, 0, 0,
4951   "Update internal database for ISO2022 based coding systems.\n\
4952 When values of the following coding categories are changed, you must\n\
4953 call this function:\n\
4954   coding-category-iso-7, coding-category-iso-7-tight,\n\
4955   coding-category-iso-8-1, coding-category-iso-8-2,\n\
4956   coding-category-iso-7-else, coding-category-iso-8-else")
4957   ()
4958 {
4959   int i;
4960
4961   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
4962        i++)
4963     {
4964       if (! coding_system_table[i])
4965         coding_system_table[i]
4966           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
4967       setup_coding_system
4968         (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
4969          coding_system_table[i]);
4970     }
4971   return Qnil;
4972 }
4973
4974 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
4975        Sset_coding_priority_internal, 0, 0, 0,
4976   "Update internal database for the current value of `coding-category-list'.\n\
4977 This function is internal use only.")
4978   ()
4979 {
4980   int i = 0, idx;
4981   Lisp_Object val = Vcoding_category_list;
4982
4983   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
4984     {
4985       if (! SYMBOLP (XCONS (val)->car))
4986         break;
4987       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
4988       if (idx >= CODING_CATEGORY_IDX_MAX)
4989         break;
4990       coding_priorities[i++] = (1 << idx);
4991       val = XCONS (val)->cdr;
4992     }
4993   /* If coding-category-list is valid and contains all coding
4994      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
4995      the following code saves Emacs from craching.  */
4996   while (i < CODING_CATEGORY_IDX_MAX)
4997     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
4998
4999   return Qnil;
5000 }
5001
5002 #endif /* emacs */
5003
5004 \f
5005 /*** 8. Post-amble ***/
5006
5007 void
5008 init_coding_once ()
5009 {
5010   int i;
5011
5012   /* Emacs' internal format specific initialize routine.  */
5013   for (i = 0; i <= 0x20; i++)
5014     emacs_code_class[i] = EMACS_control_code;
5015   emacs_code_class[0x0A] = EMACS_linefeed_code;
5016   emacs_code_class[0x0D] = EMACS_carriage_return_code;
5017   for (i = 0x21 ; i < 0x7F; i++)
5018     emacs_code_class[i] = EMACS_ascii_code;
5019   emacs_code_class[0x7F] = EMACS_control_code;
5020   emacs_code_class[0x80] = EMACS_leading_code_composition;
5021   for (i = 0x81; i < 0xFF; i++)
5022     emacs_code_class[i] = EMACS_invalid_code;
5023   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5024   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5025   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5026   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5027
5028   /* ISO2022 specific initialize routine.  */
5029   for (i = 0; i < 0x20; i++)
5030     iso_code_class[i] = ISO_control_code;
5031   for (i = 0x21; i < 0x7F; i++)
5032     iso_code_class[i] = ISO_graphic_plane_0;
5033   for (i = 0x80; i < 0xA0; i++)
5034     iso_code_class[i] = ISO_control_code;
5035   for (i = 0xA1; i < 0xFF; i++)
5036     iso_code_class[i] = ISO_graphic_plane_1;
5037   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5038   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5039   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5040   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5041   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5042   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5043   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5044   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5045   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5046   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5047
5048   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5049   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5050
5051   setup_coding_system (Qnil, &keyboard_coding);
5052   setup_coding_system (Qnil, &terminal_coding);
5053   setup_coding_system (Qnil, &safe_terminal_coding);
5054
5055   bzero (coding_system_table, sizeof coding_system_table);
5056
5057   bzero (ascii_skip_code, sizeof ascii_skip_code);
5058   for (i = 0; i < 128; i++)
5059     ascii_skip_code[i] = 1;
5060
5061 #if defined (MSDOS) || defined (WINDOWSNT)
5062   system_eol_type = CODING_EOL_CRLF;
5063 #else
5064   system_eol_type = CODING_EOL_LF;
5065 #endif
5066 }
5067
5068 #ifdef emacs
5069
5070 void
5071 syms_of_coding ()
5072 {
5073   Qtarget_idx = intern ("target-idx");
5074   staticpro (&Qtarget_idx);
5075
5076   Qcoding_system_history = intern ("coding-system-history");
5077   staticpro (&Qcoding_system_history);
5078   Fset (Qcoding_system_history, Qnil);
5079
5080   /* Target FILENAME is the first argument.  */
5081   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5082   /* Target FILENAME is the third argument.  */
5083   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5084
5085   Qcall_process = intern ("call-process");
5086   staticpro (&Qcall_process);
5087   /* Target PROGRAM is the first argument.  */
5088   Fput (Qcall_process, Qtarget_idx, make_number (0));
5089
5090   Qcall_process_region = intern ("call-process-region");
5091   staticpro (&Qcall_process_region);
5092   /* Target PROGRAM is the third argument.  */
5093   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5094
5095   Qstart_process = intern ("start-process");
5096   staticpro (&Qstart_process);
5097   /* Target PROGRAM is the third argument.  */
5098   Fput (Qstart_process, Qtarget_idx, make_number (2));
5099
5100   Qopen_network_stream = intern ("open-network-stream");
5101   staticpro (&Qopen_network_stream);
5102   /* Target SERVICE is the fourth argument.  */
5103   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5104
5105   Qcoding_system = intern ("coding-system");
5106   staticpro (&Qcoding_system);
5107
5108   Qeol_type = intern ("eol-type");
5109   staticpro (&Qeol_type);
5110
5111   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5112   staticpro (&Qbuffer_file_coding_system);
5113
5114   Qpost_read_conversion = intern ("post-read-conversion");
5115   staticpro (&Qpost_read_conversion);
5116
5117   Qpre_write_conversion = intern ("pre-write-conversion");
5118   staticpro (&Qpre_write_conversion);
5119
5120   Qno_conversion = intern ("no-conversion");
5121   staticpro (&Qno_conversion);
5122
5123   Qundecided = intern ("undecided");
5124   staticpro (&Qundecided);
5125
5126   Qcoding_system_p = intern ("coding-system-p");
5127   staticpro (&Qcoding_system_p);
5128
5129   Qcoding_system_error = intern ("coding-system-error");
5130   staticpro (&Qcoding_system_error);
5131
5132   Fput (Qcoding_system_error, Qerror_conditions,
5133         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5134   Fput (Qcoding_system_error, Qerror_message,
5135         build_string ("Invalid coding system"));
5136
5137   Qcoding_category = intern ("coding-category");
5138   staticpro (&Qcoding_category);
5139   Qcoding_category_index = intern ("coding-category-index");
5140   staticpro (&Qcoding_category_index);
5141
5142   Vcoding_category_table
5143     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5144   staticpro (&Vcoding_category_table);
5145   {
5146     int i;
5147     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5148       {
5149         XVECTOR (Vcoding_category_table)->contents[i]
5150           = intern (coding_category_name[i]);
5151         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5152               Qcoding_category_index, make_number (i));
5153       }
5154   }
5155
5156   Qtranslation_table = intern ("translation-table");
5157   staticpro (&Qtranslation_table);
5158   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (0));
5159
5160   Qtranslation_table_id = intern ("translation-table-id");
5161   staticpro (&Qtranslation_table_id);
5162
5163   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5164   staticpro (&Qtranslation_table_for_decode);
5165
5166   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5167   staticpro (&Qtranslation_table_for_encode);
5168
5169   Qsafe_charsets = intern ("safe-charsets");
5170   staticpro (&Qsafe_charsets);
5171
5172   Qemacs_mule = intern ("emacs-mule");
5173   staticpro (&Qemacs_mule);
5174
5175   Qraw_text = intern ("raw-text");
5176   staticpro (&Qraw_text);
5177
5178   defsubr (&Scoding_system_p);
5179   defsubr (&Sread_coding_system);
5180   defsubr (&Sread_non_nil_coding_system);
5181   defsubr (&Scheck_coding_system);
5182   defsubr (&Sdetect_coding_region);
5183   defsubr (&Sdetect_coding_string);
5184   defsubr (&Sdecode_coding_region);
5185   defsubr (&Sencode_coding_region);
5186   defsubr (&Sdecode_coding_string);
5187   defsubr (&Sencode_coding_string);
5188   defsubr (&Sdecode_sjis_char);
5189   defsubr (&Sencode_sjis_char);
5190   defsubr (&Sdecode_big5_char);
5191   defsubr (&Sencode_big5_char);
5192   defsubr (&Sset_terminal_coding_system_internal);
5193   defsubr (&Sset_safe_terminal_coding_system_internal);
5194   defsubr (&Sterminal_coding_system);
5195   defsubr (&Sset_keyboard_coding_system_internal);
5196   defsubr (&Skeyboard_coding_system);
5197   defsubr (&Sfind_operation_coding_system);
5198   defsubr (&Supdate_iso_coding_systems);
5199   defsubr (&Sset_coding_priority_internal);
5200
5201   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5202     "List of coding systems.\n\
5203 \n\
5204 Do not alter the value of this variable manually.  This variable should be\n\
5205 updated by the functions `make-coding-system' and\n\
5206 `define-coding-system-alias'.");
5207   Vcoding_system_list = Qnil;
5208
5209   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5210     "Alist of coding system names.\n\
5211 Each element is one element list of coding system name.\n\
5212 This variable is given to `completing-read' as TABLE argument.\n\
5213 \n\
5214 Do not alter the value of this variable manually.  This variable should be\n\
5215 updated by the functions `make-coding-system' and\n\
5216 `define-coding-system-alias'.");
5217   Vcoding_system_alist = Qnil;
5218
5219   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5220     "List of coding-categories (symbols) ordered by priority.");
5221   {
5222     int i;
5223
5224     Vcoding_category_list = Qnil;
5225     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5226       Vcoding_category_list
5227         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5228                  Vcoding_category_list);
5229   }
5230
5231   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5232     "Specify the coding system for read operations.\n\
5233 It is useful to bind this variable with `let', but do not set it globally.\n\
5234 If the value is a coding system, it is used for decoding on read operation.\n\
5235 If not, an appropriate element is used from one of the coding system alists:\n\
5236 There are three such tables, `file-coding-system-alist',\n\
5237 `process-coding-system-alist', and `network-coding-system-alist'.");
5238   Vcoding_system_for_read = Qnil;
5239
5240   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5241     "Specify the coding system for write operations.\n\
5242 It is useful to bind this variable with `let', but do not set it globally.\n\
5243 If the value is a coding system, it is used for encoding on write operation.\n\
5244 If not, an appropriate element is used from one of the coding system alists:\n\
5245 There are three such tables, `file-coding-system-alist',\n\
5246 `process-coding-system-alist', and `network-coding-system-alist'.");
5247   Vcoding_system_for_write = Qnil;
5248
5249   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5250     "Coding system used in the latest file or process I/O.");
5251   Vlast_coding_system_used = Qnil;
5252
5253   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5254     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
5255   inhibit_eol_conversion = 0;
5256
5257   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5258     "Non-nil means process buffer inherits coding system of process output.\n\
5259 Bind it to t if the process output is to be treated as if it were a file\n\
5260 read from some filesystem.");
5261   inherit_process_coding_system = 0;
5262
5263   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5264     "Alist to decide a coding system to use for a file I/O operation.\n\
5265 The format is ((PATTERN . VAL) ...),\n\
5266 where PATTERN is a regular expression matching a file name,\n\
5267 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5268 If VAL is a coding system, it is used for both decoding and encoding\n\
5269 the file contents.\n\
5270 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5271 and the cdr part is used for encoding.\n\
5272 If VAL is a function symbol, the function must return a coding system\n\
5273 or a cons of coding systems which are used as above.\n\
5274 \n\
5275 See also the function `find-operation-coding-system'.");
5276   Vfile_coding_system_alist = Qnil;
5277
5278   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5279     "Alist to decide a coding system to use for a process I/O operation.\n\
5280 The format is ((PATTERN . VAL) ...),\n\
5281 where PATTERN is a regular expression matching a program name,\n\
5282 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5283 If VAL is a coding system, it is used for both decoding what received\n\
5284 from the program and encoding what sent to the program.\n\
5285 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5286 and the cdr part is used for encoding.\n\
5287 If VAL is a function symbol, the function must return a coding system\n\
5288 or a cons of coding systems which are used as above.\n\
5289 \n\
5290 See also the function `find-operation-coding-system'.");
5291   Vprocess_coding_system_alist = Qnil;
5292
5293   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5294     "Alist to decide a coding system to use for a network I/O operation.\n\
5295 The format is ((PATTERN . VAL) ...),\n\
5296 where PATTERN is a regular expression matching a network service name\n\
5297 or is a port number to connect to,\n\
5298 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5299 If VAL is a coding system, it is used for both decoding what received\n\
5300 from the network stream and encoding what sent to the network stream.\n\
5301 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5302 and the cdr part is used for encoding.\n\
5303 If VAL is a function symbol, the function must return a coding system\n\
5304 or a cons of coding systems which are used as above.\n\
5305 \n\
5306 See also the function `find-operation-coding-system'.");
5307   Vnetwork_coding_system_alist = Qnil;
5308
5309   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5310     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF) .");
5311   eol_mnemonic_unix = ':';
5312
5313   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5314     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5315   eol_mnemonic_dos = '\\';
5316
5317   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5318     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5319   eol_mnemonic_mac = '/';
5320
5321   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5322     "Mnemonic character indicating end-of-line format is not yet decided.");
5323   eol_mnemonic_undecided = ':';
5324
5325   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
5326     "*Non-nil enables character translation while encoding and decoding.");
5327   Venable_character_translation = Qt;
5328
5329   DEFVAR_LISP ("standard-translation-table-for-decode",
5330     &Vstandard_translation_table_for_decode,
5331     "Table for translating characters while decoding.");
5332   Vstandard_translation_table_for_decode = Qnil;
5333
5334   DEFVAR_LISP ("standard-translation-table-for-encode",
5335     &Vstandard_translation_table_for_encode,
5336     "Table for translating characters while encoding.");
5337   Vstandard_translation_table_for_encode = Qnil;
5338
5339   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5340     "Alist of charsets vs revision numbers.\n\
5341 While encoding, if a charset (car part of an element) is found,\n\
5342 designate it with the escape sequence identifying revision (cdr part of the element).");
5343   Vcharset_revision_alist = Qnil;
5344
5345   DEFVAR_LISP ("default-process-coding-system",
5346                &Vdefault_process_coding_system,
5347     "Cons of coding systems used for process I/O by default.\n\
5348 The car part is used for decoding a process output,\n\
5349 the cdr part is used for encoding a text to be sent to a process.");
5350   Vdefault_process_coding_system = Qnil;
5351
5352   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5353     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5354 This is a vector of length 256.\n\
5355 If Nth element is non-nil, the existence of code N in a file\n\
5356 \(or output of subprocess) doesn't prevent it to be detected as\n\
5357 a coding system of ISO 2022 variant which has a flag\n\
5358 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5359 or reading output of a subprocess.\n\
5360 Only 128th through 159th elements have a meaning.");
5361   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5362
5363   DEFVAR_LISP ("select-safe-coding-system-function",
5364                &Vselect_safe_coding_system_function,
5365     "Function to call to select safe coding system for encoding a text.\n\
5366 \n\
5367 If set, this function is called to force a user to select a proper\n\
5368 coding system which can encode the text in the case that a default\n\
5369 coding system used in each operation can't encode the text.\n\
5370 \n\
5371 The default value is `select-safe-coding-system' (which see).");
5372   Vselect_safe_coding_system_function = Qnil;
5373
5374 }
5375
5376 #endif /* emacs */