code.delx.au - gnu-emacs/blob - src/coding.c

   1 /* Coding system handler (conversion, detection, and etc).
   2    Copyright (C) 1995, 1997, 1998 Electrotechnical Laboratory, JAPAN.
   3    Licensed to the Free Software Foundation.
   4
   5 This file is part of GNU Emacs.
   6
   7 GNU Emacs is free software; you can redistribute it and/or modify
   8 it under the terms of the GNU General Public License as published by
   9 the Free Software Foundation; either version 2, or (at your option)
  10 any later version.
  11
  12 GNU Emacs is distributed in the hope that it will be useful,
  13 but WITHOUT ANY WARRANTY; without even the implied warranty of
  14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  15 GNU General Public License for more details.
  16
  17 You should have received a copy of the GNU General Public License
  18 along with GNU Emacs; see the file COPYING.  If not, write to
  19 the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
  20 Boston, MA 02111-1307, USA.  */
  21
  22 /*** TABLE OF CONTENTS ***
  23
  24   1. Preamble
  25   2. Emacs' internal format (emacs-mule) handlers
  26   3. ISO2022 handlers
  27   4. Shift-JIS and BIG5 handlers
  28   5. End-of-line handlers
  29   6. C library functions
  30   7. Emacs Lisp library functions
  31   8. Post-amble
  32
  33 */
  34
  35 /*** GENERAL NOTE on CODING SYSTEM ***
  36
  37   Coding system is an encoding mechanism of one or more character
  38   sets.  Here's a list of coding systems which Emacs can handle.  When
  39   we say "decode", it means converting some other coding system to
  40   Emacs' internal format (emacs-mule), and when we say "encode",
  41   it means converting the coding system emacs-mule to some other
  42   coding system.
  43
  44   0. Emacs' internal format (emacs-mule)
  45
  46   Emacs itself holds a multi-lingual character in a buffer and a string
  47   in a special format.  Details are described in section 2.
  48
  49   1. ISO2022
  50
  51   The most famous coding system for multiple character sets.  X's
  52   Compound Text, various EUCs (Extended Unix Code), and coding
  53   systems used in Internet communication such as ISO-2022-JP are
  54   all variants of ISO2022.  Details are described in section 3.
  55
  56   2. SJIS (or Shift-JIS or MS-Kanji-Code)
  57
  58   A coding system to encode character sets: ASCII, JISX0201, and
  59   JISX0208.  Widely used for PC's in Japan.  Details are described in
  60   section 4.
  61
  62   3. BIG5
  63
  64   A coding system to encode character sets: ASCII and Big5.  Widely
  65   used by Chinese (mainly in Taiwan and Hong Kong).  Details are
  66   described in section 4.  In this file, when we write "BIG5"
  67   (all uppercase), we mean the coding system, and when we write
  68   "Big5" (capitalized), we mean the character set.
  69
  70   4. Raw text
  71
  72   A coding system for a text containing random 8-bit code.  Emacs does
  73   no code conversion on such a text except for end-of-line format.
  74
  75   5. Other
  76
  77   If a user wants to read/write a text encoded in a coding system not
  78   listed above, he can supply a decoder and an encoder for it in CCL
  79   (Code Conversion Language) programs.  Emacs executes the CCL program
  80   while reading/writing.
  81
  82   Emacs represents a coding system by a Lisp symbol that has a property
  83   `coding-system'.  But, before actually using the coding system, the
  84   information about it is set in a structure of type `struct
  85   coding_system' for rapid processing.  See section 6 for more details.
  86
  87 */
  88
  89 /*** GENERAL NOTES on END-OF-LINE FORMAT ***
  90
  91   How end-of-line of a text is encoded depends on a system.  For
  92   instance, Unix's format is just one byte of `line-feed' code,
  93   whereas DOS's format is two-byte sequence of `carriage-return' and
  94   `line-feed' codes.  MacOS's format is usually one byte of
  95   `carriage-return'.
  96
  97   Since text characters encoding and end-of-line encoding are
  98   independent, any coding system described above can take
  99   any format of end-of-line.  So, Emacs has information of format of
 100   end-of-line in each coding-system.  See section 6 for more details.
 101
 102 */
 103
 104 /*** GENERAL NOTES on `detect_coding_XXX ()' functions ***
 105
 106   These functions check if a text between SRC and SRC_END is encoded
 107   in the coding system category XXX.  Each returns an integer value in
 108   which appropriate flag bits for the category XXX are set.  The flag
 109   bits are defined in macros CODING_CATEGORY_MASK_XXX.  Below is the
 110   template of these functions.  */
 111 #if 0
 112 int
 113 detect_coding_emacs_mule (src, src_end)
 114      unsigned char *src, *src_end;
 115 {
 116   ...
 117 }
 118 #endif
 119
 120 /*** GENERAL NOTES on `decode_coding_XXX ()' functions ***
 121
 122   These functions decode SRC_BYTES length text at SOURCE encoded in
 123   CODING to Emacs' internal format (emacs-mule).  The resulting text
 124   goes to a place pointed to by DESTINATION, the length of which
 125   should not exceed DST_BYTES.  These functions set the information of
 126   original and decoded texts in the members produced, produced_char,
 127   consumed, and consumed_char of the structure *CODING.
 128
 129   The return value is an integer (CODING_FINISH_XXX) indicating how
 130   the decoding finished.
 131
 132   DST_BYTES zero means that source area and destination area are
 133   overlapped, which means that we can produce a decoded text until it
 134   reaches at the head of not-yet-decoded source text.
 135
 136   Below is a template of these functions.  */
 137 #if 0
 138 decode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 139      struct coding_system *coding;
 140      unsigned char *source, *destination;
 141      int src_bytes, dst_bytes;
 142 {
 143   ...
 144 }
 145 #endif
 146
 147 /*** GENERAL NOTES on `encode_coding_XXX ()' functions ***
 148
 149   These functions encode SRC_BYTES length text at SOURCE of Emacs'
 150   internal format (emacs-mule) to CODING.  The resulting text goes to
 151   a place pointed to by DESTINATION, the length of which should not
 152   exceed DST_BYTES.  These functions set the information of
 153   original and encoded texts in the members produced, produced_char,
 154   consumed, and consumed_char of the structure *CODING.
 155
 156   The return value is an integer (CODING_FINISH_XXX) indicating how
 157   the encoding finished.
 158
 159   DST_BYTES zero means that source area and destination area are
 160   overlapped, which means that we can produce an encoded text until it
 161   reaches the head of not-yet-encoded source text.
 162
 163   Below is a template of these functions.  */
 164 #if 0
 165 encode_coding_XXX (coding, source, destination, src_bytes, dst_bytes)
 166      struct coding_system *coding;
 167      unsigned char *source, *destination;
 168      int src_bytes, dst_bytes;
 169 {
 170   ...
 171 }
 172 #endif
 173
 174 /*** COMMONLY USED MACROS ***/
 175
 176 /* The following three macros ONE_MORE_BYTE, TWO_MORE_BYTES, and
 177    THREE_MORE_BYTES safely get one, two, and three bytes from the
 178    source text respectively.  If there are not enough bytes in the
 179    source, they jump to `label_end_of_loop'.  The caller should set
 180    variables `src' and `src_end' to appropriate areas in advance.

        On success `src' is advanced past the fetched bytes and each
        argument (which must be an assignable lvalue) receives one byte,
        in source order.  The availability test is made before anything
        is read, so when the jump is taken `src' is left unchanged and
        none of the arguments is assigned.  The enclosing function must
        define the label `label_end_of_loop'.  */
 181
 182 #define ONE_MORE_BYTE(c1)       \
 183   do {                          \
 184     if (src < src_end)          \
 185       c1 = *src++;              \
 186     else                        \
 187       goto label_end_of_loop;   \
 188   } while (0)
 189
     /* Two bytes are needed, so the last byte read (src + 1) must still
        be below `src_end'.  */
 190 #define TWO_MORE_BYTES(c1, c2)  \
 191   do {                          \
 192     if (src + 1 < src_end)      \
 193       c1 = *src++, c2 = *src++; \
 194     else                        \
 195       goto label_end_of_loop;   \
 196   } while (0)
 197
     /* Three bytes are needed, so the last byte read (src + 2) must
        still be below `src_end'.  */
 198 #define THREE_MORE_BYTES(c1, c2, c3)            \
 199   do {                                          \
 200     if (src + 2 < src_end)                      \
 201       c1 = *src++, c2 = *src++, c3 = *src++;    \
 202     else                                        \
 203       goto label_end_of_loop;                   \
 204   } while (0)
 205
 206 /* The following three macros DECODE_CHARACTER_ASCII,
 207    DECODE_CHARACTER_DIMENSION1, and DECODE_CHARACTER_DIMENSION2 put
 208    the multi-byte form of a character of each class at the place
 209    pointed by `dst'.  The caller should set the variable `dst' to
 210    point to an appropriate area and the variable `coding' to point to
 211    the coding-system of the currently decoding text in advance.  */
 212
 213 /* Decode one ASCII character C.

        While a composition is in progress (COMPOSING_P is true for
        coding->composing) the character is stored as the two bytes
        0xA0 and C | 0x80 and coding->produced_char is not changed.
        Otherwise C is stored as a single byte and coding->produced_char
        is incremented.  `dst' is advanced past whatever was stored.  */
 214
 215 #define DECODE_CHARACTER_ASCII(c)                               \
 216   do {                                                          \
 217     if (COMPOSING_P (coding->composing))                        \
 218       *dst++ = 0xA0, *dst++ = (c) | 0x80;                       \
 219     else                                                        \
 220       {                                                         \
 221         *dst++ = (c);                                           \
 222         coding->produced_char++;                                \
 223       }                                                         \
 224   } while (0)
 225
 226 /* Decode one DIMENSION1 character whose charset is CHARSET and whose
 227    position-code is C.

        The bytes stored at `dst' are, in order: the base leading-code
        of CHARSET (plus 0x20 while a composition is in progress), the
        extended leading-code of CHARSET if that is nonzero, and
        C | 0x80.  coding->produced_char is incremented only when no
        composition is in progress.

        The `=' inside the second `if' is a deliberate assignment: the
        local `leading_code' is reused to hold the extended leading-code
        and the test is on the assigned value.  */
 228
 229 #define DECODE_CHARACTER_DIMENSION1(charset, c)                         \
 230   do {                                                                  \
 231     unsigned char leading_code = CHARSET_LEADING_CODE_BASE (charset);   \
 232     if (COMPOSING_P (coding->composing))                                \
 233       *dst++ = leading_code + 0x20;                                     \
 234     else                                                                \
 235       {                                                                 \
 236         *dst++ = leading_code;                                          \
 237         coding->produced_char++;                                        \
 238       }                                                                 \
 239     if (leading_code = CHARSET_LEADING_CODE_EXT (charset))              \
 240       *dst++ = leading_code;                                            \
 241     *dst++ = (c) | 0x80;                                                \
 242   } while (0)
 243
 244 /* Decode one DIMENSION2 character whose charset is CHARSET and whose
 245    position-codes are C1 and C2.

        This stores exactly what DECODE_CHARACTER_DIMENSION1 stores for
        (CHARSET, C1), including its handling of composition and of
        coding->produced_char, and then appends C2 | 0x80.  */
 246
 247 #define DECODE_CHARACTER_DIMENSION2(charset, c1, c2)    \
 248   do {                                                  \
 249     DECODE_CHARACTER_DIMENSION1 (charset, c1);          \
 250     *dst++ = (c2) | 0x80;                               \
 251   } while (0)
 252
 253 \f
 254 /*** 1. Preamble ***/
 255
 256 #include <stdio.h>
 257
 258 #ifdef emacs
 259
 260 #include <config.h>
 261 #include "lisp.h"
 262 #include "buffer.h"
 263 #include "charset.h"
 264 #include "ccl.h"
 265 #include "coding.h"
 266 #include "window.h"
 267
 268 #else  /* not emacs */
 269
 270 #include "mulelib.h"
 271
 272 #endif /* not emacs */
 273
 274 Lisp_Object Qcoding_system, Qeol_type;
     /* NOTE(review): the Q* objects in this group presumably hold the
        Lisp symbols their names suggest (e.g. `coding-system',
        `eol-type').  Their initialization is not part of this chunk --
        confirm against the symbol setup code.  */
 275 Lisp_Object Qbuffer_file_coding_system;
 276 Lisp_Object Qpost_read_conversion, Qpre_write_conversion;
 277 Lisp_Object Qno_conversion, Qundecided;
 278 Lisp_Object Qcoding_system_history;
 279 Lisp_Object Qsafe_charsets;
 280
     /* These two are only declared here; they are defined in another
        file.  */
 281 extern Lisp_Object Qinsert_file_contents, Qwrite_region;
 282 Lisp_Object Qcall_process, Qcall_process_region, Qprocess_argument;
 283 Lisp_Object Qstart_process, Qopen_network_stream;
 284 Lisp_Object Qtarget_idx;
 285
     /* NOTE(review): presumably a Lisp function object called to pick a
        safe coding system; its use is not visible in this chunk.  */
 286 Lisp_Object Vselect_safe_coding_system_function;
 287
 288 /* Mnemonic character of each format of end-of-line.  */
 289 int eol_mnemonic_unix, eol_mnemonic_dos, eol_mnemonic_mac;
 290 /* Mnemonic character to indicate format of end-of-line is not yet
 291    decided.  */
 292 int eol_mnemonic_undecided;
 293
 294 /* Format of end-of-line decided by system.  This is CODING_EOL_LF on
 295    Unix, CODING_EOL_CRLF on DOS/Windows, and CODING_EOL_CR on Mac.  */
 296 int system_eol_type;
 297
 298 #ifdef emacs
     /* Everything down to the matching #endif is compiled only when the
        macro `emacs' is defined.  */
 299
     /* NOTE(review): presumably the list and the alist of all defined
        coding systems; their contents are not visible in this chunk.  */
 300 Lisp_Object Vcoding_system_list, Vcoding_system_alist;
 301
 302 Lisp_Object Qcoding_system_p, Qcoding_system_error;
 303
 304 /* Coding system emacs-mule and raw-text are for converting only
 305    end-of-line format.  */
 306 Lisp_Object Qemacs_mule, Qraw_text;
 307
 308 /* Coding-systems are handed between Emacs Lisp programs and C internal
 309    routines by the following three variables.  */
 310 /* Coding-system for reading files and receiving data from process.  */
 311 Lisp_Object Vcoding_system_for_read;
 312 /* Coding-system for writing files and sending data to process.  */
 313 Lisp_Object Vcoding_system_for_write;
 314 /* Coding-system actually used in the latest I/O.  */
 315 Lisp_Object Vlast_coding_system_used;
 316
 317 /* A vector of length 256 which contains information about special
 318    Latin codes (especially for dealing with Microsoft codes).  */
 319 Lisp_Object Vlatin_extra_code_table;
 320
 321 /* Flag to inhibit code conversion of end-of-line format.  */
 322 int inhibit_eol_conversion;
 323
 324 /* Flag to make buffer-file-coding-system inherit from process-coding.  */
 325 int inherit_process_coding_system;
 326
 327 /* Coding system to be used to encode text for terminal display.  */
 328 struct coding_system terminal_coding;
 329
 330 /* Coding system to be used to encode text for terminal display when
 331    terminal coding system is nil.  */
 332 struct coding_system safe_terminal_coding;
 333
 334 /* Coding system of what is sent from terminal keyboard.  */
 335 struct coding_system keyboard_coding;
 336
     /* NOTE(review): presumably alists used to choose coding systems
        for file, process and network I/O respectively -- confirm
        against the variable documentation, which is not in this
        chunk.  */
 337 Lisp_Object Vfile_coding_system_alist;
 338 Lisp_Object Vprocess_coding_system_alist;
 339 Lisp_Object Vnetwork_coding_system_alist;
 340
 341 #endif /* emacs */
 342
 343 Lisp_Object Qcoding_category, Qcoding_category_index;
 344
 345 /* List of symbols `coding-category-xxx' ordered by priority.  */
 346 Lisp_Object Vcoding_category_list;
 347
 348 /* Table of coding categories (Lisp symbols).  */
 349 Lisp_Object Vcoding_category_table;
 350
 351 /* Table of the symbol name for each coding-category.  The initializer
        lists 11 names, so CODING_CATEGORY_IDX_MAX must be at least 11.
        NOTE(review): the order presumably has to match the
        CODING_CATEGORY_IDX_XXX index values, which are defined outside
        this file chunk -- confirm before reordering.  */
 352 char *coding_category_name[CODING_CATEGORY_IDX_MAX] = {
 353   "coding-category-emacs-mule",
 354   "coding-category-sjis",
 355   "coding-category-iso-7",
 356   "coding-category-iso-7-tight",
 357   "coding-category-iso-8-1",
 358   "coding-category-iso-8-2",
 359   "coding-category-iso-7-else",
 360   "coding-category-iso-8-else",
 361   "coding-category-big5",
 362   "coding-category-raw-text",
 363   "coding-category-binary"
 364 };
 365
 366 /* Table of pointers to the coding system corresponding to each coding
 367    category, indexed by category index.  */
 368 struct coding_system *coding_system_table[CODING_CATEGORY_IDX_MAX];
 369
 370 /* Table of coding category masks.  Nth element is a mask for a coding
 371    category of which priority is Nth.  This table is private to this
        file.  */
 372 static
 373 int coding_priorities[CODING_CATEGORY_IDX_MAX];
 374
 375 /* Flag to tell if we look up translation table on character code
 376    conversion.  Note that it is held as a Lisp_Object, not a C int.  */
 377 Lisp_Object Venable_character_translation;
 378 /* Standard translation table to look up on decoding (reading).  */
 379 Lisp_Object Vstandard_translation_table_for_decode;
 380 /* Standard translation table to look up on encoding (writing).  */
 381 Lisp_Object Vstandard_translation_table_for_encode;
 382
     /* NOTE(review): presumably the Lisp symbols `translation-table',
        `translation-table-id', `translation-table-for-decode' and
        `translation-table-for-encode'; their initialization is not in
        this chunk -- confirm.  */
 383 Lisp_Object Qtranslation_table;
 384 Lisp_Object Qtranslation_table_id;
 385 Lisp_Object Qtranslation_table_for_decode;
 386 Lisp_Object Qtranslation_table_for_encode;
 387
 388 /* Alist of charsets vs revision number.  */
 389 Lisp_Object Vcharset_revision_alist;
 390
 391 /* Default coding systems used for process I/O.  */
 392 Lisp_Object Vdefault_process_coding_system;
 393
 394 \f
 395 /*** 2. Emacs internal format (emacs-mule) handlers ***/
 396
 397 /* Emacs' internal format for encoding multiple character sets is a
 398    kind of multi-byte encoding, i.e. characters are encoded by
 399    variable-length sequences of one-byte codes.  ASCII characters
 400    and control characters (e.g. `tab', `newline') are represented by
 401    one-byte sequences which are their ASCII codes, in the range 0x00
 402    through 0x7F.  The other characters are represented by a sequence
 403    of `base leading-code', optional `extended leading-code', and one
 404    or two `position-code's.  The length of the sequence is determined
 405    by the base leading-code.  Leading-code takes the range 0x80
 406    through 0x9F, whereas extended leading-code and position-code take
 407    the range 0xA0 through 0xFF.  See `charset.h' for more details
 408    about leading-code and position-code.
 409
 410    There's one exception to this rule.  Special leading-code
 411    `leading-code-composition' denotes that the following several
 412    characters should be composed into one character.  Leading-codes of
 413    components (except for ASCII) are added 0x20.  An ASCII character
 414    component is represented by a 2-byte sequence of `0xA0' and
 415    `ASCII-code + 0x80'.  See also the comments in `charset.h' for the
 416    details of composite character.  Hence, we can summarize the code
 417    range as follows:
 418
 419    --- CODE RANGE of Emacs' internal format ---
 420    (character set)      (range)
 421    ASCII                0x00 .. 0x7F
 422    ELSE (1st byte)      0x80 .. 0x9F
 423         (rest bytes)    0xA0 .. 0xFF
 424    ---------------------------------------------
 425
 426   */
 427
 428 enum emacs_code_class_type emacs_code_class[256];
     /* The table above is indexed by a byte value and gives the class of
        that byte.  It is not initialized at this point in the file.  */
 429
 430 /* Go to the next statement only if *SRC is accessible and the code is
 431    not less than 0xA0 (i.e. it is in the range 0xA0 through 0xFF).

        The byte examined is consumed: `src' is advanced past it.  If
        `src' has already reached `src_end', nothing is read and control
        jumps to `label_end_of_switch'.  If the byte is below 0xA0, the
        enclosing function returns 0.  The enclosing function must
        therefore define `src', `src_end' and the label
        `label_end_of_switch'.  */
 432 #define CHECK_CODE_RANGE_A0_FF  \
 433   do {                          \
 434     if (src >= src_end)         \
 435       goto label_end_of_switch; \
 436     else if (*src++ < 0xA0)     \
 437       return 0;                 \
 438   } while (0)
 439
 440 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 441    Check if a text is encoded in Emacs' internal format.  If it is,
 442    return CODING_CATEGORY_MASK_EMACS_MULE, else return 0.

        The bytes from SRC up to (but not including) SRC_END are scanned
        once.  0 is returned as soon as one of these is seen: a byte
        classified as EMACS_invalid_code; a control-class byte equal to
        ISO_CODE_ESC, ISO_CODE_SI or ISO_CODE_SO; or a byte below 0xA0
        in a position where a leading code requires a trailing byte.  A
        multi-byte sequence that is cut short by SRC_END is not treated
        as an error.  */
 443
 444 int
 445 detect_coding_emacs_mule (src, src_end)
 446      unsigned char *src, *src_end;
 447 {
 448   unsigned char c;
 449   int composing = 0;
       /* `composing' is nonzero while the bytes being scanned belong to
          a composition sequence.  */
 450
 451   while (src < src_end)
 452     {
 453       c = *src++;
 454
           /* Inside a composition, a byte below 0xA0 ends the
              composition.  Any other byte has 0x20 subtracted so that
              the class lookup below is made on the reduced value; in
              particular 0xA0 becomes 0x80.  */
 455       if (composing)
 456         {
 457           if (c < 0xA0)
 458             composing = 0;
 459           else
 460             c -= 0x20;
 461         }
 462
 463       switch (emacs_code_class[c])
 464         {
 465         case EMACS_ascii_code:
 466         case EMACS_linefeed_code:
 467           break;
 468
 469         case EMACS_control_code:
 470           if (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO)
 471             return 0;
 472           break;
 473
 474         case EMACS_invalid_code:
 475           return 0;
 476
 477         case EMACS_leading_code_composition: /* c == 0x80 */
               /* If still composing, the byte read was 0xA0 before the
                  subtraction above, so one more byte of 0xA0 or above
                  must follow.  Otherwise this byte starts a new
                  composition.  */
 478           if (composing)
 479             CHECK_CODE_RANGE_A0_FF;
 480           else
 481             composing = 1;
 482           break;
 483
 484         case EMACS_leading_code_4:
 485           CHECK_CODE_RANGE_A0_FF;
 486           /* fall down to check it two more times ...  */
 487
 488         case EMACS_leading_code_3:
 489           CHECK_CODE_RANGE_A0_FF;
 490           /* fall down to check it one more time ...  */
 491
 492         case EMACS_leading_code_2:
 493           CHECK_CODE_RANGE_A0_FF;
 494           break;
 495
 496         default:
               /* CHECK_CODE_RANGE_A0_FF jumps here when the source is
                  exhausted; the outer loop condition then ends the
                  scan.  */
 497         label_end_of_switch:
 498           break;
 499         }
 500     }
 501   return CODING_CATEGORY_MASK_EMACS_MULE;
 502 }
 503
 504 \f
 505 /*** 3. ISO2022 handlers ***/
 506
 507 /* The following note describes the coding system ISO2022 briefly.
 508    Since the intention of this note is to help in understanding of
 509    the programs in this file, some parts are NOT ACCURATE or OVERLY
 510    SIMPLIFIED.  For the thorough understanding, please refer to the
 511    original document of ISO2022.
 512
 513    ISO2022 provides many mechanisms to encode several character sets
 514    in 7-bit and 8-bit environment.  If one chooses 7-bit environment,
 515    all text is encoded by codes of less than 128.  This may make the
 516    encoded text a little bit longer, but the text gets more stability
 517    to pass through several gateways (some of them strip off the MSB).
 518
 519    There are two kinds of character set: control character set and
 520    graphic character set.  The former contains control characters such
 521    as `newline' and `escape' to provide control functions (control
 522    functions are provided also by escape sequences).  The latter
 523    contains graphic characters such as `A' and `-'.  Emacs recognizes
 524    two control character sets and many graphic character sets.
 525
 526    Graphic character sets are classified into one of the following
 527    four classes, DIMENSION1_CHARS94, DIMENSION1_CHARS96,
 528    DIMENSION2_CHARS94, DIMENSION2_CHARS96 according to the number of
 529    bytes (DIMENSION) and the number of characters in one dimension
 530    (CHARS) of the set.  In addition, each character set is assigned an
 531    identification tag (called "final character" and denoted as <F>
 532    here after) which is unique in each class.  <F> of each character
 533    set is decided by ECMA(*) when it is registered in ISO.  Code range
 534    of <F> is 0x30..0x7F (0x30..0x3F are for private use only).
 535
 536    Note (*): ECMA = European Computer Manufacturers Association
 537
 538    Here are examples of graphic character set [NAME(<F>)]:
 539         o DIMENSION1_CHARS94 -- ASCII('B'), right-half-of-JISX0201('I'), ...
 540         o DIMENSION1_CHARS96 -- right-half-of-ISO8859-1('A'), ...
 541         o DIMENSION2_CHARS94 -- GB2312('A'), JISX0208('B'), ...
 542         o DIMENSION2_CHARS96 -- none for the moment
 543
 544    A code area (1byte=8bits) is divided into 4 areas, C0, GL, C1, and GR.
 545         C0 [0x00..0x1F] -- control character plane 0
 546         GL [0x20..0x7F] -- graphic character plane 0
 547         C1 [0x80..0x9F] -- control character plane 1
 548         GR [0xA0..0xFF] -- graphic character plane 1
 549
 550    A control character set is directly designated and invoked to C0 or
 551    C1 by an escape sequence.  The most common case is that ISO646's
 552    control character set is designated/invoked to C0 and ISO6429's
 553    control character set is designated/invoked to C1, and usually
 554    these designations/invocations are omitted in a coded text.  With
 555    7-bit environment, only C0 can be used, and a control character for
 556    C1 is encoded by an appropriate escape sequence to fit in the
 557    environment.  Every control character for C1 has a
 558    corresponding escape sequence defined.
 559
 560    A graphic character set is at first designated to one of four
 561    graphic registers (G0 through G3), then these graphic registers are
 562    invoked to GL or GR.  These designations and invocations can be
 563    done independently.  The most common case is that G0 is invoked to
 564    GL, G1 is invoked to GR, and ASCII is designated to G0, and usually
 565    these invocations and designations are omitted in a coded text.
 566    With 7-bit environment, only GL can be used.
 567
 568    When a graphic character set of CHARS94 is invoked to GL, code 0x20
 569    and 0x7F of GL area work as control characters SPACE and DEL
 570    respectively, and code 0xA0 and 0xFF of GR area should not be used.
 571
 572    There are two ways of invocation: locking-shift and single-shift.
 573    With locking-shift, the invocation lasts until the next different
 574    invocation, whereas with single-shift, the invocation works only
 575    for the following character and doesn't affect locking-shift.
 576    Invocations are done by the following control characters or escape
 577    sequences.
 578
 579    ----------------------------------------------------------------------
 580    function             control char    escape sequence description
 581    ----------------------------------------------------------------------
 582    SI  (shift-in)               0x0F    none            invoke G0 to GL
 583    SO  (shift-out)              0x0E    none            invoke G1 to GL
 584    LS2 (locking-shift-2)        none    ESC 'n'         invoke G2 into GL
 585    LS3 (locking-shift-3)        none    ESC 'o'         invoke G3 into GL
 586    SS2 (single-shift-2)         0x8E    ESC 'N'         invoke G2 into GL
 587    SS3 (single-shift-3)         0x8F    ESC 'O'         invoke G3 into GL
 588    ----------------------------------------------------------------------
 589    The first four are for locking-shift.  Control characters for these
 590    functions are defined by macros ISO_CODE_XXX in `coding.h'.
 591
 592    Designations are done by the following escape sequences.
 593    ----------------------------------------------------------------------
 594    escape sequence      description
 595    ----------------------------------------------------------------------
 596    ESC '(' <F>          designate DIMENSION1_CHARS94<F> to G0
 597    ESC ')' <F>          designate DIMENSION1_CHARS94<F> to G1
 598    ESC '*' <F>          designate DIMENSION1_CHARS94<F> to G2
 599    ESC '+' <F>          designate DIMENSION1_CHARS94<F> to G3
 600    ESC ',' <F>          designate DIMENSION1_CHARS96<F> to G0 (*)
 601    ESC '-' <F>          designate DIMENSION1_CHARS96<F> to G1
 602    ESC '.' <F>          designate DIMENSION1_CHARS96<F> to G2
 603    ESC '/' <F>          designate DIMENSION1_CHARS96<F> to G3
 604    ESC '$' '(' <F>      designate DIMENSION2_CHARS94<F> to G0 (**)
 605    ESC '$' ')' <F>      designate DIMENSION2_CHARS94<F> to G1
 606    ESC '$' '*' <F>      designate DIMENSION2_CHARS94<F> to G2
 607    ESC '$' '+' <F>      designate DIMENSION2_CHARS94<F> to G3
 608    ESC '$' ',' <F>      designate DIMENSION2_CHARS96<F> to G0 (*)
 609    ESC '$' '-' <F>      designate DIMENSION2_CHARS96<F> to G1
 610    ESC '$' '.' <F>      designate DIMENSION2_CHARS96<F> to G2
 611    ESC '$' '/' <F>      designate DIMENSION2_CHARS96<F> to G3
 612    ----------------------------------------------------------------------
 613
 614    In this list, "DIMENSION1_CHARS94<F>" means a graphic character set
 615    of dimension 1, chars 94, and final character <F>, and etc.
 616
 617    Note (*): Although these designations are not allowed in ISO2022,
 618    Emacs accepts them on decoding, and produces them on encoding
 619    CHARS96 character set in a coding system which is characterized as
 620    7-bit environment, non-locking-shift, and non-single-shift.
 621
 622    Note (**): If <F> is '@', 'A', or 'B', the intermediate character
 623    '(' can be omitted.  We call this as "short-form" here after.
 624
 625    Now you may notice that there are a lot of ways for encoding the
 626    same multilingual text in ISO2022.  Actually, there exists many
 627    coding systems such as Compound Text (used in X's inter client
 628    communication), ISO-2022-JP (used in Japanese Internet), ISO-2022-KR
 629    (used in Korean Internet), EUC (Extended UNIX Code, used in Asian
 630    localized platforms), and all of these are variants of ISO2022.
 631
 632    In addition to the above, Emacs handles two more kinds of escape
 633    sequences: ISO6429's direction specification and Emacs' private
 634    sequence for specifying character composition.
 635
 636    ISO6429's direction specification takes the following format:
 637         o CSI ']'      -- end of the current direction
 638         o CSI '0' ']'  -- end of the current direction
 639         o CSI '1' ']'  -- start of left-to-right text
 640         o CSI '2' ']'  -- start of right-to-left text
 641    The control character CSI (0x9B: control sequence introducer) is
 642    abbreviated to the escape sequence ESC '[' in 7-bit environment.
 643
 644    Character composition specification takes the following format:
 645         o ESC '0' -- start character composition
 646         o ESC '1' -- end character composition
 647    Since these are not standard escape sequences of any ISO, the use
 648    of them for these meaning is restricted to Emacs only.  */
 649
 650 enum iso_code_class_type iso_code_class[256];
 651
     /* Nonzero if CHARSET is acceptable to the coding system stored at
        coding_system_table[IDX]: either its safe_charsets entry for
        CHARSET is nonzero, or the coding system has a requested
        designation for CHARSET (anything other than
        CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION).  IDX is evaluated
        twice.
        NOTE(review): coding_system_table[IDX] is dereferenced without a
        NULL check; this assumes every category used here has a coding
        system set up -- confirm against the code that fills the
        table.  */
 652 #define CHARSET_OK(idx, charset)                        \
 653   (coding_system_table[idx]->safe_charsets[charset]     \
 654    || (CODING_SPEC_ISO_REQUESTED_DESIGNATION            \
 655        (coding_system_table[idx], charset)              \
 656        != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 657
     /* Nonzero if the coding system at coding_system_table[IDX] has a
        non-negative initial designation for graphic register 1.  The
        same NULL-pointer assumption as for CHARSET_OK applies.  */
 658 #define SHIFT_OUT_OK(idx) \
 659   (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding_system_table[idx], 1) >= 0)
 660
 661 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
 662    Check if a text is encoded in ISO2022.  If it is, returns an
 663    integer in which appropriate flag bits any of:
 664         CODING_CATEGORY_MASK_ISO_7
 665         CODING_CATEGORY_MASK_ISO_7_TIGHT
 666         CODING_CATEGORY_MASK_ISO_8_1
 667         CODING_CATEGORY_MASK_ISO_8_2
 668         CODING_CATEGORY_MASK_ISO_7_ELSE
 669         CODING_CATEGORY_MASK_ISO_8_ELSE
 670    are set.  If a code which should never appear in ISO2022 is found,
 671    returns 0.  */
 672
 673 int
 674 detect_coding_iso2022 (src, src_end)
 675      unsigned char *src, *src_end;
 676 {
 677   int mask = CODING_CATEGORY_MASK_ISO;
 678   int mask_found = 0;
 679   int reg[4], shift_out = 0;
 680   int c, c1, i, charset;
 681
 682   reg[0] = CHARSET_ASCII, reg[1] = reg[2] = reg[3] = -1;
 683   while (mask && src < src_end)
 684     {
 685       c = *src++;
 686       switch (c)
 687         {
 688         case ISO_CODE_ESC:
 689           if (src >= src_end)
 690             break;
 691           c = *src++;
 692           if (c >= '(' && c <= '/')
 693             {
 694               /* Designation sequence for a charset of dimension 1.  */
 695               if (src >= src_end)
 696                 break;
 697               c1 = *src++;
 698               if (c1 < ' ' || c1 >= 0x80
 699                   || (charset = iso_charset_table[0][c >= ','][c1]) < 0)
 700                 /* Invalid designation sequence.  Just ignore.  */
 701                 break;
 702               reg[(c - '(') % 4] = charset;
 703             }
 704           else if (c == '$')
 705             {
 706               /* Designation sequence for a charset of dimension 2.  */
 707               if (src >= src_end)
 708                 break;
 709               c = *src++;
 710               if (c >= '@' && c <= 'B')
 711                 /* Designation for JISX0208.1978, GB2312, or JISX0208.  */
 712                 reg[0] = charset = iso_charset_table[1][0][c];
 713               else if (c >= '(' && c <= '/')
 714                 {
 715                   if (src >= src_end)
 716                     break;
 717                   c1 = *src++;
 718                   if (c1 < ' ' || c1 >= 0x80
 719                       || (charset = iso_charset_table[1][c >= ','][c1]) < 0)
 720                     /* Invalid designation sequence.  Just ignore.  */
 721                     break;
 722                   reg[(c - '(') % 4] = charset;
 723                 }
 724               else
 725                 /* Invalid designation sequence.  Just ignore.  */
 726                 break;
 727             }
 728           else if (c == 'N' || c == 'n')
 729             {
 730               if (shift_out == 0
 731                   && (reg[1] >= 0
 732                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 733                       || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 734                 {
 735                   /* Locking shift out.  */
 736                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 737                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 738                   shift_out = 1;
 739                 }
 740               break;
 741             }
 742           else if (c == 'O' || c == 'o')
 743             {
 744               if (shift_out == 1)
 745                 {
 746                   /* Locking shift in.  */
 747                   mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 748                   mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 749                   shift_out = 0;
 750                 }
 751               break;
 752             }
 753           else if (c == '0' || c == '1' || c == '2')
 754             /* Start/end composition.  Just ignore.  */
 755             break;
 756           else
 757             /* Invalid escape sequence.  Just ignore.  */
 758             break;
 759
 760           /* We found a valid designation sequence for CHARSET.  */
 761           mask &= ~CODING_CATEGORY_MASK_ISO_8BIT;
 762           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7, charset))
 763             mask_found |= CODING_CATEGORY_MASK_ISO_7;
 764           else
 765             mask &= ~CODING_CATEGORY_MASK_ISO_7;
 766           if (CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_TIGHT, charset))
 767             mask_found |= CODING_CATEGORY_MASK_ISO_7_TIGHT;
 768           else
 769             mask &= ~CODING_CATEGORY_MASK_ISO_7_TIGHT;
 770           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_7_ELSE, charset))
 771             mask &= ~CODING_CATEGORY_MASK_ISO_7_ELSE;
 772           if (! CHARSET_OK (CODING_CATEGORY_IDX_ISO_8_ELSE, charset))
 773             mask &= ~CODING_CATEGORY_MASK_ISO_8_ELSE;
 774           break;
 775
 776         case ISO_CODE_SO:
 777           if (shift_out == 0
 778               && (reg[1] >= 0
 779                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_7_ELSE)
 780                   || SHIFT_OUT_OK (CODING_CATEGORY_IDX_ISO_8_ELSE)))
 781             {
 782               /* Locking shift out.  */
 783               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 784               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 785             }
 786           break;
 787
 788         case ISO_CODE_SI:
 789           if (shift_out == 1)
 790             {
 791               /* Locking shift in.  */
 792               mask &= ~CODING_CATEGORY_MASK_ISO_7BIT;
 793               mask_found |= CODING_CATEGORY_MASK_ISO_SHIFT;
 794             }
 795           break;
 796
 797         case ISO_CODE_CSI:
 798         case ISO_CODE_SS2:
 799         case ISO_CODE_SS3:
 800           {
 801             int newmask = CODING_CATEGORY_MASK_ISO_8_ELSE;
 802
 803             if (c != ISO_CODE_CSI)
 804               {
 805                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 806                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 807                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 808                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 809                     & CODING_FLAG_ISO_SINGLE_SHIFT)
 810                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 811               }
 812             if (VECTORP (Vlatin_extra_code_table)
 813                 && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 814               {
 815                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 816                     & CODING_FLAG_ISO_LATIN_EXTRA)
 817                   newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 818                 if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 819                     & CODING_FLAG_ISO_LATIN_EXTRA)
 820                   newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 821               }
 822             mask &= newmask;
 823             mask_found |= newmask;
 824           }
 825           break;
 826
 827         default:
 828           if (c < 0x80)
 829             break;
 830           else if (c < 0xA0)
 831             {
 832               if (VECTORP (Vlatin_extra_code_table)
 833                   && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
 834                 {
 835                   int newmask = 0;
 836
 837                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_1]->flags
 838                       & CODING_FLAG_ISO_LATIN_EXTRA)
 839                     newmask |= CODING_CATEGORY_MASK_ISO_8_1;
 840                   if (coding_system_table[CODING_CATEGORY_IDX_ISO_8_2]->flags
 841                       & CODING_FLAG_ISO_LATIN_EXTRA)
 842                     newmask |= CODING_CATEGORY_MASK_ISO_8_2;
 843                   mask &= newmask;
 844                   mask_found |= newmask;
 845                 }
 846               else
 847                 return 0;
 848             }
 849           else
 850             {
 851               unsigned char *src_begin = src;
 852
 853               mask &= ~(CODING_CATEGORY_MASK_ISO_7BIT
 854                         | CODING_CATEGORY_MASK_ISO_7_ELSE);
 855               mask_found |= CODING_CATEGORY_MASK_ISO_8_1;
 856               while (src < src_end && *src >= 0xA0)
 857                 src++;
 858               if ((src - src_begin - 1) & 1 && src < src_end)
 859                 mask &= ~CODING_CATEGORY_MASK_ISO_8_2;
 860               else
 861                 mask_found |= CODING_CATEGORY_MASK_ISO_8_2;
 862             }
 863           break;
 864         }
 865     }
 866
 867   return (mask & mask_found);
 868 }
 869
 870 /* Decode a character of which charset is CHARSET and the 1st position
 871    code is C1.  If dimension of CHARSET is 2, the 2nd position code is
 872    fetched from SRC and set to C2.  If CHARSET is negative, it means
 873    that we are decoding ill formed text, and what we can do is just to
 874    read C1 as is.

        This macro expands in place and uses the following names from the
        expansion site: CODING, SRC, DST, C2 and TRANSLATION_TABLE.  C1 must
        be an lvalue because it is passed to SPLIT_CHAR together with C2
        (NOTE(review): SPLIT_CHAR presumably stores into them -- confirm).

        Steps, in order:
        - At the head of a composition, LEADING_CODE_COMPOSITION (and 0xFF
          when composing with rules) is written to DST first, and
          CODING->composing is advanced by 2.
        - For a dimension-2 charset the 2nd byte is read with ONE_MORE_BYTE;
          if its class is neither ISO_0x20_or_0x7F nor ISO_graphic_plane_0,
          SRC is backed up by one byte and C2 is replaced by ' '.
        - If TRANSLATION_TABLE is non-nil and translate_char returns a
          non-negative code, that code replaces the charset and position
          codes.
        - The result is emitted via DECODE_CHARACTER_ASCII,
          DECODE_CHARACTER_DIMENSION1 or DECODE_CHARACTER_DIMENSION2.
        - When composing with rules, CODING->composing is set so that the
          next code is taken as a composition rule.  */
 875
 876 #define DECODE_ISO_CHARACTER(charset, c1)                               \
 877   do {                                                                  \
 878     int c_alt, charset_alt = (charset);                                 \
 879     if (COMPOSING_HEAD_P (coding->composing))                           \
 880       {                                                                 \
 881         *dst++ = LEADING_CODE_COMPOSITION;                              \
 882         if (COMPOSING_WITH_RULE_P (coding->composing))                  \
 883           /* To tell composition rules are embedded.  */                \
 884           *dst++ = 0xFF;                                                \
 885         coding->composing += 2;                                         \
 886       }                                                                 \
 887     if ((charset) >= 0)                                                 \
 888       {                                                                 \
 889         if (CHARSET_DIMENSION (charset) == 2)                           \
 890           {                                                             \
 891             ONE_MORE_BYTE (c2);                                         \
 892             if (iso_code_class[(c2) & 0x7F] != ISO_0x20_or_0x7F         \
 893                 && iso_code_class[(c2) & 0x7F] != ISO_graphic_plane_0)  \
 894               {                                                         \
 895                 src--;                                                  \
 896                 c2 = ' ';                                               \
 897               }                                                         \
 898           }                                                             \
 899         if (!NILP (translation_table)                                   \
 900             && ((c_alt = translate_char (translation_table,             \
 901                                          -1, (charset), c1, c2)) >= 0)) \
 902           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
 903       }                                                                 \
 904     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
 905       DECODE_CHARACTER_ASCII (c1);                                      \
 906     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
 907       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
 908     else                                                                \
 909       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
 910     if (COMPOSING_WITH_RULE_P (coding->composing))                      \
 911       /* To tell a composition rule follows.  */                        \
 912       coding->composing = COMPOSING_WITH_RULE_RULE;                     \
 913   } while (0)
 914
 915 /* Set designation state into CODING.

        Look up the charset identified by DIMENSION, CHARS and FINAL_CHAR
        with ISO_CHARSET_TABLE and designate it to graphic register REG.

        The designation is accepted when the charset is known (>= 0) and
        either REG is the register CODING requests for it or the charset
        is marked in CODING->safe_charsets.  In that case the reverse
        charset is used instead when CODING_MODE_DIRECTION is set and a
        reverse charset exists.

        Otherwise REG is remembered in last_invalid_designation_register
        and control jumps to label_invalid_code.  A designation of ASCII
        to register 0 that follows an invalid designation to register 0
        also jumps there, so that the sequence is kept as is.

        The macro expands in place: it needs CODING and the label
        label_invalid_code at the expansion site.  REG is evaluated more
        than once, so it must have no side effects.  */
 916 #define DECODE_DESIGNATION(reg, dimension, chars, final_char)              \
 917   do {                                                                     \
 918     int charset = ISO_CHARSET_TABLE (make_number (dimension),              \
 919                                      make_number (chars),                  \
 920                                      make_number (final_char));            \
 921     if (charset >= 0                                                       \
 922         && (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) == (reg) \
 923             || coding->safe_charsets[charset]))                            \
 924       {                                                                    \
 925         if (coding->spec.iso2022.last_invalid_designation_register == 0    \
 926             && (reg) == 0                                                  \
 927             && charset == CHARSET_ASCII)                                   \
 928           {                                                                \
 929             /* We should insert this designation sequence as is so         \
 930                that it is surely written back to a file.  */               \
 931             coding->spec.iso2022.last_invalid_designation_register = -1;   \
 932             goto label_invalid_code;                                       \
 933           }                                                                \
 934         coding->spec.iso2022.last_invalid_designation_register = -1;       \
 935         if ((coding->mode & CODING_MODE_DIRECTION)                         \
 936             && CHARSET_REVERSE_CHARSET (charset) >= 0)                     \
 937           charset = CHARSET_REVERSE_CHARSET (charset);                     \
 938         CODING_SPEC_ISO_DESIGNATION (coding, reg) = charset;               \
 939       }                                                                    \
 940     else                                                                   \
 941       {                                                                    \
 942         coding->spec.iso2022.last_invalid_designation_register = (reg);    \
 943         goto label_invalid_code;                                           \
 944       }                                                                    \
 945   } while (0)
 946
 947 /* Check if the current composing sequence contains only valid codes.
 948    SRC points just after the escape sequence that started the
 949    composition; SRC_END is the end of the available source text.
 950
        Return 0 if the end of composition (ESC '1') is found before
        SRC_END and no invalid code was seen before it.
        Return the number of bytes scanned from SRC (a positive value) if
        an invalid code was seen, whether or not the end was reached.
        Otherwise the text is valid so far but ends before the composing
        sequence does: return 0 if CODING_MODE_LAST_BLOCK is set in
        CODING->mode, else -1.

        Every escape sequence other than ESC '1' counts as invalid unless
        CODING has CODING_FLAG_ISO_DESIGNATION set and the sequence
        designates a charset that is in CODING->safe_charsets and has a
        requested designation register.  */
 951
 952 int
 953 check_composing_code (coding, src, src_end)
 954      struct coding_system *coding;
 955      unsigned char *src, *src_end;
 956 {
 957   unsigned char *src_start = src;
 958   int invalid_code_found = 0;
 959   int charset, c, c1, dim;
 960
 961   while (src < src_end)
 962     {
 963       if (*src++ != ISO_CODE_ESC) continue;
 964       if (src >= src_end) break;
 965       if ((c = *src++) == '1') /* end of composition */
 966         return (invalid_code_found ? src - src_start : 0);
 967       if (src + 2 >= src_end) break;
           /* The flag test must be parenthesized: `!coding->flags & FLAG'
              would negate the whole flag word before masking.  */
 968       if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
 969         invalid_code_found = 1;
 970       else
 971         {
               /* DIM indexes iso_charset_table: 0 for charsets of
                  dimension 1, 1 for charsets of dimension 2.  */
 972           dim = 0;
 973           if (c == '$')
 974             {
 975               dim = 1;
                   /* In the short form ESC '$' F (F in '@'..'B') there is
                      no intermediate character; treat it as '('.  */
 976               c = (*src >= '@' && *src <= 'B') ? '(' : *src++;
 977             }
 978           if (c >= '(' && c <= '/')
 979             {
 980               c1 = *src++;
 981               if ((c1 < ' ' || c1 >= 0x80)
 982                   || (charset = iso_charset_table[dim][c >= ','][c1]) < 0
 983                   || ! coding->safe_charsets[charset]
 984                   || (CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
 985                       == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION))
 986                 invalid_code_found = 1;
 987             }
 988           else
 989             invalid_code_found = 1;
 990         }
 991     }
 992   return (invalid_code_found
 993           ? src - src_start
 994           : (coding->mode & CODING_MODE_LAST_BLOCK ? 0 : -1));
 995 }
 996
 997 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".

        Decode SRC_BYTES bytes of ISO2022 text at SOURCE into DESTINATION,
        using and updating the designation/invocation state in CODING.

        If DST_BYTES is nonzero, DESTINATION + DST_BYTES bounds the
        output.  If it is zero, the loop instead runs only while
        DST < SRC - 6 (NOTE(review): presumably SOURCE and DESTINATION
        then lie in one buffer -- confirm with callers).

        On return CODING->consumed, CODING->consumed_char, CODING->produced
        and CODING->produced_char are set, and CODING->fake_multibyte is 1
        if some bytes were copied to the output without decoding.  The
        value is CODING_FINISH_NORMAL, CODING_FINISH_INSUFFICIENT_SRC,
        CODING_FINISH_INSUFFICIENT_DST or CODING_FINISH_INCONSISTENT_EOL.  */
 998
 999 int
1000 decode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1001      struct coding_system *coding;
1002      unsigned char *source, *destination;
1003      int src_bytes, dst_bytes;
1004 {
1005   unsigned char *src = source;
1006   unsigned char *src_end = source + src_bytes;
1007   unsigned char *dst = destination;
1008   unsigned char *dst_end = destination + dst_bytes;
1009   /* Since the maximum bytes produced by each loop is 7, we subtract 6
1010      from DST_END to assure that overflow checking is necessary only
1011      at the head of loop.  */
1012   unsigned char *adjusted_dst_end = dst_end - 6;
1013   int charset;
1014   /* Charsets invoked to graphic plane 0 and 1 respectively.  */
1015   int charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1016   int charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1017   Lisp_Object translation_table
1018     = coding->translation_table_for_decode;
1019   int result = CODING_FINISH_NORMAL;
1020
       /* Fall back to the standard table when translation is enabled but
          CODING has no table of its own.  */
1021   if (!NILP (Venable_character_translation) && NILP (translation_table))
1022     translation_table = Vstandard_translation_table_for_decode;
1023
1024   coding->produced_char = 0;
1025   coding->fake_multibyte = 0;
1026   while (src < src_end && (dst_bytes
1027                            ? (dst < adjusted_dst_end)
1028                            : (dst < src - 6)))
1029     {
1030       /* SRC_BASE remembers the start position in source in each loop.
1031          The loop will be exited when there's not enough source text
1032          to analyze long escape sequence or 2-byte code (within macros
1033          ONE_MORE_BYTE or TWO_MORE_BYTES).  In that case, SRC is reset
1034          to SRC_BASE before exiting.  */
1035       unsigned char *src_base = src;
1036       int c1 = *src++, c2;
1037
1038       switch (iso_code_class [c1])
1039         {
1040         case ISO_0x20_or_0x7F:
1041           if (!coding->composing
1042               && (charset0 < 0 || CHARSET_CHARS (charset0) == 94))
1043             {
1044               /* This is SPACE or DEL.  */
1045               *dst++ = c1;
1046               coding->produced_char++;
1047               break;
1048             }
1049           /* This is a graphic character, we fall down ...  */
1050
1051         case ISO_graphic_plane_0:
1052           if (coding->composing == COMPOSING_WITH_RULE_RULE)
1053             {
1054               /* This is a composition rule.  */
1055               *dst++ = c1 | 0x80;
1056               coding->composing = COMPOSING_WITH_RULE_TAIL;
1057             }
1058           else
1059             DECODE_ISO_CHARACTER (charset0, c1);
1060           break;
1061
1062         case ISO_0xA0_or_0xFF:
1063           if (charset1 < 0 || CHARSET_CHARS (charset1) == 94
1064               || coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1065             goto label_invalid_code;
1066           /* This is a graphic character, we fall down ... */
1067
1068         case ISO_graphic_plane_1:
1069           if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
1070             goto label_invalid_code;
1071           else
1072             DECODE_ISO_CHARACTER (charset1, c1);
1073           break;
1074
1075         case ISO_control_code:
1076           /* All ISO2022 control characters in this class have the
1077              same representation in Emacs internal format.  */
1078           if (c1 == '\n'
1079               && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1080               && (coding->eol_type == CODING_EOL_CR
1081                   || coding->eol_type == CODING_EOL_CRLF))
1082             {
1083               result = CODING_FINISH_INCONSISTENT_EOL;
1084               goto label_end_of_loop_2;
1085             }
1086           *dst++ = c1;
1087           coding->produced_char++;
1088           break;
1089
1090         case ISO_carriage_return:
               /* CR becomes '\n' for EOL type CR, CR LF becomes '\n' for
                  EOL type CRLF; otherwise CR is copied as is.  */
1091           if (coding->eol_type == CODING_EOL_CR)
1092             *dst++ = '\n';
1093           else if (coding->eol_type == CODING_EOL_CRLF)
1094             {
1095               ONE_MORE_BYTE (c1);
1096               if (c1 == ISO_CODE_LF)
1097                 *dst++ = '\n';
1098               else
1099                 {
1100                   if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
1101                     {
1102                       result = CODING_FINISH_INCONSISTENT_EOL;
1103                       goto label_end_of_loop_2;
1104                     }
                       /* Lone CR: put back the byte after it and emit
                          the CR itself.  */
1105                   src--;
1106                   *dst++ = '\r';
1107                 }
1108             }
1109           else
1110             *dst++ = c1;
1111           coding->produced_char++;
1112           break;
1113
1114         case ISO_shift_out:
1115           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1116               || CODING_SPEC_ISO_DESIGNATION (coding, 1) < 0)
1117             goto label_invalid_code;
1118           CODING_SPEC_ISO_INVOCATION (coding, 0) = 1;
1119           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1120           break;
1121
1122         case ISO_shift_in:
1123           if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
1124             goto label_invalid_code;
1125           CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
1126           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1127           break;
1128
1129         case ISO_single_shift_2_7:
1130         case ISO_single_shift_2:
1131           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1132             goto label_invalid_code;
1133           /* SS2 is handled as an escape sequence of ESC 'N' */
1134           c1 = 'N';
1135           goto label_escape_sequence;
1136
1137         case ISO_single_shift_3:
1138           if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
1139             goto label_invalid_code;
1140           /* SS3 is handled as an escape sequence of ESC 'O' */
1141           c1 = 'O';
1142           goto label_escape_sequence;
1143
1144         case ISO_control_sequence_introducer:
1145           /* CSI is handled as an escape sequence of ESC '[' ...  */
1146           c1 = '[';
1147           goto label_escape_sequence;
1148
1149         case ISO_escape:
1150           ONE_MORE_BYTE (c1);
1151         label_escape_sequence:
1152           /* Escape sequences handled by Emacs are invocation,
1153              designation, direction specification, and character
1154              composition specification.  */
1155           switch (c1)
1156             {
1157             case '&':           /* revision of following character set */
                   /* The form is ESC '&' <'@'..'~'> ESC ...; the revision
                      byte itself is skipped and the escape sequence that
                      follows is processed.  */
1158               ONE_MORE_BYTE (c1);
1159               if (!(c1 >= '@' && c1 <= '~'))
1160                 goto label_invalid_code;
1161               ONE_MORE_BYTE (c1);
1162               if (c1 != ISO_CODE_ESC)
1163                 goto label_invalid_code;
1164               ONE_MORE_BYTE (c1);
1165               goto label_escape_sequence;
1166
1167             case '$':           /* designation of 2-byte character set */
1168               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1169                 goto label_invalid_code;
1170               ONE_MORE_BYTE (c1);
1171               if (c1 >= '@' && c1 <= 'B')
1172                 {       /* designation of JISX0208.1978, GB2312.1980,
1173                                    or JISX0208.1980 */
1174                   DECODE_DESIGNATION (0, 2, 94, c1);
1175                 }
1176               else if (c1 >= 0x28 && c1 <= 0x2B)
1177                 {       /* designation of DIMENSION2_CHARS94 character set */
1178                   ONE_MORE_BYTE (c2);
1179                   DECODE_DESIGNATION (c1 - 0x28, 2, 94, c2);
1180                 }
1181               else if (c1 >= 0x2C && c1 <= 0x2F)
1182                 {       /* designation of DIMENSION2_CHARS96 character set */
1183                   ONE_MORE_BYTE (c2);
1184                   DECODE_DESIGNATION (c1 - 0x2C, 2, 96, c2);
1185                 }
1186               else
1187                 goto label_invalid_code;
1188               break;
1189
1190             case 'n':           /* invocation of locking-shift-2 */
1191               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1192                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1193                 goto label_invalid_code;
1194               CODING_SPEC_ISO_INVOCATION (coding, 0) = 2;
1195               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1196               break;
1197
1198             case 'o':           /* invocation of locking-shift-3 */
1199               if (! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT)
1200                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1201                 goto label_invalid_code;
1202               CODING_SPEC_ISO_INVOCATION (coding, 0) = 3;
1203               charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1204               break;
1205
1206             case 'N':           /* invocation of single-shift-2 */
1207               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1208                   || CODING_SPEC_ISO_DESIGNATION (coding, 2) < 0)
1209                 goto label_invalid_code;
1210               ONE_MORE_BYTE (c1);
1211               charset = CODING_SPEC_ISO_DESIGNATION (coding, 2);
1212               DECODE_ISO_CHARACTER (charset, c1);
1213               break;
1214
1215             case 'O':           /* invocation of single-shift-3 */
1216               if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1217                   || CODING_SPEC_ISO_DESIGNATION (coding, 3) < 0)
1218                 goto label_invalid_code;
1219               ONE_MORE_BYTE (c1);
1220               charset = CODING_SPEC_ISO_DESIGNATION (coding, 3);
1221               DECODE_ISO_CHARACTER (charset, c1);
1222               break;
1223
1224             case '0': case '2': /* start composing */
1225               /* Before processing composing, we must be sure that all
1226                  characters being composed are supported by CODING.
1227                  If not, we must give up composing and insert the
1228                  bunch of codes for composing as is without decoding.  */
1229               {
1230                 int result1;
1231
1232                 result1 = check_composing_code (coding, src, src_end);
1233                 if (result1 == 0)
1234                   {
1235                     coding->composing = (c1 == '0'
1236                                          ? COMPOSING_NO_RULE_HEAD
1237                                          : COMPOSING_WITH_RULE_HEAD);
1238                     coding->produced_char++;
1239                   }
1240                 else if (result1 > 0)
1241                   {
                         /* RESULT1 counts the bytes after the two-byte
                            start sequence, hence the `+ 2' when copying
                            from SRC_BASE.  */
1242                     if (result1 + 2 < (dst_bytes ? dst_end : src_base) - dst)
1243                       {
1244                         bcopy (src_base, dst, result1 + 2);
1245                         src += result1;
1246                         dst += result1 + 2;
1247                         coding->produced_char += result1 + 2;
1248                       }
1249                     else
1250                       {
1251                         result = CODING_FINISH_INSUFFICIENT_DST;
1252                         goto label_end_of_loop_2;
1253                       }
1254                   }
1255                 else
                       /* The composing sequence does not end within the
                          available source text.  */
1256                   goto label_end_of_loop;
1257               }
1258               break;
1259
1260             case '1':           /* end composing */
1261               coding->composing = COMPOSING_NO;
1262               break;
1263
1264             case '[':           /* specification of direction */
1265               if (coding->flags & CODING_FLAG_ISO_NO_DIRECTION)
1266                 goto label_invalid_code;
1267               /* For the moment, nested direction is not supported.
1268                  So, `coding->mode & CODING_MODE_DIRECTION' zero means
1269                  left-to-right, and nonzero means right-to-left.  */
1270               ONE_MORE_BYTE (c1);
1271               switch (c1)
1272                 {
1273                 case ']':       /* end of the current direction */
1274                   coding->mode &= ~CODING_MODE_DIRECTION;
                       /* NOTE(review): there is no `break' here, so control
                          falls through to the next case, which reads one
                          more byte and requires it to be ']'.  Confirm
                          whether this fall-through is intended.  */
1275
1276                 case '0':       /* end of the current direction */
1277                 case '1':       /* start of left-to-right direction */
1278                   ONE_MORE_BYTE (c1);
1279                   if (c1 == ']')
1280                     coding->mode &= ~CODING_MODE_DIRECTION;
1281                   else
1282                     goto label_invalid_code;
1283                   break;
1284
1285                 case '2':       /* start of right-to-left direction */
1286                   ONE_MORE_BYTE (c1);
1287                   if (c1 == ']')
1288                     coding->mode |= CODING_MODE_DIRECTION;
1289                   else
1290                     goto label_invalid_code;
1291                   break;
1292
1293                 default:
1294                   goto label_invalid_code;
1295                 }
1296               break;
1297
1298             default:
1299               if (! (coding->flags & CODING_FLAG_ISO_DESIGNATION))
1300                 goto label_invalid_code;
1301               if (c1 >= 0x28 && c1 <= 0x2B)
1302                 {       /* designation of DIMENSION1_CHARS94 character set */
1303                   ONE_MORE_BYTE (c2);
1304                   DECODE_DESIGNATION (c1 - 0x28, 1, 94, c2);
1305                 }
1306               else if (c1 >= 0x2C && c1 <= 0x2F)
1307                 {       /* designation of DIMENSION1_CHARS96 character set */
1308                   ONE_MORE_BYTE (c2);
1309                   DECODE_DESIGNATION (c1 - 0x2C, 1, 96, c2);
1310                 }
1311               else
1312                 {
1313                   goto label_invalid_code;
1314                 }
1315             }
1316           /* We must update these variables now.  */
1317           charset0 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 0);
1318           charset1 = CODING_SPEC_ISO_PLANE_CHARSET (coding, 1);
1319           break;
1320
1321         label_invalid_code:
               /* Copy every byte read in this iteration (from SRC_BASE up
                  to SRC) to the output unchanged.  */
1322           while (src_base < src)
1323             *dst++ = *src_base++;
1324           coding->fake_multibyte = 1;
1325         }
1326       continue;
1327
           /* Both labels below rewind SRC to SRC_BASE, so the sequence
              that could not be completed is not counted as consumed.  */
1328     label_end_of_loop:
1329       result = CODING_FINISH_INSUFFICIENT_SRC;
1330     label_end_of_loop_2:
1331       src = src_base;
1332       break;
1333     }
1334
1335   if (src < src_end)
1336     {
           /* Source text is left over.  If no other reason was recorded,
              the loop stopped on its destination-space condition.  */
1337       if (result == CODING_FINISH_NORMAL)
1338         result = CODING_FINISH_INSUFFICIENT_DST;
1339       else if (result != CODING_FINISH_INCONSISTENT_EOL
1340                && coding->mode & CODING_MODE_LAST_BLOCK)
1341         {
1342           /* This is the last block of the text to be decoded.  We had
1343              better just flush out all remaining codes in the text
1344              although they are not valid characters.  */
1345           src_bytes = src_end - src;
1346           if (dst_bytes && (dst_end - dst < src_bytes))
1347             src_bytes = dst_end - dst;
1348           bcopy (src, dst, src_bytes);
1349           dst += src_bytes;
1350           src += src_bytes;
1351           coding->fake_multibyte = 1;
1352         }
1353     }
1354
1355   coding->consumed = coding->consumed_char = src - source;
1356   coding->produced = dst - destination;
1357   return result;
1358 }
1359
1360 /* ISO2022 encoding stuff.  */
1361
1362 /*
1363    Saying just "ISO2022" is not enough for encoding; we have to
1364    specify more details.  In Emacs, each coding system of the ISO2022
1365    variant has the following specifications:
1366         1. Initial designation to G0 thru G3.
1367         2. Allows short-form designation?
1368         3. ASCII should be designated to G0 before control characters?
1369         4. ASCII should be designated to G0 at end of line?
1370         5. 7-bit environment or 8-bit environment?
1371         6. Use locking-shift?
1372         7. Use Single-shift?
1373    And the following two are only for Japanese:
1374         8. Use ASCII in place of JISX0201-1976-Roman?
1375         9. Use JISX0208-1983 in place of JISX0208-1978?
1376    These specifications are encoded in `coding->flags' as flag bits
1377    defined by macros CODING_FLAG_ISO_XXX.  See `coding.h' for more
1378    details.
1379 */
1380
1381 /* Produce codes (escape sequence) for designating CHARSET to graphic
1382    register REG.  If <final-char> of CHARSET is '@', 'A', or 'B' and
1383    the coding system CODING allows, produce designation sequence of
1384    short-form.

        The sequence is written through DST, which must be in scope at
        the expansion site; at most 7 bytes are produced:
          - ESC '&' <'@' + revision>, only if the revision number that
            CODING holds for CHARSET is less than 255;
          - ESC, then '$' for a charset of dimension 2;
          - the intermediate character selecting REG, taken from "()*+"
            for a 94-character set or ",-./" for a 96-character set.
            It is omitted (short form) only for a 94-character set of
            dimension 2 when CODING_FLAG_ISO_SHORT_FORM is set, REG is 0
            and the final character is in '@'..'B';
          - the final character of CHARSET.
        CHARSET is then recorded in CODING as the designation of REG.

        The arguments are evaluated more than once, so they must have
        no side effects.  */
1385
1386 #define ENCODE_DESIGNATION(charset, reg, coding)                        \
1387   do {                                                                  \
1388     unsigned char final_char = CHARSET_ISO_FINAL_CHAR (charset);        \
1389     char *intermediate_char_94 = "()*+";                                \
1390     char *intermediate_char_96 = ",-./";                                \
1391     int revision = CODING_SPEC_ISO_REVISION_NUMBER(coding, charset);    \
1392     if (revision < 255)                                                 \
1393       {                                                                 \
1394         *dst++ = ISO_CODE_ESC;                                          \
1395         *dst++ = '&';                                                   \
1396         *dst++ = '@' + revision;                                        \
1397       }                                                                 \
1398     *dst++ = ISO_CODE_ESC;                                              \
1399     if (CHARSET_DIMENSION (charset) == 1)                               \
1400       {                                                                 \
1401         if (CHARSET_CHARS (charset) == 94)                              \
1402           *dst++ = (unsigned char) (intermediate_char_94[reg]);         \
1403         else                                                            \
1404           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1405       }                                                                 \
1406     else                                                                \
1407       {                                                                 \
1408         *dst++ = '$';                                                   \
1409         if (CHARSET_CHARS (charset) == 94)                              \
1410           {                                                             \
1411             if (! ((coding)->flags & CODING_FLAG_ISO_SHORT_FORM)        \
1412                 || (reg) != 0                                           \
1413                 || final_char < '@' || final_char > 'B')                \
1414               *dst++ = (unsigned char) (intermediate_char_94[reg]);     \
1415           }                                                             \
1416         else                                                            \
1417           *dst++ = (unsigned char) (intermediate_char_96[reg]);         \
1418       }                                                                 \
1419     *dst++ = final_char;                                                \
1420     CODING_SPEC_ISO_DESIGNATION (coding, reg) = (charset);              \
1421   } while (0)
1422
1423 /* The following two macros produce codes (control character or escape
1424    sequence) for ISO2022 single-shift functions (single-shift-2 and
1425    single-shift-3).  */
1426
     /* Both macros use the variables `coding' and `dst' of the expansion
        context.  With CODING_FLAG_ISO_SEVEN_BITS set in coding->flags they
        emit a two-byte escape sequence (ESC 'N' or ESC 'O'); otherwise they
        emit the single byte ISO_CODE_SS2 or ISO_CODE_SS3 and set
        coding->fake_multibyte to 1.  In both cases the single-shifting
        flag of CODING is set to 1; the ENCODE_ISO_CHARACTER_DIMENSION*
        macros test that flag and clear it when they output the next
        character.  */
1427 #define ENCODE_SINGLE_SHIFT_2                           \
1428   do {                                                  \
1429     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1430       *dst++ = ISO_CODE_ESC, *dst++ = 'N';              \
1431     else                                                \
1432       {                                                 \
1433         *dst++ = ISO_CODE_SS2;                          \
1434         coding->fake_multibyte = 1;                     \
1435       }                                                 \
1436     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1437   } while (0)
1438
1439 #define ENCODE_SINGLE_SHIFT_3                           \
1440   do {                                                  \
1441     if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)     \
1442       *dst++ = ISO_CODE_ESC, *dst++ = 'O';              \
1443     else                                                \
1444       {                                                 \
1445         *dst++ = ISO_CODE_SS3;                          \
1446         coding->fake_multibyte = 1;                     \
1447       }                                                 \
1448     CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 1;       \
1449   } while (0)
1450
1451 /* The following four macros produce codes (control character or
1452    escape sequence) for ISO2022 locking-shift functions (shift-in,
1453    shift-out, locking-shift-2, and locking-shift-3).  */
1454
     /* Each macro writes its code through the `dst' pointer of the
        expansion context (one byte for shift-in and shift-out, ESC plus a
        letter for the other two) and then records in `coding' that
        graphic plane 0 is invoked to graphic register 0, 1, 2 or 3
        respectively.  */
1455 #define ENCODE_SHIFT_IN                         \
1456   do {                                          \
1457     *dst++ = ISO_CODE_SI;                       \
1458     CODING_SPEC_ISO_INVOCATION (coding, 0) = 0; \
1459   } while (0)
1460
1461 #define ENCODE_SHIFT_OUT                        \
1462   do {                                          \
1463     *dst++ = ISO_CODE_SO;                       \
1464     CODING_SPEC_ISO_INVOCATION (coding, 0) = 1; \
1465   } while (0)
1466
1467 #define ENCODE_LOCKING_SHIFT_2                  \
1468   do {                                          \
1469     *dst++ = ISO_CODE_ESC, *dst++ = 'n';        \
1470     CODING_SPEC_ISO_INVOCATION (coding, 0) = 2; \
1471   } while (0)
1472
1473 #define ENCODE_LOCKING_SHIFT_3                  \
1474   do {                                          \
1475     *dst++ = ISO_CODE_ESC, *dst++ = 'o';        \
1476     CODING_SPEC_ISO_INVOCATION (coding, 0) = 3; \
1477   } while (0)
1478
1479 /* Produce codes for a DIMENSION1 character whose character set is
1480    CHARSET and whose position-code is C1.  Designation and invocation
1481    sequences are also produced in advance if necessary.  */
1482
     /* The macro uses `coding' and `dst' from the expansion context.  Its
        body is an endless loop that is left with `break' as soon as the
        character (or its substitute) has been written:
          - if a single shift is pending, C1 is written with the high bit
            cleared (seven-bit coding systems) or set (otherwise) and the
            pending flag is cleared;
          - if CHARSET is the charset of graphic plane 0, C1 is written
            with the high bit cleared;
          - if CHARSET is the charset of graphic plane 1, C1 is written
            with the high bit set;
          - if CODING_FLAG_ISO_SAFE is set and CHARSET is not marked in
            coding->safe_charsets, CODING_INHIBIT_CHARACTER_SUBSTITUTION
            is written once, or twice when CHARSET_WIDTH is 2;
          - otherwise encode_invocation_designation is called and the
            loop runs again.
        Because `break' belongs to the macro's own loop, the macro cannot
        be used to leave an enclosing loop or switch.  */
1483
1484 #define ENCODE_ISO_CHARACTER_DIMENSION1(charset, c1)                    \
1485   do {                                                                  \
1486     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1487       {                                                                 \
1488         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1489           *dst++ = c1 & 0x7F;                                           \
1490         else                                                            \
1491           *dst++ = c1 | 0x80;                                           \
1492         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1493         break;                                                          \
1494       }                                                                 \
1495     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1496       {                                                                 \
1497         *dst++ = c1 & 0x7F;                                             \
1498         break;                                                          \
1499       }                                                                 \
1500     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1501       {                                                                 \
1502         *dst++ = c1 | 0x80;                                             \
1503         break;                                                          \
1504       }                                                                 \
1505     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1506              && !coding->safe_charsets[charset])                        \
1507       {                                                                 \
1508         /* We should not encode this character, instead produce one or  \
1509            two `?'s.  */                                                \
1510         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1511         if (CHARSET_WIDTH (charset) == 2)                               \
1512           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1513         break;                                                          \
1514       }                                                                 \
1515     else                                                                \
1516       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1517          must invoke it, or, at first, designate it to some graphic     \
1518          register.  Then repeat the loop to actually produce the        \
1519          character.  */                                                 \
1520       dst = encode_invocation_designation (charset, coding, dst);       \
1521   } while (1)
1522
1523 /* Produce codes for a DIMENSION2 character whose character set is
1524    CHARSET and whose position-codes are C1 and C2.  Designation and
1525    invocation codes are also produced in advance if necessary.  */
1526
     /* Two-byte counterpart of ENCODE_ISO_CHARACTER_DIMENSION1, with the
        same structure: an endless loop using `coding' and `dst' from the
        expansion context, left with `break' once output has been written.
        C1 and C2 are both written with the high bit cleared (pending
        single shift in a seven-bit coding system, or CHARSET in graphic
        plane 0) or both with the high bit set (pending single shift
        otherwise, or CHARSET in graphic plane 1).  An unsafe charset
        under CODING_FLAG_ISO_SAFE yields one or two
        CODING_INHIBIT_CHARACTER_SUBSTITUTION bytes instead.  In the
        remaining case encode_invocation_designation is called and the
        loop runs again.  */
1527 #define ENCODE_ISO_CHARACTER_DIMENSION2(charset, c1, c2)                \
1528   do {                                                                  \
1529     if (CODING_SPEC_ISO_SINGLE_SHIFTING (coding))                       \
1530       {                                                                 \
1531         if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)                 \
1532           *dst++ = c1 & 0x7F, *dst++ = c2 & 0x7F;                       \
1533         else                                                            \
1534           *dst++ = c1 | 0x80, *dst++ = c2 | 0x80;                       \
1535         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;                   \
1536         break;                                                          \
1537       }                                                                 \
1538     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 0))      \
1539       {                                                                 \
1540         *dst++ = c1 & 0x7F, *dst++= c2 & 0x7F;                          \
1541         break;                                                          \
1542       }                                                                 \
1543     else if (charset == CODING_SPEC_ISO_PLANE_CHARSET (coding, 1))      \
1544       {                                                                 \
1545         *dst++ = c1 | 0x80, *dst++= c2 | 0x80;                          \
1546         break;                                                          \
1547       }                                                                 \
1548     else if (coding->flags & CODING_FLAG_ISO_SAFE                       \
1549              && !coding->safe_charsets[charset])                        \
1550       {                                                                 \
1551         /* We should not encode this character, instead produce one or  \
1552            two `?'s.  */                                                \
1553         *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;                 \
1554         if (CHARSET_WIDTH (charset) == 2)                               \
1555           *dst++ = CODING_INHIBIT_CHARACTER_SUBSTITUTION;               \
1556         break;                                                          \
1557       }                                                                 \
1558     else                                                                \
1559       /* Since CHARSET is not yet invoked to any graphic planes, we     \
1560          must invoke it, or, at first, designate it to some graphic     \
1561          register.  Then repeat the loop to actually produce the        \
1562          character.  */                                                 \
1563       dst = encode_invocation_designation (charset, coding, dst);       \
1564   } while (1)
1565
     /* Encode one character of CHARSET whose position-codes are C1 and C2
        (C2 is a dummy for dimension-1 charsets).  Uses `translation_table',
        `coding' and `dst' from the expansion context.

        If `translation_table' is non-nil and translate_char returns a
        non-negative character, that character is split back into
        charset_alt, C1 and C2; C1 and C2 are assigned to, so they must be
        lvalues.  Otherwise charset_alt is CHARSET.  Depending on the
        dimension of charset_alt, the DIMENSION1 or DIMENSION2 macro then
        emits the bytes.  Two flag-controlled substitutions are applied
        just before emitting: ASCII becomes charset_latin_jisx0201 under
        CODING_FLAG_ISO_USE_ROMAN, and charset_jisx0208 becomes
        charset_jisx0208_1978 under CODING_FLAG_ISO_USE_OLDJIS.
        NOTE(review): both substitutions test the original CHARSET rather
        than charset_alt, so they also apply (overriding charset_alt) when
        translation produced a different charset -- confirm this is
        intended.

        coding->consumed_char is incremented unless a composition is in
        progress.  */
1566 #define ENCODE_ISO_CHARACTER(charset, c1, c2)                   \
1567   do {                                                          \
1568     int c_alt, charset_alt;                                     \
1569     if (!NILP (translation_table)                               \
1570         && ((c_alt = translate_char (translation_table, -1,     \
1571                                      charset, c1, c2))          \
1572             >= 0))                                              \
1573       SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
1574     else                                                        \
1575       charset_alt = charset;                                    \
1576     if (CHARSET_DIMENSION (charset_alt) == 1)                   \
1577       {                                                         \
1578         if (charset == CHARSET_ASCII                            \
1579             && coding->flags & CODING_FLAG_ISO_USE_ROMAN)       \
1580           charset_alt = charset_latin_jisx0201;                 \
1581         ENCODE_ISO_CHARACTER_DIMENSION1 (charset_alt, c1);      \
1582       }                                                         \
1583     else                                                        \
1584       {                                                         \
1585         if (charset == charset_jisx0208                         \
1586             && coding->flags & CODING_FLAG_ISO_USE_OLDJIS)      \
1587           charset_alt = charset_jisx0208_1978;                  \
1588         ENCODE_ISO_CHARACTER_DIMENSION2 (charset_alt, c1, c2);  \
1589       }                                                         \
1590     if (! COMPOSING_P (coding->composing))                      \
1591       coding->consumed_char++;                                  \
1592   } while (0)
1593
1594 /* Produce designation and invocation codes at a place pointed by DST
1595    to use CHARSET.  The element `spec.iso2022' of *CODING is updated.
1596    Return new DST.  */
1597
     /* CHARSET is the charset that is about to be output, CODING the
        coding system whose designation and invocation state is consulted
        and updated, and DST the current write position.

        Step 1: look for a graphic register that already holds CHARSET.
        If none does, designate CHARSET to the register the coding system
        requests for it, or to register 0 when nothing is requested.
        Step 2: if that register is invoked to neither graphic plane 0 nor
        graphic plane 1, emit a shift.  Registers 0 and 1 use shift-in and
        shift-out; registers 2 and 3 use a single shift when
        CODING_FLAG_ISO_SINGLE_SHIFT is set and a locking shift otherwise.
        The single-shift macros do not change the recorded invocation;
        they only mark a single shift as pending.

        The ENCODE_* macros expanded here read and advance the local
        `dst' and use the locals `coding' and `reg'.  */
1598 unsigned char *
1599 encode_invocation_designation (charset, coding, dst)
1600      int charset;
1601      struct coding_system *coding;
1602      unsigned char *dst;
1603 {
1604   int reg;                      /* graphic register number */
1605
1606   /* At first, check designations.  */
1607   for (reg = 0; reg < 4; reg++)
1608     if (charset == CODING_SPEC_ISO_DESIGNATION (coding, reg))
1609       break;
1610
     /* REG is 4 here exactly when no register holds CHARSET.  */
1611   if (reg >= 4)
1612     {
1613       /* CHARSET is not yet designated to any graphic registers.  */
1614       /* At first check the requested designation.  */
1615       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1616       if (reg == CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION)
1617         /* Since CHARSET requests no special designation, designate it
1618            to graphic register 0.  */
1619         reg = 0;
1620
1621       ENCODE_DESIGNATION (charset, reg, coding);
1622     }
1623
1624   if (CODING_SPEC_ISO_INVOCATION (coding, 0) != reg
1625       && CODING_SPEC_ISO_INVOCATION (coding, 1) != reg)
1626     {
1627       /* Since the graphic register REG is not invoked to any graphic
1628          planes, invoke it to graphic plane 0.  */
1629       switch (reg)
1630         {
1631         case 0:                 /* graphic register 0 */
1632           ENCODE_SHIFT_IN;
1633           break;
1634
1635         case 1:                 /* graphic register 1 */
1636           ENCODE_SHIFT_OUT;
1637           break;
1638
1639         case 2:                 /* graphic register 2 */
1640           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1641             ENCODE_SINGLE_SHIFT_2;
1642           else
1643             ENCODE_LOCKING_SHIFT_2;
1644           break;
1645
1646         case 3:                 /* graphic register 3 */
1647           if (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT)
1648             ENCODE_SINGLE_SHIFT_3;
1649           else
1650             ENCODE_LOCKING_SHIFT_3;
1651           break;
1652         }
1653     }
1654   return dst;
1655 }
1656
1657 /* The following three macros produce codes for indicating composition:
        ESC '0' starts a composition without rules, ESC '2' starts a
        composition with rules, and ESC '1' ends a composition.  Each
        expands to a bare comma expression (not a do-while block) that
        writes two bytes through the `dst' pointer of the expansion
        context.  */
1658 #define ENCODE_COMPOSITION_NO_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '0'
1659 #define ENCODE_COMPOSITION_WITH_RULE_START  *dst++ = ISO_CODE_ESC, *dst++ = '2'
1660 #define ENCODE_COMPOSITION_END    *dst++ = ISO_CODE_ESC, *dst++ = '1'
1661
/* The following three macros produce codes for indicating direction
   of text.  They use `coding' (whose `flags' member is read) and `dst'
   (the output pointer, which is advanced) from the expansion
   context.  */

/* Emit the Control Sequence Introducer: the two bytes ESC '[' when
   CODING_FLAG_ISO_SEVEN_BITS is set in coding->flags, the single byte
   ISO_CODE_CSI otherwise.

   The flag is tested as a bit with `&', like the single-shift macros
   do.  Comparing the whole flag word with `==' would select the
   seven-bit form only when no other flag happened to be set.

   The macro is an expression rather than a do-while statement so that
   the direction macros below can chain it with the comma operator;
   `ENCODE_CONTROL_SEQUENCE_INTRODUCER;' still works as a statement.  */
#define ENCODE_CONTROL_SEQUENCE_INTRODUCER              \
  ((coding->flags & CODING_FLAG_ISO_SEVEN_BITS)         \
   ? (*dst++ = ISO_CODE_ESC, *dst++ = '[')              \
   : (*dst++ = ISO_CODE_CSI))

/* Emit CSI '2' ']' (right-to-left direction).  */
#define ENCODE_DIRECTION_R2L    \
  (ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '2', *dst++ = ']')

/* Emit CSI '0' ']' (left-to-right direction).  */
#define ENCODE_DIRECTION_L2R    \
  (ENCODE_CONTROL_SEQUENCE_INTRODUCER, *dst++ = '0', *dst++ = ']')
1677
1678 /* Produce codes for designation and invocation to reset the graphic
1679    planes and registers to initial state.  */
     /* Uses `coding' and `dst' from the expansion context.  If graphic
        plane 0 is not invoked to register 0, a shift-in is emitted.  Then,
        for each of the four registers whose initial designation is
        non-negative and differs from the current one, the initial charset
        is designated again.  The macro declares its own `reg', which
        hides any variable of that name in the surrounding code for the
        duration of the expansion.  */
1680 #define ENCODE_RESET_PLANE_AND_REGISTER                                     \
1681   do {                                                                      \
1682     int reg;                                                                \
1683     if (CODING_SPEC_ISO_INVOCATION (coding, 0) != 0)                        \
1684       ENCODE_SHIFT_IN;                                                      \
1685     for (reg = 0; reg < 4; reg++)                                           \
1686       if (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg) >= 0            \
1687           && (CODING_SPEC_ISO_DESIGNATION (coding, reg)                     \
1688               != CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg)))        \
1689         ENCODE_DESIGNATION                                                  \
1690           (CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, reg), reg, coding); \
1691   } while (0)
1692
1693 /* Produce designation sequences of charsets in the line started from
1694    SRC to a place pointed by *DSTP, and update DSTP.
1695
1696    If the current block ends before any end-of-line, we may fail to
1697    find all the necessary designations.  */
1698
     /* CODING supplies the requested-designation table and the current
        designations.  TABLE is a translation table, or nil for none; when
        non-nil each character is passed through translate_char before its
        charset is looked up.  The scan stops at SRC_END, at a '\n', or
        once four registers have been claimed.  For each graphic register
        the first charset on the line that requests it wins.

        Designation sequences are emitted only for claimed registers whose
        current designation differs.  If no charset on the line requests a
        register, *DSTP is left untouched.

        NOTE(review): the scan advances by BYTES_BY_CHAR_HEAD without
        checking that the whole character lies before SRC_END -- confirm
        that callers guarantee complete characters.  The local `c' is
        declared but never used.  */
1699 void
1700 encode_designation_at_bol (coding, table, src, src_end, dstp)
1701      struct coding_system *coding;
1702      Lisp_Object table;
1703      unsigned char *src, *src_end, **dstp;
1704 {
1705   int charset, c, found = 0, reg;
1706   /* Table of charsets to be designated to each graphic register.  */
1707   int r[4];
     /* ENCODE_DESIGNATION below writes through this local.  */
1708   unsigned char *dst = *dstp;
1709
     /* -1 means that no charset has claimed the register yet.  */
1710   for (reg = 0; reg < 4; reg++)
1711     r[reg] = -1;
1712
1713   while (src < src_end && *src != '\n' && found < 4)
1714     {
1715       int bytes = BYTES_BY_CHAR_HEAD (*src);
1716
1717       if (NILP (table))
1718         charset = CHARSET_AT (src);
1719       else
1720         {
1721           int c_alt;
1722           unsigned char c1, c2;
1723
1724           SPLIT_STRING(src, bytes, charset, c1, c2);
1725           if ((c_alt = translate_char (table, -1, charset, c1, c2)) >= 0)
1726             charset = CHAR_CHARSET (c_alt);
1727         }
1728
1729       reg = CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset);
1730       if (reg != CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION && r[reg] < 0)
1731         {
1732           found++;
1733           r[reg] = charset;
1734         }
1735
1736       src += bytes;
1737     }
1738
1739   if (found)
1740     {
1741       for (reg = 0; reg < 4; reg++)
1742         if (r[reg] >= 0
1743             && CODING_SPEC_ISO_DESIGNATION (coding, reg) != r[reg])
1744           ENCODE_DESIGNATION (r[reg], reg, coding);
1745       *dstp = dst;
1746     }
1747 }
1748
1749 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".  */
1750
     /* Encode SRC_BYTES bytes of Emacs' internal format at SOURCE into
        ISO2022 bytes at DESTINATION, using and updating the state kept in
        CODING.

        DST_BYTES is the size of the destination area.  When it is zero
        the loop instead requires DST to stay at least 19 bytes behind the
        read pointer (NOTE(review): presumably for converting in place
        within one buffer -- confirm against the callers).

        On return coding->consumed holds the number of source bytes used,
        coding->consumed_char the number of characters counted while
        encoding, and coding->produced and coding->produced_char the
        number of bytes written.  The return value is:
          CODING_FINISH_NORMAL            the loop consumed all of the
                                          source;
          CODING_FINISH_INSUFFICIENT_SRC  control reached label_end_of_loop
                                          (per the comment in the loop,
                                          from the *_MORE_BYTE(S) macros
                                          when a multi-byte sequence is cut
                                          off); SRC is rewound to the start
                                          of that character;
          CODING_FINISH_INSUFFICIENT_DST  the loop stopped with source left
                                          over and no other result set.  */
1751 int
1752 encode_coding_iso2022 (coding, source, destination, src_bytes, dst_bytes)
1753      struct coding_system *coding;
1754      unsigned char *source, *destination;
1755      int src_bytes, dst_bytes;
1756 {
1757   unsigned char *src = source;
1758   unsigned char *src_end = source + src_bytes;
1759   unsigned char *dst = destination;
1760   unsigned char *dst_end = destination + dst_bytes;
1761   /* Since the maximum bytes produced by each loop is 20, we subtract 19
1762      from DST_END to assure overflow checking is necessary only at the
1763      head of loop.  */
1764   unsigned char *adjusted_dst_end = dst_end - 19;
1765   Lisp_Object translation_table
1766       = coding->translation_table_for_encode;
1767   int result = CODING_FINISH_NORMAL;
1768
     /* Fall back to the standard table when translation is enabled and
        the coding system has no table of its own.  */
1769   if (!NILP (Venable_character_translation) && NILP (translation_table))
1770     translation_table = Vstandard_translation_table_for_encode;
1771
1772   coding->consumed_char = 0;
1773   coding->fake_multibyte = 0;
1774   while (src < src_end && (dst_bytes
1775                            ? (dst < adjusted_dst_end)
1776                            : (dst < src - 19)))
1777     {
1778       /* SRC_BASE remembers the start position in source in each loop.
1779          The loop will be exited when there's not enough source text
1780          to analyze multi-byte codes (within macros ONE_MORE_BYTE,
1781          TWO_MORE_BYTES, and THREE_MORE_BYTES).  In that case, SRC is
1782          reset to SRC_BASE before exiting.  */
1783       unsigned char *src_base = src;
1784       int charset, c1, c2, c3, c4;
1785
1786       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL
1787           && CODING_SPEC_ISO_BOL (coding))
1788         {
1789           /* We have to produce designation sequences if any now.  */
1790           encode_designation_at_bol (coding, translation_table,
1791                                      src, src_end, &dst);
1792           CODING_SPEC_ISO_BOL (coding) = 0;
1793         }
1794
1795       c1 = *src++;
1796       /* If we are seeing a component of a composite character, we are
1797          seeing a leading-code encoded irregularly for composition, or
1798          a composition rule if composing with rule.  We must set C1 to
1799          a normal leading-code or an ASCII code.  If we are not seeing
1800          a composite character, we must reset composition,
1801          designation, and invocation states.  */
1802       if (COMPOSING_P (coding->composing))
1803         {
1804           if (c1 < 0xA0)
1805             {
1806               /* We are not in a composite character any longer.  */
1807               coding->composing = COMPOSING_NO;
1808               ENCODE_RESET_PLANE_AND_REGISTER;
1809               ENCODE_COMPOSITION_END;
1810             }
1811           else
1812             {
1813               if (coding->composing == COMPOSING_WITH_RULE_RULE)
1814                 {
                   /* This byte is a composition rule: output it with
                      the high bit cleared and expect a component
                      next.  */
1815                   *dst++ = c1 & 0x7F;
1816                   coding->composing = COMPOSING_WITH_RULE_HEAD;
1817                   continue;
1818                 }
1819               else if (coding->composing == COMPOSING_WITH_RULE_HEAD)
1820                 coding->composing = COMPOSING_WITH_RULE_RULE;
1821               if (c1 == 0xA0)
1822                 {
1823                   /* This is an ASCII component.  */
1824                   ONE_MORE_BYTE (c1);
1825                   c1 &= 0x7F;
1826                 }
1827               else
1828                 /* This is a leading-code of non ASCII component.  */
1829                 c1 -= 0x20;
1830             }
1831         }
1832
1833       /* Now encode one character.  C1 is a control character, an
1834          ASCII character, or a leading-code of multi-byte character.  */
1835       switch (emacs_code_class[c1])
1836         {
1837         case EMACS_ascii_code:
1838           ENCODE_ISO_CHARACTER (CHARSET_ASCII, c1, /* dummy */ c2);
1839           break;
1840
1841         case EMACS_control_code:
1842           if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1843             ENCODE_RESET_PLANE_AND_REGISTER;
1844           *dst++ = c1;
1845           coding->consumed_char++;
1846           break;
1847
1848         case EMACS_carriage_return_code:
1849           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
1850             {
1851               if (coding->flags & CODING_FLAG_ISO_RESET_AT_CNTL)
1852                 ENCODE_RESET_PLANE_AND_REGISTER;
1853               *dst++ = c1;
1854               coding->consumed_char++;
1855               break;
1856             }
1857           /* fall down to treat '\r' as '\n' ...  */
1858
1859         case EMACS_linefeed_code:
1860           if (coding->flags & CODING_FLAG_ISO_RESET_AT_EOL)
1861             ENCODE_RESET_PLANE_AND_REGISTER;
           /* With INIT_AT_BOL the recorded designations are set back to
              the initial ones without emitting anything.  */
1862           if (coding->flags & CODING_FLAG_ISO_INIT_AT_BOL)
1863             bcopy (coding->spec.iso2022.initial_designation,
1864                    coding->spec.iso2022.current_designation,
1865                    sizeof coding->spec.iso2022.initial_designation);
           /* Write the end-of-line in the format of the coding system:
              LF (also when undecided), CR LF, or else CR alone.  */
1866           if (coding->eol_type == CODING_EOL_LF
1867               || coding->eol_type == CODING_EOL_UNDECIDED)
1868             *dst++ = ISO_CODE_LF;
1869           else if (coding->eol_type == CODING_EOL_CRLF)
1870             *dst++ = ISO_CODE_CR, *dst++ = ISO_CODE_LF;
1871           else
1872             *dst++ = ISO_CODE_CR;
1873           CODING_SPEC_ISO_BOL (coding) = 1;
1874           coding->consumed_char++;
1875           break;
1876
1877         case EMACS_leading_code_2:
1878           ONE_MORE_BYTE (c2);
1879           if (c2 < 0xA0)
1880             {
               /* Output the leading byte as is and step back so that
                  the following byte is processed on its own.  The
                  other "invalid sequence" branches below work the
                  same way.  */
1881               /* invalid sequence */
1882               *dst++ = c1;
1883               src--;
1884               coding->consumed_char++;
1885             }
1886           else
1887             ENCODE_ISO_CHARACTER (c1, c2, /* dummy */ c3);
1888           break;
1889
1890         case EMACS_leading_code_3:
1891           TWO_MORE_BYTES (c2, c3);
1892           if (c2 < 0xA0 || c3 < 0xA0)
1893             {
1894               /* invalid sequence */
1895               *dst++ = c1;
1896               src -= 2;
1897               coding->consumed_char++;
1898             }
           /* Below LEADING_CODE_PRIVATE_11, C1 is passed as the charset
              with position-codes C2 and C3; otherwise C2 is passed as
              the charset with the single position-code C3.  */
1899           else if (c1 < LEADING_CODE_PRIVATE_11)
1900             ENCODE_ISO_CHARACTER (c1, c2, c3);
1901           else
1902             ENCODE_ISO_CHARACTER (c2, c3, /* dummy */ c4);
1903           break;
1904
1905         case EMACS_leading_code_4:
1906           THREE_MORE_BYTES (c2, c3, c4);
1907           if (c2 < 0xA0 || c3 < 0xA0 || c4 < 0xA0)
1908             {
1909               /* invalid sequence */
1910               *dst++ = c1;
1911               src -= 3;
1912               coding->consumed_char++;
1913             }
1914           else
1915             ENCODE_ISO_CHARACTER (c2, c3, c4);
1916           break;
1917
1918         case EMACS_leading_code_composition:
1919           ONE_MORE_BYTE (c2);
1920           if (c2 < 0xA0)
1921             {
1922               /* invalid sequence */
1923               *dst++ = c1;
1924               src--;
1925               coding->consumed_char++;
1926             }
           /* A following 0xFF byte selects composition with rules.  */
1927           else if (c2 == 0xFF)
1928             {
1929               ENCODE_RESET_PLANE_AND_REGISTER;
1930               coding->composing = COMPOSING_WITH_RULE_HEAD;
1931               ENCODE_COMPOSITION_WITH_RULE_START;
1932               coding->consumed_char++;
1933             }
1934           else
1935             {
1936               ENCODE_RESET_PLANE_AND_REGISTER;
1937               /* Rewind one byte because it is a character code of
1938                  composition elements.  */
1939               src--;
1940               coding->composing = COMPOSING_NO_RULE_HEAD;
1941               ENCODE_COMPOSITION_NO_RULE_START;
1942               coding->consumed_char++;
1943             }
1944           break;
1945
1946         case EMACS_invalid_code:
1947           *dst++ = c1;
1948           coding->consumed_char++;
1949           break;
1950         }
1951       continue;
       /* Reached only by a jump to the label, since the `continue' above
          ends every normal iteration.  The jump itself is not visible in
          this function's text; see the SRC_BASE comment at the top of
          the loop.  */
1952     label_end_of_loop:
1953       result = CODING_FINISH_INSUFFICIENT_SRC;
1954       src = src_base;
1955       break;
1956     }
1957
     /* Source left over without a source shortage means the loop stopped
        on the destination condition.  */
1958   if (src < src_end && result == CODING_FINISH_NORMAL)
1959     result = CODING_FINISH_INSUFFICIENT_DST;
1960
1961   /* If this is the last block of the text to be encoded, we must
1962      reset graphic planes and registers to the initial state, and
1963      flush out the carryover if any.  */
1964   if (coding->mode & CODING_MODE_LAST_BLOCK)
1965     {
1966       ENCODE_RESET_PLANE_AND_REGISTER;
1967       if (COMPOSING_P (coding->composing))
1968         ENCODE_COMPOSITION_END;
1969     }
1970   coding->consumed = src - source;
1971   coding->produced = coding->produced_char = dst - destination;
1972   return result;
1973 }
1974
1975 \f
1976 /*** 4. SJIS and BIG5 handlers ***/
1977
1978 /* Although SJIS and BIG5 are not ISO's coding system, they are used
1979    quite widely.  So, for the moment, Emacs supports them in the bare
1980    C code.  But, in the future, they may be supported only by CCL.  */
1981
1982 /* SJIS is a coding system encoding three character sets: ASCII, right
1983    half of JISX0201-Kana, and JISX0208.  An ASCII character is encoded
1984    as is.  A character of charset katakana-jisx0201 is encoded by
1985    "position-code + 0x80".  A character of charset japanese-jisx0208
1986    is encoded in two bytes, but the two position-codes are divided and
1987    shifted so that they fit in the ranges below.
1988
1989    --- CODE RANGE of SJIS ---
1990    (character set)      (range)
1991    ASCII                0x00 .. 0x7F
1992    KATAKANA-JISX0201    0xA0 .. 0xDF
1993    JISX0208 (1st byte)  0x80 .. 0x9F and 0xE0 .. 0xFF
1994             (2nd byte)  0x40 .. 0xFF
1995    -------------------------------
1996
1997 */
1998
1999 /* BIG5 is a coding system encoding two character sets: ASCII and
2000    Big5.  An ASCII character is encoded as is.  Big5 is a two-byte
2001    character set and is encoded in two bytes.
2002
2003    --- CODE RANGE of BIG5 ---
2004    (character set)      (range)
2005    ASCII                0x00 .. 0x7F
2006    Big5 (1st byte)      0xA1 .. 0xFE
2007         (2nd byte)      0x40 .. 0x7E and 0xA1 .. 0xFE
2008    --------------------------
2009
2010    Since the number of characters in Big5 is larger than maximum
2011    characters in Emacs' charset (96x96), it can't be handled as one
2012    charset.  So, in Emacs, Big5 is divided into two: `charset-big5-1'
2013    and `charset-big5-2'.  Both are DIMENSION2 and CHARS94.  The former
2014    contains frequently used characters and the latter contains less
2015    frequently used characters.  */
2016
/* Macros to decode or encode a character of Big5 in BIG5.  B1 and B2
   are the 1st and 2nd position-codes of Big5 in BIG5 coding system.
   C1 and C2 are the 1st and 2nd position-codes of Emacs' internal
   format.  CHARSET is `charset_big5_1' or `charset_big5_2'.

   Both macros convert through a linear index: a BIG5 code is
   (B1 - 0xA1) rows of BIG5_SAME_ROW characters plus the column of B2,
   and the same index is re-split into rows of 94 for C1 and C2.
   Indices from row 0xC9 onwards belong to `charset_big5_2' and are
   rebased to start at zero.

   Every argument is parenthesized in the expansion, so expressions
   such as `x & 0xFF' may be passed.  Arguments are still evaluated
   more than once, so they must be free of side effects.  The output
   arguments (CHARSET, C1, C2 for decoding; B1, B2 for encoding) must
   be lvalues.  */

/* Number of Big5 characters which have the same code in 1st byte.  */
#define BIG5_SAME_ROW (0xFF - 0xA1 + 0x7F - 0x40)

#define DECODE_BIG5(b1, b2, charset, c1, c2)                            \
  do {                                                                  \
    /* 2nd bytes 0x40..0x7E map to columns 0..62, 0xA1..0xFE to         \
       columns 63..156.  */                                             \
    unsigned int big5_index_                                            \
      = (((b1) - 0xA1) * BIG5_SAME_ROW                                  \
         + (b2) - ((b2) < 0x7F ? 0x40 : 0x62));                         \
    if ((b1) < 0xC9)                                                    \
      (charset) = charset_big5_1;                                       \
    else                                                                \
      {                                                                 \
        (charset) = charset_big5_2;                                     \
        big5_index_ -= (0xC9 - 0xA1) * BIG5_SAME_ROW;                   \
      }                                                                 \
    (c1) = big5_index_ / (0xFF - 0xA1) + 0x21;                          \
    (c2) = big5_index_ % (0xFF - 0xA1) + 0x21;                          \
  } while (0)

#define ENCODE_BIG5(charset, c1, c2, b1, b2)                            \
  do {                                                                  \
    unsigned int big5_index_                                            \
      = ((c1) - 0x21) * (0xFF - 0xA1) + ((c2) - 0x21);                  \
    if ((charset) == charset_big5_2)                                    \
      big5_index_ += BIG5_SAME_ROW * (0xC9 - 0xA1);                     \
    (b1) = big5_index_ / BIG5_SAME_ROW + 0xA1;                          \
    (b2) = big5_index_ % BIG5_SAME_ROW;                                 \
    /* Columns 0..62 become 0x40..0x7E, the rest 0xA1..0xFE.  */        \
    (b2) += (b2) < 0x3F ? 0x40 : 0x62;                                  \
  } while (0)
2049
     /* Decode one character given as CHARSET, C1 and C2.  Uses
        `translation_table' from the expansion context; where the
        DECODE_CHARACTER_* macros write their output is defined with those
        macros, outside this part of the file.

        When `translation_table' is non-nil and translate_char returns a
        non-negative character, that character replaces the input: it is
        split into charset_alt, C1 and C2, so C1 and C2 must be lvalues.
        The result is then dispatched: ASCII or a negative charset goes to
        DECODE_CHARACTER_ASCII, a dimension-1 charset to
        DECODE_CHARACTER_DIMENSION1, and anything else to
        DECODE_CHARACTER_DIMENSION2.  */
2050 #define DECODE_SJIS_BIG5_CHARACTER(charset, c1, c2)                     \
2051   do {                                                                  \
2052     int c_alt, charset_alt = (charset);                                 \
2053     if (!NILP (translation_table)                                       \
2054         && ((c_alt = translate_char (translation_table,                 \
2055                                      -1, (charset), c1, c2)) >= 0))     \
2056           SPLIT_CHAR (c_alt, charset_alt, c1, c2);                      \
2057     if (charset_alt == CHARSET_ASCII || charset_alt < 0)                \
2058       DECODE_CHARACTER_ASCII (c1);                                      \
2059     else if (CHARSET_DIMENSION (charset_alt) == 1)                      \
2060       DECODE_CHARACTER_DIMENSION1 (charset_alt, c1);                    \
2061     else                                                                \
2062       DECODE_CHARACTER_DIMENSION2 (charset_alt, c1, c2);                \
2063   } while (0)
2064
/* Encode the character of CHARSET whose position codes are C1 and C2,
   writing the result at `dst' of the expansion site.

   When `translation_table' is non-nil the character is first passed to
   translate_char; a non-negative result replaces the character (C1 and
   C2 are overwritten, so they must be lvalues).  Then:

   - ASCII is written as the single byte C1;
   - a dimension-1 charset is written as the single byte C1 when it is
     charset_katakana_jisx0201 and `sjis_p' is nonzero, otherwise as
     the charset byte followed by C1;
   - a dimension-2 charset is written as two SJIS bytes (jisx0208 with
     `sjis_p' nonzero), as two BIG5 bytes (big5_1 or big5_2 with
     `sjis_p' zero), or otherwise as the charset byte followed by the
     two position codes, i.e. up to three bytes.

   `coding->fake_multibyte' is set in every branch except the plain
   ASCII, JISX0201-Kana and BIG5 ones, and `coding->consumed_char' is
   incremented once.

   The expansion deliberately has no trailing semicolon, so that
   `ENCODE_SJIS_BIG5_CHARACTER (...);' is exactly one statement and
   can be used safely as the body of an `if' that has an `else'.  */

#define ENCODE_SJIS_BIG5_CHARACTER(charset, c1, c2)             \
  do {                                                          \
    int c_alt, charset_alt;                                     \
    if (!NILP (translation_table)                               \
        && ((c_alt = translate_char (translation_table, -1,     \
                                     charset, c1, c2))          \
            >= 0))                                              \
      SPLIT_CHAR (c_alt, charset_alt, c1, c2);                  \
    else                                                        \
      charset_alt = charset;                                    \
    if (charset_alt == charset_ascii)                           \
      *dst++ = c1;                                              \
    else if (CHARSET_DIMENSION (charset_alt) == 1)              \
      {                                                         \
        if (sjis_p && charset_alt == charset_katakana_jisx0201) \
          *dst++ = c1;                                          \
        else                                                    \
          {                                                     \
            *dst++ = charset_alt, *dst++ = c1;                  \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    else                                                        \
      {                                                         \
        c1 &= 0x7F, c2 &= 0x7F;                                 \
        if (sjis_p && charset_alt == charset_jisx0208)          \
          {                                                     \
            unsigned char s1, s2;                               \
                                                                \
            ENCODE_SJIS (c1, c2, s1, s2);                       \
            *dst++ = s1, *dst++ = s2;                           \
            coding->fake_multibyte = 1;                         \
          }                                                     \
        else if (!sjis_p                                        \
                 && (charset_alt == charset_big5_1              \
                     || charset_alt == charset_big5_2))         \
          {                                                     \
            unsigned char b1, b2;                               \
                                                                \
            ENCODE_BIG5 (charset_alt, c1, c2, b1, b2);          \
            *dst++ = b1, *dst++ = b2;                           \
          }                                                     \
        else                                                    \
          {                                                     \
            *dst++ = charset_alt, *dst++ = c1, *dst++ = c2;     \
            coding->fake_multibyte = 1;                         \
          }                                                     \
      }                                                         \
    coding->consumed_char++;                                    \
  } while (0)
2115
2116 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2117    Check if a text is encoded in SJIS.  If it is, return
2118    CODING_CATEGORY_MASK_SJIS, else return 0.  */
2119
2120 int
2121 detect_coding_sjis (src, src_end)
2122      unsigned char *src, *src_end;
2123 {
2124   unsigned char c;
2125
2126   while (src < src_end)
2127     {
2128       c = *src++;
2129       if ((c >= 0x80 && c < 0xA0) || c >= 0xE0)
2130         {
2131           if (src < src_end && *src++ < 0x40)
2132             return 0;
2133         }
2134     }
2135   return CODING_CATEGORY_MASK_SJIS;
2136 }
2137
2138 /* See the above "GENERAL NOTES on `detect_coding_XXX ()' functions".
2139    Check if a text is encoded in BIG5.  If it is, return
2140    CODING_CATEGORY_MASK_BIG5, else return 0.  */
2141
2142 int
2143 detect_coding_big5 (src, src_end)
2144      unsigned char *src, *src_end;
2145 {
2146   unsigned char c;
2147
2148   while (src < src_end)
2149     {
2150       c = *src++;
2151       if (c >= 0xA1)
2152         {
2153           if (src >= src_end)
2154             break;
2155           c = *src++;
2156           if (c < 0x40 || (c >= 0x7F && c <= 0xA0))
2157             return 0;
2158         }
2159     }
2160   return CODING_CATEGORY_MASK_BIG5;
2161 }
2162
2163 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2164    If SJIS_P is 1, decode SJIS text, else decode BIG5 test.  */
2165
2166 int
2167 decode_coding_sjis_big5 (coding, source, destination,
2168                          src_bytes, dst_bytes, sjis_p)
2169      struct coding_system *coding;
2170      unsigned char *source, *destination;
2171      int src_bytes, dst_bytes;
2172      int sjis_p;
2173 {
2174   unsigned char *src = source;
2175   unsigned char *src_end = source + src_bytes;
2176   unsigned char *dst = destination;
2177   unsigned char *dst_end = destination + dst_bytes;
2178   /* Since the maximum bytes produced by each loop is 4, we subtract 3
2179      from DST_END to assure overflow checking is necessary only at the
2180      head of loop.  */
2181   unsigned char *adjusted_dst_end = dst_end - 3;
2182   Lisp_Object translation_table
2183       = coding->translation_table_for_decode;
2184   int result = CODING_FINISH_NORMAL;
2185
2186   if (!NILP (Venable_character_translation) && NILP (translation_table))
2187     translation_table = Vstandard_translation_table_for_decode;
2188
2189   coding->produced_char = 0;
2190   coding->fake_multibyte = 0;
2191   while (src < src_end && (dst_bytes
2192                            ? (dst < adjusted_dst_end)
2193                            : (dst < src - 3)))
2194     {
2195       /* SRC_BASE remembers the start position in source in each loop.
2196          The loop will be exited when there's not enough source text
2197          to analyze two-byte character (within macro ONE_MORE_BYTE).
2198          In that case, SRC is reset to SRC_BASE before exiting.  */
2199       unsigned char *src_base = src;
2200       unsigned char c1 = *src++, c2, c3, c4;
2201
2202       if (c1 < 0x20)
2203         {
2204           if (c1 == '\r')
2205             {
2206               if (coding->eol_type == CODING_EOL_CRLF)
2207                 {
2208                   ONE_MORE_BYTE (c2);
2209                   if (c2 == '\n')
2210                     *dst++ = c2;
2211                   else if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2212                     {
2213                       result = CODING_FINISH_INCONSISTENT_EOL;
2214                       goto label_end_of_loop_2;
2215                     }
2216                   else
2217                     /* To process C2 again, SRC is subtracted by 1.  */
2218                     *dst++ = c1, src--;
2219                 }
2220               else if (coding->eol_type == CODING_EOL_CR)
2221                 *dst++ = '\n';
2222               else
2223                 *dst++ = c1;
2224             }
2225           else if (c1 == '\n'
2226                    && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2227                    && (coding->eol_type == CODING_EOL_CR
2228                        || coding->eol_type == CODING_EOL_CRLF))
2229             {
2230               result = CODING_FINISH_INCONSISTENT_EOL;
2231               goto label_end_of_loop_2;
2232             }
2233           else
2234             *dst++ = c1;
2235           coding->produced_char++;
2236         }
2237       else if (c1 < 0x80)
2238         DECODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2239       else if (c1 < 0xA0)
2240         {
2241           /* SJIS -> JISX0208 */
2242           if (sjis_p)
2243             {
2244               ONE_MORE_BYTE (c2);
2245               if (c2 >= 0x40)
2246                 {
2247                   DECODE_SJIS (c1, c2, c3, c4);
2248                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2249                 }
2250               else
2251                 goto label_invalid_code_2;
2252             }
2253           else
2254             goto label_invalid_code_1;
2255         }
2256       else if (c1 < 0xE0)
2257         {
2258           /* SJIS -> JISX0201-Kana, BIG5 -> Big5 */
2259           if (sjis_p)
2260             DECODE_SJIS_BIG5_CHARACTER (charset_katakana_jisx0201, c1,
2261                                         /* dummy */ c2);
2262           else
2263             {
2264               int charset;
2265
2266               ONE_MORE_BYTE (c2);
2267               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2268                 {
2269                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2270                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2271                 }
2272               else
2273                 goto label_invalid_code_2;
2274             }
2275         }
2276       else                      /* C1 >= 0xE0 */
2277         {
2278           /* SJIS -> JISX0208, BIG5 -> Big5 */
2279           if (sjis_p)
2280             {
2281               ONE_MORE_BYTE (c2);
2282               if (c2 >= 0x40)
2283                 {
2284                   DECODE_SJIS (c1, c2, c3, c4);
2285                   DECODE_SJIS_BIG5_CHARACTER (charset_jisx0208, c3, c4);
2286                 }
2287               else
2288                 goto label_invalid_code_2;
2289             }
2290           else
2291             {
2292               int charset;
2293
2294               ONE_MORE_BYTE (c2);
2295               if ((c2 >= 0x40 && c2 <= 0x7E) || (c2 >= 0xA1 && c2 <= 0xFE))
2296                 {
2297                   DECODE_BIG5 (c1, c2, charset, c3, c4);
2298                   DECODE_SJIS_BIG5_CHARACTER (charset, c3, c4);
2299                 }
2300               else
2301                 goto label_invalid_code_2;
2302             }
2303         }
2304       continue;
2305
2306     label_invalid_code_1:
2307       *dst++ = c1;
2308       coding->produced_char++;
2309       coding->fake_multibyte = 1;
2310       continue;
2311
2312     label_invalid_code_2:
2313       *dst++ = c1; *dst++= c2;
2314       coding->produced_char += 2;
2315       coding->fake_multibyte = 1;
2316       continue;
2317
2318     label_end_of_loop:
2319       result = CODING_FINISH_INSUFFICIENT_SRC;
2320     label_end_of_loop_2:
2321       src = src_base;
2322       break;
2323     }
2324
2325   if (src < src_end)
2326     {
2327       if (result == CODING_FINISH_NORMAL)
2328         result = CODING_FINISH_INSUFFICIENT_DST;
2329       else if (result != CODING_FINISH_INCONSISTENT_EOL
2330                && coding->mode & CODING_MODE_LAST_BLOCK)
2331         {
2332           src_bytes = src_end - src;
2333           if (dst_bytes && (dst_end - dst < src_bytes))
2334             src_bytes = dst_end - dst;
2335           bcopy (dst, src, src_bytes);
2336           src += src_bytes;
2337           dst += src_bytes;
2338           coding->fake_multibyte = 1;
2339         }
2340     }
2341
2342   coding->consumed = coding->consumed_char = src - source;
2343   coding->produced = dst - destination;
2344   return result;
2345 }
2346
2347 /* See the above "GENERAL NOTES on `encode_coding_XXX ()' functions".
2348    This function can encode `charset_ascii', `charset_katakana_jisx0201',
2349    `charset_jisx0208', `charset_big5_1', and `charset_big5-2'.  We are
2350    sure that all these charsets are registered as official charset
2351    (i.e. do not have extended leading-codes).  Characters of other
2352    charsets are produced without any encoding.  If SJIS_P is 1, encode
2353    SJIS text, else encode BIG5 text.  */
2354
2355 int
2356 encode_coding_sjis_big5 (coding, source, destination,
2357                          src_bytes, dst_bytes, sjis_p)
2358      struct coding_system *coding;
2359      unsigned char *source, *destination;
2360      int src_bytes, dst_bytes;
2361      int sjis_p;
2362 {
2363   unsigned char *src = source;
2364   unsigned char *src_end = source + src_bytes;
2365   unsigned char *dst = destination;
2366   unsigned char *dst_end = destination + dst_bytes;
2367   /* Since the maximum bytes produced by each loop is 2, we subtract 1
2368      from DST_END to assure overflow checking is necessary only at the
2369      head of loop.  */
2370   unsigned char *adjusted_dst_end = dst_end - 1;
2371   Lisp_Object translation_table
2372       = coding->translation_table_for_encode;
2373   int result = CODING_FINISH_NORMAL;
2374
2375   if (!NILP (Venable_character_translation) && NILP (translation_table))
2376     translation_table = Vstandard_translation_table_for_encode;
2377
2378   coding->consumed_char = 0;
2379   coding->fake_multibyte = 0;
2380   while (src < src_end && (dst_bytes
2381                            ? (dst < adjusted_dst_end)
2382                            : (dst < src - 1)))
2383     {
2384       /* SRC_BASE remembers the start position in source in each loop.
2385          The loop will be exited when there's not enough source text
2386          to analyze multi-byte codes (within macros ONE_MORE_BYTE and
2387          TWO_MORE_BYTES).  In that case, SRC is reset to SRC_BASE
2388          before exiting.  */
2389       unsigned char *src_base = src;
2390       unsigned char c1 = *src++, c2, c3, c4;
2391
2392       if (coding->composing)
2393         {
2394           if (c1 == 0xA0)
2395             {
2396               ONE_MORE_BYTE (c1);
2397               c1 &= 0x7F;
2398             }
2399           else if (c1 >= 0xA0)
2400             c1 -= 0x20;
2401           else
2402             coding->composing = 0;
2403         }
2404
2405       switch (emacs_code_class[c1])
2406         {
2407         case EMACS_ascii_code:
2408           ENCODE_SJIS_BIG5_CHARACTER (charset_ascii, c1, /* dummy */ c2);
2409           break;
2410
2411         case EMACS_control_code:
2412           *dst++ = c1;
2413           coding->consumed_char++;
2414           break;
2415
2416         case EMACS_carriage_return_code:
2417           if (! (coding->mode & CODING_MODE_SELECTIVE_DISPLAY))
2418             {
2419               *dst++ = c1;
2420               coding->consumed_char++;
2421               break;
2422             }
2423           /* fall down to treat '\r' as '\n' ...  */
2424
2425         case EMACS_linefeed_code:
2426           if (coding->eol_type == CODING_EOL_LF
2427               || coding->eol_type == CODING_EOL_UNDECIDED)
2428             *dst++ = '\n';
2429           else if (coding->eol_type == CODING_EOL_CRLF)
2430             *dst++ = '\r', *dst++ = '\n';
2431           else
2432             *dst++ = '\r';
2433           coding->consumed_char++;
2434           break;
2435
2436         case EMACS_leading_code_2:
2437           ONE_MORE_BYTE (c2);
2438           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, /* dummy */ c3);
2439           break;
2440
2441         case EMACS_leading_code_3:
2442           TWO_MORE_BYTES (c2, c3);
2443           ENCODE_SJIS_BIG5_CHARACTER (c1, c2, c3);
2444           break;
2445
2446         case EMACS_leading_code_4:
2447           THREE_MORE_BYTES (c2, c3, c4);
2448           ENCODE_SJIS_BIG5_CHARACTER (c2, c3, c4);
2449           break;
2450
2451         case EMACS_leading_code_composition:
2452           coding->composing = 1;
2453           break;
2454
2455         default:                /* i.e. case EMACS_invalid_code: */
2456           *dst++ = c1;
2457           coding->consumed_char++;
2458         }
2459       continue;
2460
2461     label_end_of_loop:
2462       result = CODING_FINISH_INSUFFICIENT_SRC;
2463       src = src_base;
2464       break;
2465     }
2466
2467   if (result == CODING_FINISH_NORMAL
2468       && src < src_end)
2469     result = CODING_FINISH_INSUFFICIENT_DST;
2470   coding->consumed = src - source;
2471   coding->produced = coding->produced_char = dst - destination;
2472   return result;
2473 }
2474
2475 \f
2476 /*** 5. End-of-line handlers ***/
2477
2478 /* See the above "GENERAL NOTES on `decode_coding_XXX ()' functions".
2479    This function is called only when `coding->eol_type' is
2480    CODING_EOL_CRLF or CODING_EOL_CR.  */
2481
2482 int
2483 decode_eol (coding, source, destination, src_bytes, dst_bytes)
2484      struct coding_system *coding;
2485      unsigned char *source, *destination;
2486      int src_bytes, dst_bytes;
2487 {
2488   unsigned char *src = source;
2489   unsigned char *src_end = source + src_bytes;
2490   unsigned char *dst = destination;
2491   unsigned char *dst_end = destination + dst_bytes;
2492   unsigned char c;
2493   int result = CODING_FINISH_NORMAL;
2494
2495   coding->fake_multibyte = 0;
2496
2497   if (src_bytes <= 0)
2498     return result;
2499
2500   switch (coding->eol_type)
2501     {
2502     case CODING_EOL_CRLF:
2503       {
2504         /* Since the maximum bytes produced by each loop is 2, we
2505            subtract 1 from DST_END to assure overflow checking is
2506            necessary only at the head of loop.  */
2507         unsigned char *adjusted_dst_end = dst_end - 1;
2508
2509         while (src < src_end && (dst_bytes
2510                                  ? (dst < adjusted_dst_end)
2511                                  : (dst < src - 1)))
2512           {
2513             unsigned char *src_base = src;
2514
2515             c = *src++;
2516             if (c == '\r')
2517               {
2518                 ONE_MORE_BYTE (c);
2519                 if (c != '\n')
2520                   {
2521                     if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2522                       {
2523                         result = CODING_FINISH_INCONSISTENT_EOL;
2524                         goto label_end_of_loop_2;
2525                       }
2526                     *dst++ = '\r';
2527                     if (BASE_LEADING_CODE_P (c))
2528                       coding->fake_multibyte = 1;
2529                   }
2530                 *dst++ = c;
2531               }
2532             else if (c == '\n'
2533                      && (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL))
2534               {
2535                 result = CODING_FINISH_INCONSISTENT_EOL;
2536                 goto label_end_of_loop_2;
2537               }
2538             else
2539               {
2540                 *dst++ = c;
2541                 if (BASE_LEADING_CODE_P (c))
2542                   coding->fake_multibyte = 1;
2543               }
2544             continue;
2545
2546           label_end_of_loop:
2547             result = CODING_FINISH_INSUFFICIENT_SRC;
2548           label_end_of_loop_2:
2549             src = src_base;
2550             break;
2551           }
2552         if (result == CODING_FINISH_NORMAL
2553             && src < src_end)
2554           result = CODING_FINISH_INSUFFICIENT_DST;
2555       }
2556       break;
2557
2558     case CODING_EOL_CR:
2559       if (coding->mode & CODING_MODE_INHIBIT_INCONSISTENT_EOL)
2560         {
2561           while (src < src_end)
2562             {
2563               if ((c = *src++) == '\n')
2564                 break;
2565               if (BASE_LEADING_CODE_P (c))
2566                 coding->fake_multibyte = 1;
2567             }
2568           if (*--src == '\n')
2569             {
2570               src_bytes = src - source;
2571               result = CODING_FINISH_INCONSISTENT_EOL;
2572             }
2573         }
2574       if (dst_bytes && src_bytes > dst_bytes)
2575         {
2576           result = CODING_FINISH_INSUFFICIENT_DST;
2577           src_bytes = dst_bytes;
2578         }
2579       if (dst_bytes)
2580         bcopy (source, destination, src_bytes);
2581       else
2582         safe_bcopy (source, destination, src_bytes);
2583       src = source + src_bytes;
2584       while (src_bytes--) if (*dst++ == '\r') dst[-1] = '\n';
2585       break;
2586
2587     default:                    /* i.e. case: CODING_EOL_LF */
2588       if (dst_bytes && src_bytes > dst_bytes)
2589         {
2590           result = CODING_FINISH_INSUFFICIENT_DST;
2591           src_bytes = dst_bytes;
2592         }
2593       if (dst_bytes)
2594         bcopy (source, destination, src_bytes);
2595       else
2596         safe_bcopy (source, destination, src_bytes);
2597       src += src_bytes;
2598       dst += src_bytes;
2599       coding->fake_multibyte = 1;
2600       break;
2601     }
2602
2603   coding->consumed = coding->consumed_char = src - source;
2604   coding->produced = coding->produced_char = dst - destination;
2605   return result;
2606 }
2607
2608 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  Encode
2609    format of end-of-line according to `coding->eol_type'.  If
2610    `coding->mode & CODING_MODE_SELECTIVE_DISPLAY' is nonzero, code
2611    '\r' in source text also means end-of-line.  */
2612
2613 int
2614 encode_eol (coding, source, destination, src_bytes, dst_bytes)
2615      struct coding_system *coding;
2616      unsigned char *source, *destination;
2617      int src_bytes, dst_bytes;
2618 {
2619   unsigned char *src = source;
2620   unsigned char *dst = destination;
2621   int result = CODING_FINISH_NORMAL;
2622
2623   coding->fake_multibyte = 0;
2624
2625   if (coding->eol_type == CODING_EOL_CRLF)
2626     {
2627       unsigned char c;
2628       unsigned char *src_end = source + src_bytes;
2629       unsigned char *dst_end = destination + dst_bytes;
2630       /* Since the maximum bytes produced by each loop is 2, we
2631          subtract 1 from DST_END to assure overflow checking is
2632          necessary only at the head of loop.  */
2633       unsigned char *adjusted_dst_end = dst_end - 1;
2634
2635       while (src < src_end && (dst_bytes
2636                                ? (dst < adjusted_dst_end)
2637                                : (dst < src - 1)))
2638         {
2639           c = *src++;
2640           if (c == '\n'
2641               || (c == '\r' && (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)))
2642             *dst++ = '\r', *dst++ = '\n';
2643           else
2644             {
2645               *dst++ = c;
2646               if (BASE_LEADING_CODE_P (c))
2647                 coding->fake_multibyte = 1;
2648             }
2649         }
2650       if (src < src_end)
2651         result = CODING_FINISH_INSUFFICIENT_DST;
2652     }
2653   else
2654     {
2655       unsigned char c;
2656
2657       if (dst_bytes && src_bytes > dst_bytes)
2658         {
2659           src_bytes = dst_bytes;
2660           result = CODING_FINISH_INSUFFICIENT_DST;
2661         }
2662       if (dst_bytes)
2663         bcopy (source, destination, src_bytes);
2664       else
2665         safe_bcopy (source, destination, src_bytes);
2666       dst_bytes = src_bytes;
2667       if (coding->eol_type == CODING_EOL_CR)
2668         {
2669           while (src_bytes--)
2670             {
2671               if ((c = *dst++) == '\n')
2672                 dst[-1] = '\r';
2673               else if (BASE_LEADING_CODE_P (c))
2674                 coding->fake_multibyte = 1;
2675             }
2676         }
2677       else
2678         {
2679           if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
2680             {
2681               while (src_bytes--)
2682                 if (*dst++ == '\r') dst[-1] = '\n';
2683             }
2684           coding->fake_multibyte = 1;
2685         }
2686       src = source + dst_bytes;
2687       dst = destination + dst_bytes;
2688     }
2689
2690   coding->consumed = coding->consumed_char = src - source;
2691   coding->produced = coding->produced_char = dst - destination;
2692   return result;
2693 }
2694
2695 \f
2696 /*** 6. C library functions ***/
2697
/* In Emacs Lisp, a coding system is represented by a Lisp symbol which
   has a property `coding-system'.  The value of this property is a
   vector of length 5 (called the coding-vector).  Among the elements
   of this vector, the first (element[0]) and the fifth (element[4])
   carry important information for decoding/encoding.  Before
   decoding/encoding, this information should be set in the fields of
   a structure of type `coding_system'.
2705
2706    A value of property `coding-system' can be a symbol of another
2707    subsidiary coding-system.  In that case, Emacs gets coding-vector
2708    from that symbol.
2709
2710    `element[0]' contains information to be set in `coding->type'.  The
2711    value and its meaning is as follows:
2712
2713    0 -- coding_type_emacs_mule
2714    1 -- coding_type_sjis
2715    2 -- coding_type_iso2022
2716    3 -- coding_type_big5
2717    4 -- coding_type_ccl encoder/decoder written in CCL
2718    nil -- coding_type_no_conversion
2719    t -- coding_type_undecided (automatic conversion on decoding,
2720                                no-conversion on encoding)
2721
2722    `element[4]' contains information to be set in `coding->flags' and
2723    `coding->spec'.  The meaning varies by `coding->type'.
2724
   If `coding->type' is `coding_type_iso2022', element[4] is a vector
   of length 32 (of which the first 17 sub-elements are used now).
   Meanings of these sub-elements are:
2728
2729    sub-element[N] where N is 0 through 3: to be set in `coding->spec.iso2022'
2730         If the value is an integer of valid charset, the charset is
2731         assumed to be designated to graphic register N initially.
2732
2733         If the value is minus, it is a minus value of charset which
2734         reserves graphic register N, which means that the charset is
2735         not designated initially but should be designated to graphic
2736         register N just before encoding a character in that charset.
2737
2738         If the value is nil, graphic register N is never used on
2739         encoding.
2740
   sub-element[N] where N is 4 through 16: to be set in `coding->flags'
        Each value takes t or nil.  See the section ISO2022 of
        `coding.h' for more information.
2744
2745    If `coding->type' is `coding_type_big5', element[4] is t to denote
2746    BIG5-ETen or nil to denote BIG5-HKU.
2747
2748    If `coding->type' takes the other value, element[4] is ignored.
2749
2750    Emacs Lisp's coding system also carries information about format of
2751    end-of-line in a value of property `eol-type'.  If the value is
2752    integer, 0 means CODING_EOL_LF, 1 means CODING_EOL_CRLF, and 2
2753    means CODING_EOL_CR.  If it is not integer, it should be a vector
2754    of subsidiary coding systems of which property `eol-type' has one
2755    of above values.
2756
2757 */
2758
2759 /* Extract information for decoding/encoding from CODING_SYSTEM_SYMBOL
2760    and set it in CODING.  If CODING_SYSTEM_SYMBOL is invalid, CODING
2761    is setup so that no conversion is necessary and return -1, else
2762    return 0.  */
2763
2764 int
2765 setup_coding_system (coding_system, coding)
2766      Lisp_Object coding_system;
2767      struct coding_system *coding;
2768 {
2769   Lisp_Object coding_spec, coding_type, eol_type, plist;
2770   Lisp_Object val;
2771   int i;
2772
2773   /* Initialize some fields required for all kinds of coding systems.  */
2774   coding->symbol = coding_system;
2775   coding->common_flags = 0;
2776   coding->mode = 0;
2777   coding->heading_ascii = -1;
2778   coding->post_read_conversion = coding->pre_write_conversion = Qnil;
2779   coding_spec = Fget (coding_system, Qcoding_system);
2780   if (!VECTORP (coding_spec)
2781       || XVECTOR (coding_spec)->size != 5
2782       || !CONSP (XVECTOR (coding_spec)->contents[3]))
2783     goto label_invalid_coding_system;
2784
2785   eol_type = inhibit_eol_conversion ? Qnil : Fget (coding_system, Qeol_type);
2786   if (VECTORP (eol_type))
2787     {
2788       coding->eol_type = CODING_EOL_UNDECIDED;
2789       coding->common_flags = CODING_REQUIRE_DETECTION_MASK;
2790     }
2791   else if (XFASTINT (eol_type) == 1)
2792     {
2793       coding->eol_type = CODING_EOL_CRLF;
2794       coding->common_flags
2795         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2796     }
2797   else if (XFASTINT (eol_type) == 2)
2798     {
2799       coding->eol_type = CODING_EOL_CR;
2800       coding->common_flags
2801         = CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2802     }
2803   else
2804     coding->eol_type = CODING_EOL_LF;
2805
2806   coding_type = XVECTOR (coding_spec)->contents[0];
2807   /* Try short cut.  */
2808   if (SYMBOLP (coding_type))
2809     {
2810       if (EQ (coding_type, Qt))
2811         {
2812           coding->type = coding_type_undecided;
2813           coding->common_flags |= CODING_REQUIRE_DETECTION_MASK;
2814         }
2815       else
2816         coding->type = coding_type_no_conversion;
2817       return 0;
2818     }
2819
2820   /* Initialize remaining fields.  */
2821   coding->composing = 0;
2822   coding->translation_table_for_decode = Qnil;
2823   coding->translation_table_for_encode = Qnil;
2824
2825   /* Get values of coding system properties:
2826      `post-read-conversion', `pre-write-conversion',
2827      `translation-table-for-decode', `translation-table-for-encode'.  */
2828   plist = XVECTOR (coding_spec)->contents[3];
2829   coding->post_read_conversion = Fplist_get (plist, Qpost_read_conversion);
2830   coding->pre_write_conversion = Fplist_get (plist, Qpre_write_conversion);
2831   val = Fplist_get (plist, Qtranslation_table_for_decode);
2832   if (SYMBOLP (val))
2833     val = Fget (val, Qtranslation_table_for_decode);
2834   coding->translation_table_for_decode = CHAR_TABLE_P (val) ? val : Qnil;
2835   val = Fplist_get (plist, Qtranslation_table_for_encode);
2836   if (SYMBOLP (val))
2837     val = Fget (val, Qtranslation_table_for_encode);
2838   coding->translation_table_for_encode = CHAR_TABLE_P (val) ? val : Qnil;
2839   val = Fplist_get (plist, Qcoding_category);
2840   if (!NILP (val))
2841     {
2842       val = Fget (val, Qcoding_category_index);
2843       if (INTEGERP (val))
2844         coding->category_idx = XINT (val);
2845       else
2846         goto label_invalid_coding_system;
2847     }
2848   else
2849     goto label_invalid_coding_system;
2850
2851   val = Fplist_get (plist, Qsafe_charsets);
2852   if (EQ (val, Qt))
2853     {
2854       for (i = 0; i <= MAX_CHARSET; i++)
2855         coding->safe_charsets[i] = 1;
2856     }
2857   else
2858     {
2859       bzero (coding->safe_charsets, MAX_CHARSET + 1);
2860       while (CONSP (val))
2861         {
2862           if ((i = get_charset_id (XCONS (val)->car)) >= 0)
2863             coding->safe_charsets[i] = 1;
2864           val = XCONS (val)->cdr;
2865         }
2866     }
2867
2868   switch (XFASTINT (coding_type))
2869     {
2870     case 0:
2871       coding->type = coding_type_emacs_mule;
2872       if (!NILP (coding->post_read_conversion))
2873         coding->common_flags |= CODING_REQUIRE_DECODING_MASK;
2874       if (!NILP (coding->pre_write_conversion))
2875         coding->common_flags |= CODING_REQUIRE_ENCODING_MASK;
2876       break;
2877
2878     case 1:
2879       coding->type = coding_type_sjis;
2880       coding->common_flags
2881         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2882       break;
2883
2884     case 2:
2885       coding->type = coding_type_iso2022;
2886       coding->common_flags
2887         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
2888       {
2889         Lisp_Object val, temp;
2890         Lisp_Object *flags;
2891         int i, charset, reg_bits = 0;
2892
2893         val = XVECTOR (coding_spec)->contents[4];
2894
2895         if (!VECTORP (val) || XVECTOR (val)->size != 32)
2896           goto label_invalid_coding_system;
2897
2898         flags = XVECTOR (val)->contents;
2899         coding->flags
2900           = ((NILP (flags[4]) ? 0 : CODING_FLAG_ISO_SHORT_FORM)
2901              | (NILP (flags[5]) ? 0 : CODING_FLAG_ISO_RESET_AT_EOL)
2902              | (NILP (flags[6]) ? 0 : CODING_FLAG_ISO_RESET_AT_CNTL)
2903              | (NILP (flags[7]) ? 0 : CODING_FLAG_ISO_SEVEN_BITS)
2904              | (NILP (flags[8]) ? 0 : CODING_FLAG_ISO_LOCKING_SHIFT)
2905              | (NILP (flags[9]) ? 0 : CODING_FLAG_ISO_SINGLE_SHIFT)
2906              | (NILP (flags[10]) ? 0 : CODING_FLAG_ISO_USE_ROMAN)
2907              | (NILP (flags[11]) ? 0 : CODING_FLAG_ISO_USE_OLDJIS)
2908              | (NILP (flags[12]) ? 0 : CODING_FLAG_ISO_NO_DIRECTION)
2909              | (NILP (flags[13]) ? 0 : CODING_FLAG_ISO_INIT_AT_BOL)
2910              | (NILP (flags[14]) ? 0 : CODING_FLAG_ISO_DESIGNATE_AT_BOL)
2911              | (NILP (flags[15]) ? 0 : CODING_FLAG_ISO_SAFE)
2912              | (NILP (flags[16]) ? 0 : CODING_FLAG_ISO_LATIN_EXTRA)
2913              );
2914
2915         /* Invoke graphic register 0 to plane 0.  */
2916         CODING_SPEC_ISO_INVOCATION (coding, 0) = 0;
2917         /* Invoke graphic register 1 to plane 1 if we can use full 8-bit.  */
2918         CODING_SPEC_ISO_INVOCATION (coding, 1)
2919           = (coding->flags & CODING_FLAG_ISO_SEVEN_BITS ? -1 : 1);
2920         /* Not single shifting at first.  */
2921         CODING_SPEC_ISO_SINGLE_SHIFTING (coding) = 0;
2922         /* Beginning of buffer should also be regarded as bol. */
2923         CODING_SPEC_ISO_BOL (coding) = 1;
2924
2925         for (charset = 0; charset <= MAX_CHARSET; charset++)
2926           CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = 255;
2927         val = Vcharset_revision_alist;
2928         while (CONSP (val))
2929           {
2930             charset = get_charset_id (Fcar_safe (XCONS (val)->car));
2931             if (charset >= 0
2932                 && (temp = Fcdr_safe (XCONS (val)->car), INTEGERP (temp))
2933                 && (i = XINT (temp), (i >= 0 && (i + '@') < 128)))
2934               CODING_SPEC_ISO_REVISION_NUMBER (coding, charset) = i;
2935             val = XCONS (val)->cdr;
2936           }
2937
2938         /* Checks FLAGS[REG] (REG = 0, 1, 2 3) and decide designations.
2939            FLAGS[REG] can be one of below:
2940                 integer CHARSET: CHARSET occupies register I,
2941                 t: designate nothing to REG initially, but can be used
2942                   by any charsets,
2943                 list of integer, nil, or t: designate the first
2944                   element (if integer) to REG initially, the remaining
2945                   elements (if integer) is designated to REG on request,
2946                   if an element is t, REG can be used by any charsets,
2947                 nil: REG is never used.  */
2948         for (charset = 0; charset <= MAX_CHARSET; charset++)
2949           CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2950             = CODING_SPEC_ISO_NO_REQUESTED_DESIGNATION;
2951         for (i = 0; i < 4; i++)
2952           {
2953             if (INTEGERP (flags[i])
2954                 && (charset = XINT (flags[i]), CHARSET_VALID_P (charset))
2955                 || (charset = get_charset_id (flags[i])) >= 0)
2956               {
2957                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2958                 CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) = i;
2959               }
2960             else if (EQ (flags[i], Qt))
2961               {
2962                 CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2963                 reg_bits |= 1 << i;
2964                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2965               }
2966             else if (CONSP (flags[i]))
2967               {
2968                 Lisp_Object tail = flags[i];
2969
2970                 coding->flags |= CODING_FLAG_ISO_DESIGNATION;
2971                 if (INTEGERP (XCONS (tail)->car)
2972                     && (charset = XINT (XCONS (tail)->car),
2973                         CHARSET_VALID_P (charset))
2974                     || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2975                   {
2976                     CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = charset;
2977                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset) =i;
2978                   }
2979                 else
2980                   CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2981                 tail = XCONS (tail)->cdr;
2982                 while (CONSP (tail))
2983                   {
2984                     if (INTEGERP (XCONS (tail)->car)
2985                         && (charset = XINT (XCONS (tail)->car),
2986                             CHARSET_VALID_P (charset))
2987                         || (charset = get_charset_id (XCONS (tail)->car)) >= 0)
2988                       CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
2989                         = i;
2990                     else if (EQ (XCONS (tail)->car, Qt))
2991                       reg_bits |= 1 << i;
2992                     tail = XCONS (tail)->cdr;
2993                   }
2994               }
2995             else
2996               CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i) = -1;
2997
2998             CODING_SPEC_ISO_DESIGNATION (coding, i)
2999               = CODING_SPEC_ISO_INITIAL_DESIGNATION (coding, i);
3000           }
3001
3002         if (reg_bits && ! (coding->flags & CODING_FLAG_ISO_LOCKING_SHIFT))
3003           {
3004             /* REG 1 can be used only by locking shift in 7-bit env.  */
3005             if (coding->flags & CODING_FLAG_ISO_SEVEN_BITS)
3006               reg_bits &= ~2;
3007             if (! (coding->flags & CODING_FLAG_ISO_SINGLE_SHIFT))
3008               /* Without any shifting, only REG 0 and 1 can be used.  */
3009               reg_bits &= 3;
3010           }
3011
3012         if (reg_bits)
3013           for (charset = 0; charset <= MAX_CHARSET; charset++)
3014             {
3015               if (CHARSET_VALID_P (charset))
3016                 {
3017                   /* There exist some default graphic registers to be
3018                      used CHARSET.  */
3019
3020                   /* We had better avoid designating a charset of
3021                      CHARS96 to REG 0 as far as possible.  */
3022                   if (CHARSET_CHARS (charset) == 96)
3023                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3024                       = (reg_bits & 2
3025                          ? 1 : (reg_bits & 4 ? 2 : (reg_bits & 8 ? 3 : 0)));
3026                   else
3027                     CODING_SPEC_ISO_REQUESTED_DESIGNATION (coding, charset)
3028                       = (reg_bits & 1
3029                          ? 0 : (reg_bits & 2 ? 1 : (reg_bits & 4 ? 2 : 3)));
3030                 }
3031             }
3032       }
3033       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3034       coding->spec.iso2022.last_invalid_designation_register = -1;
3035       break;
3036
3037     case 3:
3038       coding->type = coding_type_big5;
3039       coding->common_flags
3040         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3041       coding->flags
3042         = (NILP (XVECTOR (coding_spec)->contents[4])
3043            ? CODING_FLAG_BIG5_HKU
3044            : CODING_FLAG_BIG5_ETEN);
3045       break;
3046
3047     case 4:
3048       coding->type = coding_type_ccl;
3049       coding->common_flags
3050         |= CODING_REQUIRE_DECODING_MASK | CODING_REQUIRE_ENCODING_MASK;
3051       {
3052         Lisp_Object val = XVECTOR (coding_spec)->contents[4];
3053         Lisp_Object decoder, encoder;
3054
3055         if (CONSP  (val)
3056             && SYMBOLP (XCONS (val)->car)
3057             && !NILP (decoder = Fget (XCONS (val)->car, Qccl_program_idx))
3058             && !NILP (decoder = Fcdr (Faref (Vccl_program_table, decoder)))
3059             && SYMBOLP (XCONS (val)->cdr)
3060             && !NILP (encoder = Fget (XCONS (val)->cdr, Qccl_program_idx))
3061             && !NILP (encoder = Fcdr (Faref (Vccl_program_table, encoder))))
3062           {
3063             setup_ccl_program (&(coding->spec.ccl.decoder), decoder);
3064             setup_ccl_program (&(coding->spec.ccl.encoder), encoder);
3065           }
3066         else
3067           goto label_invalid_coding_system;
3068       }
3069       coding->common_flags |= CODING_REQUIRE_FLUSHING_MASK;
3070       break;
3071
3072     case 5:
3073       coding->type = coding_type_raw_text;
3074       break;
3075
3076     default:
3077       goto label_invalid_coding_system;
3078     }
3079   return 0;
3080
3081  label_invalid_coding_system:
3082   coding->type = coding_type_no_conversion;
3083   coding->category_idx = CODING_CATEGORY_IDX_BINARY;
3084   coding->common_flags = 0;
3085   coding->eol_type = CODING_EOL_LF;
3086   coding->pre_write_conversion = coding->post_read_conversion = Qnil;
3087   return -1;
3088 }
3089
3090 /* Emacs has a mechanism to automatically detect a coding system if it
3091    is one of Emacs' internal format, ISO2022, SJIS, and BIG5.  But,
3092    it's impossible to distinguish some coding systems accurately
3093    because they use the same range of codes.  So, at first, coding
3094    systems are grouped into the following categories:
3095
3096    o coding-category-emacs-mule
3097
3098         The category for a coding system which has the same code range
3099         as Emacs' internal format.  Assigned the coding-system (Lisp
3100         symbol) `emacs-mule' by default.
3101
3102    o coding-category-sjis
3103
3104         The category for a coding system which has the same code range
3105         as SJIS.  Assigned the coding-system (Lisp
3106         symbol) `japanese-shift-jis' by default.
3107
3108    o coding-category-iso-7
3109
3110         The category for a coding system which has the same code range
3111         as ISO2022 of 7-bit environment.  This doesn't use any locking
3112         shift and single shift functions.  This can encode/decode all
3113         charsets.  Assigned the coding-system (Lisp symbol)
3114         `iso-2022-7bit' by default.
3115
3116    o coding-category-iso-7-tight
3117
3118         Same as coding-category-iso-7 except that this can
3119         encode/decode only the specified charsets.
3120
3121    o coding-category-iso-8-1
3122
3123         The category for a coding system which has the same code range
3124         as ISO2022 of 8-bit environment and graphic plane 1 used only
3125         for DIMENSION1 charset.  This doesn't use any locking shift
3126         and single shift functions.  Assigned the coding-system (Lisp
3127         symbol) `iso-latin-1' by default.
3128
3129    o coding-category-iso-8-2
3130
3131         The category for a coding system which has the same code range
3132         as ISO2022 of 8-bit environment and graphic plane 1 used only
3133         for DIMENSION2 charset.  This doesn't use any locking shift
3134         and single shift functions.  Assigned the coding-system (Lisp
3135         symbol) `japanese-iso-8bit' by default.
3136
3137    o coding-category-iso-7-else
3138
3139         The category for a coding system which has the same code range
3140         as ISO2022 of 7-bit environment but uses locking shift or
3141         single shift functions.  Assigned the coding-system (Lisp
3142         symbol) `iso-2022-7bit-lock' by default.
3143
3144    o coding-category-iso-8-else
3145
3146         The category for a coding system which has the same code range
3147         as ISO2022 of 8-bit environment but uses locking shift or
3148         single shift functions.  Assigned the coding-system (Lisp
3149         symbol) `iso-2022-8bit-ss2' by default.
3150
3151    o coding-category-big5
3152
3153         The category for a coding system which has the same code range
3154         as BIG5.  Assigned the coding-system (Lisp symbol)
3155         `cn-big5' by default.
3156
3157    o coding-category-binary
3158
3159         The category for a coding system not categorized in any of the
3160         above.  Assigned the coding-system (Lisp symbol)
3161         `no-conversion' by default.
3162
3163    Each of them is a Lisp symbol whose value is an actual
3164    `coding-system' (also a Lisp symbol) assigned by a user.
3165    What Emacs does actually is to detect a category of coding system.
3166    Then, it uses a `coding-system' assigned to it.  If Emacs can't
3167    decide only one possible category, it selects a category of the
3168    highest priority.  Priorities of categories are also specified by a
3169    user in a Lisp variable `coding-category-list'.
3170
3171 */
3172
3173 static
3174 int ascii_skip_code[256];
3175
3176 /* Detect how a text of length SRC_BYTES pointed by SOURCE is encoded.
3177    If it detects possible coding systems, return an integer in which
3178    appropriate flag bits are set.  Flag bits are defined by macros
3179    CODING_CATEGORY_MASK_XXX in `coding.h'.
3180
3181    How many ASCII characters are at the head is returned as *SKIP.  */
3182
3183 static int
3184 detect_coding_mask (source, src_bytes, priorities, skip)
3185      unsigned char *source;
3186      int src_bytes, *priorities, *skip;
3187 {
3188   register unsigned char c;
3189   unsigned char *src = source, *src_end = source + src_bytes;
3190   unsigned int mask;
3191   int i;
3192
3193   /* At first, skip all ASCII characters and control characters except
3194      for three ISO2022 specific control characters.  */
3195   ascii_skip_code[ISO_CODE_SO] = 0;
3196   ascii_skip_code[ISO_CODE_SI] = 0;
3197   ascii_skip_code[ISO_CODE_ESC] = 0;
3198
3199  label_loop_detect_coding:
3200   while (src < src_end && ascii_skip_code[*src]) src++;
3201   *skip = src - source;
3202
3203   if (src >= src_end)
3204     /* We found nothing other than ASCII.  There's nothing to do.  */
3205     return 0;
3206
3207   c = *src;
3208   /* The text seems to be encoded in some multilingual coding system.
3209      Now, try to find in which coding system the text is encoded.  */
3210   if (c < 0x80)
3211     {
3212       /* i.e. (c == ISO_CODE_ESC || c == ISO_CODE_SI || c == ISO_CODE_SO) */
3213       /* C is an ISO2022 specific control code of C0.  */
3214       mask = detect_coding_iso2022 (src, src_end);
3215       if (mask == 0)
3216         {
3217           /* No valid ISO2022 code follows C.  Try again.  */
3218           src++;
3219           if (c == ISO_CODE_ESC)
3220             ascii_skip_code[ISO_CODE_ESC] = 1;
3221           else
3222             ascii_skip_code[ISO_CODE_SO] = ascii_skip_code[ISO_CODE_SI] = 1;
3223           goto label_loop_detect_coding;
3224         }
3225       if (priorities)
3226         goto label_return_highest_only;
3227     }
3228   else
3229     {
3230       int try;
3231
3232       if (c < 0xA0)
3233         {
3234           /* C is the first byte of SJIS character code,
3235              or a leading-code of Emacs' internal format (emacs-mule).  */
3236           try = CODING_CATEGORY_MASK_SJIS | CODING_CATEGORY_MASK_EMACS_MULE;
3237
3238           /* Or, if C is a special latin extra code,
3239              or is an ISO2022 specific control code of C1 (SS2 or SS3),
3240              or is an ISO2022 control-sequence-introducer (CSI),
3241              we should also consider the possibility of ISO2022 codings.  */
3242           if ((VECTORP (Vlatin_extra_code_table)
3243                && !NILP (XVECTOR (Vlatin_extra_code_table)->contents[c]))
3244               || (c == ISO_CODE_SS2 || c == ISO_CODE_SS3)
3245               || (c == ISO_CODE_CSI
3246                   && (src < src_end
3247                       && (*src == ']'
3248                           || ((*src == '0' || *src == '1' || *src == '2')
3249                               && src + 1 < src_end
3250                               && src[1] == ']')))))
3251             try |= (CODING_CATEGORY_MASK_ISO_8_ELSE
3252                      | CODING_CATEGORY_MASK_ISO_8BIT);
3253         }
3254       else
3255         /* C is a character of ISO2022 in graphic plane right,
3256            or a SJIS's 1-byte character code (i.e. JISX0201),
3257            or the first byte of BIG5's 2-byte code.  */
3258         try = (CODING_CATEGORY_MASK_ISO_8_ELSE
3259                 | CODING_CATEGORY_MASK_ISO_8BIT
3260                 | CODING_CATEGORY_MASK_SJIS
3261                 | CODING_CATEGORY_MASK_BIG5);
3262
3263       mask = 0;
3264       if (priorities)
3265         {
3266           for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3267             {
3268               if (priorities[i] & try & CODING_CATEGORY_MASK_ISO)
3269                 mask = detect_coding_iso2022 (src, src_end);
3270               else if (priorities[i] & try & CODING_CATEGORY_MASK_SJIS)
3271                 mask = detect_coding_sjis (src, src_end);
3272               else if (priorities[i] & try & CODING_CATEGORY_MASK_BIG5)
3273                 mask = detect_coding_big5 (src, src_end);
3274               else if (priorities[i] & try & CODING_CATEGORY_MASK_EMACS_MULE)
3275                 mask = detect_coding_emacs_mule (src, src_end);
3276               else if (priorities[i] & CODING_CATEGORY_MASK_RAW_TEXT)
3277                 mask = CODING_CATEGORY_MASK_RAW_TEXT;
3278               else if (priorities[i] & CODING_CATEGORY_MASK_BINARY)
3279                 mask = CODING_CATEGORY_MASK_BINARY;
3280               if (mask)
3281                 goto label_return_highest_only;
3282             }
3283           return CODING_CATEGORY_MASK_RAW_TEXT;
3284         }
3285       if (try & CODING_CATEGORY_MASK_ISO)
3286         mask |= detect_coding_iso2022 (src, src_end);
3287       if (try & CODING_CATEGORY_MASK_SJIS)
3288         mask |= detect_coding_sjis (src, src_end);
3289       if (try & CODING_CATEGORY_MASK_BIG5)
3290         mask |= detect_coding_big5 (src, src_end);
3291       if (try & CODING_CATEGORY_MASK_EMACS_MULE)
3292         mask |= detect_coding_emacs_mule (src, src_end);
3293     }
3294   return (mask | CODING_CATEGORY_MASK_RAW_TEXT | CODING_CATEGORY_MASK_BINARY);
3295
3296  label_return_highest_only:
3297   for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
3298     {
3299       if (mask & priorities[i])
3300         return priorities[i];
3301     }
3302   return CODING_CATEGORY_MASK_RAW_TEXT;
3303 }
3304
3305 /* Detect how a text of length SRC_BYTES pointed by SRC is encoded.
3306    The information of the detected coding system is set in CODING.  */
3307
3308 void
3309 detect_coding (coding, src, src_bytes)
3310      struct coding_system *coding;
3311      unsigned char *src;
3312      int src_bytes;
3313 {
3314   unsigned int idx;
3315   int skip, mask, i;
3316   Lisp_Object val = Vcoding_category_list;
3317
3318   mask = detect_coding_mask (src, src_bytes, coding_priorities, &skip);
3319   coding->heading_ascii = skip;
3320
3321   if (!mask) return;
3322
3323   /* We found a single coding system of the highest priority in MASK.  */
3324   idx = 0;
3325   while (mask && ! (mask & 1)) mask >>= 1, idx++;
3326   if (! mask)
3327     idx = CODING_CATEGORY_IDX_RAW_TEXT;
3328
3329   val = XSYMBOL (XVECTOR (Vcoding_category_table)->contents[idx])->value;
3330
3331   if (coding->eol_type != CODING_EOL_UNDECIDED)
3332     {
3333       Lisp_Object tmp = Fget (val, Qeol_type);
3334
3335       if (VECTORP (tmp))
3336         val = XVECTOR (tmp)->contents[coding->eol_type];
3337     }
3338   setup_coding_system (val, coding);
3339   /* Set this again because setup_coding_system reset this member.  */
3340   coding->heading_ascii = skip;
3341 }
3342
3343 /* Detect how end-of-line of a text of length SRC_BYTES pointed by
3344    SOURCE is encoded.  Return one of CODING_EOL_LF, CODING_EOL_CRLF,
3345    CODING_EOL_CR, and CODING_EOL_UNDECIDED.
3346
3347    How many non-eol characters are at the head is returned as *SKIP.  */
3348
3349 #define MAX_EOL_CHECK_COUNT 3
3350
3351 static int
3352 detect_eol_type (source, src_bytes, skip)
3353      unsigned char *source;
3354      int src_bytes, *skip;
3355 {
3356   unsigned char *src = source, *src_end = src + src_bytes;
3357   unsigned char c;
3358   int total = 0;                /* How many end-of-lines are found so far.  */
3359   int eol_type = CODING_EOL_UNDECIDED;
3360   int this_eol_type;
3361
3362   *skip = 0;
3363
3364   while (src < src_end && total < MAX_EOL_CHECK_COUNT)
3365     {
3366       c = *src++;
3367       if (c == '\n' || c == '\r')
3368         {
3369           if (*skip == 0)
3370             *skip = src - 1 - source;
3371           total++;
3372           if (c == '\n')
3373             this_eol_type = CODING_EOL_LF;
3374           else if (src >= src_end || *src != '\n')
3375             this_eol_type = CODING_EOL_CR;
3376           else
3377             this_eol_type = CODING_EOL_CRLF, src++;
3378
3379           if (eol_type == CODING_EOL_UNDECIDED)
3380             /* This is the first end-of-line.  */
3381             eol_type = this_eol_type;
3382           else if (eol_type != this_eol_type)
3383             {
3384               /* The found type is different from what found before.  */
3385               eol_type = CODING_EOL_INCONSISTENT;
3386               break;
3387             }
3388         }
3389     }
3390
3391   if (*skip == 0)
3392     *skip = src_end - source;
3393   return eol_type;
3394 }
3395
3396 /* Detect how end-of-line of a text of length SRC_BYTES pointed by SRC
3397    is encoded.  If it detects an appropriate format of end-of-line, it
3398    sets the information in *CODING.  */
3399
3400 void
3401 detect_eol (coding, src, src_bytes)
3402      struct coding_system *coding;
3403      unsigned char *src;
3404      int src_bytes;
3405 {
3406   Lisp_Object val;
3407   int skip;
3408   int eol_type = detect_eol_type (src, src_bytes, &skip);
3409
3410   if (coding->heading_ascii > skip)
3411     coding->heading_ascii = skip;
3412   else
3413     skip = coding->heading_ascii;
3414
3415   if (eol_type == CODING_EOL_UNDECIDED)
3416     return;
3417   if (eol_type == CODING_EOL_INCONSISTENT)
3418     {
3419 #if 0
3420       /* This code is suppressed until we find a better way to
3421          distinguish raw text file and binary file.  */
3422
3423       /* If we have already detected that the coding is raw-text, the
3424          coding should actually be no-conversion.  */
3425       if (coding->type == coding_type_raw_text)
3426         {
3427           setup_coding_system (Qno_conversion, coding);
3428           return;
3429         }
3430       /* Else, let's decode only text code anyway.  */
3431 #endif /* 0 */
3432       eol_type = CODING_EOL_LF;
3433     }
3434
3435   val = Fget (coding->symbol, Qeol_type);
3436   if (VECTORP (val) && XVECTOR (val)->size == 3)
3437     {
3438       setup_coding_system (XVECTOR (val)->contents[eol_type], coding);
3439       coding->heading_ascii = skip;
3440     }
3441 }
3442
/* Extra room (in bytes) added to every estimate of the size of a
   conversion buffer.  */
#define CONVERSION_BUFFER_EXTRA_ROOM 256

/* Factor by which the byte length of a text is multiplied to estimate
   the size of its decoded form.  CODING is a pointer to struct
   coding_system: 3 for ISO2022, 2 for SJIS and BIG5, 1 for raw-text,
   the magnification of the decoder program for CCL, and 2 for any
   other type.  The argument is parenthesized so that any pointer
   expression can be given.  */
#define DECODING_BUFFER_MAG(coding)                                     \
  ((coding)->type == coding_type_iso2022                                \
   ? 3                                                                  \
   : (((coding)->type == coding_type_sjis                               \
       || (coding)->type == coding_type_big5)                           \
      ? 2                                                               \
      : ((coding)->type == coding_type_raw_text                         \
         ? 1                                                            \
         : ((coding)->type == coding_type_ccl                           \
            ? (coding)->spec.ccl.decoder.buf_magnification              \
            : 2))))
3455
3456 /* Return maximum size (bytes) of a buffer enough for decoding
3457    SRC_BYTES of text encoded in CODING.  */
3458
3459 int
3460 decoding_buffer_size (coding, src_bytes)
3461      struct coding_system *coding;
3462      int src_bytes;
3463 {
3464   return (src_bytes * DECODING_BUFFER_MAG (coding)
3465           + CONVERSION_BUFFER_EXTRA_ROOM);
3466 }
3467
3468 /* Return maximum size (bytes) of a buffer enough for encoding
3469    SRC_BYTES of text to CODING.  */
3470
3471 int
3472 encoding_buffer_size (coding, src_bytes)
3473      struct coding_system *coding;
3474      int src_bytes;
3475 {
3476   int magnification;
3477
3478   if (coding->type == coding_type_ccl)
3479     magnification = coding->spec.ccl.encoder.buf_magnification;
3480   else
3481     magnification = 3;
3482
3483   return (src_bytes * magnification + CONVERSION_BUFFER_EXTRA_ROOM);
3484 }
3485
/* The smallest size get_conversion_buffer ever allocates.  */
#ifndef MINIMUM_CONVERSION_BUFFER_SIZE
#define MINIMUM_CONVERSION_BUFFER_SIZE 1024
#endif

/* The working buffer shared by the conversion routines, and the
   number of bytes currently allocated for it.  */
char *conversion_buffer;
int conversion_buffer_size;

/* Return a pointer to a SIZE bytes of buffer to be used for encoding
   or decoding.  Sufficient memory is allocated automatically: if SIZE
   exceeds the current size, a new buffer of at least twice the
   current size is allocated and the old one is freed.  The old
   contents are NOT copied to the new buffer.

   NOTE(review): this function has no path of its own that returns
   NULL; what happens when memory runs out depends entirely on xmalloc
   (presumably it signals an error instead of returning -- confirm).  */

char *
get_conversion_buffer (size)
     int size;
{
  if (size > conversion_buffer_size)
    {
      char *buf;
      int real_size = conversion_buffer_size * 2;

      /* Doubling a size of zero never grows, which would make the
         loop below spin forever if we are called before
         `conversion_buffer_size' gets a positive value.  */
      if (real_size < MINIMUM_CONVERSION_BUFFER_SIZE)
        real_size = MINIMUM_CONVERSION_BUFFER_SIZE;
      while (real_size < size) real_size *= 2;
      /* Allocate the new buffer before freeing the old one, so that
         the old one is still in place if xmalloc does not return.  */
      buf = (char *) xmalloc (real_size);
      xfree (conversion_buffer);
      conversion_buffer = buf;
      conversion_buffer_size = real_size;
    }
  return conversion_buffer;
}
3514
3515 int
3516 ccl_coding_driver (coding, source, destination, src_bytes, dst_bytes, encodep)
3517      struct coding_system *coding;
3518      unsigned char *source, *destination;
3519      int src_bytes, dst_bytes, encodep;
3520 {
3521   struct ccl_program *ccl
3522     = encodep ? &coding->spec.ccl.encoder : &coding->spec.ccl.decoder;
3523   int result;
3524
3525   coding->produced = ccl_driver (ccl, source, destination,
3526                                  src_bytes, dst_bytes, &(coding->consumed));
3527   if (encodep)
3528     {
3529       coding->produced_char = coding->produced;
3530       coding->consumed_char
3531         = multibyte_chars_in_text (source, coding->consumed);
3532     }
3533   else
3534     {
3535       coding->produced_char
3536         = multibyte_chars_in_text (destination, coding->produced);
3537       coding->consumed_char = coding->consumed;
3538     }
3539   switch (ccl->status)
3540     {
3541     case CCL_STAT_SUSPEND_BY_SRC:
3542       result = CODING_FINISH_INSUFFICIENT_SRC;
3543       break;
3544     case CCL_STAT_SUSPEND_BY_DST:
3545       result = CODING_FINISH_INSUFFICIENT_DST;
3546       break;
3547     default:
3548       result = CODING_FINISH_NORMAL;
3549       break;
3550     }
3551   return result;
3552 }
3553
3554 /* See "GENERAL NOTES about `decode_coding_XXX ()' functions".  Before
3555    decoding, it may detect coding system and format of end-of-line if
3556    those are not yet decided.  */
3557
3558 int
3559 decode_coding (coding, source, destination, src_bytes, dst_bytes)
3560      struct coding_system *coding;
3561      unsigned char *source, *destination;
3562      int src_bytes, dst_bytes;
3563 {
3564   int result;
3565
3566   if (src_bytes <= 0)
3567     {
3568       coding->produced = coding->produced_char = 0;
3569       coding->consumed = coding->consumed_char = 0;
3570       coding->fake_multibyte = 0;
3571       return CODING_FINISH_NORMAL;
3572     }
3573
3574   if (coding->type == coding_type_undecided)
3575     detect_coding (coding, source, src_bytes);
3576
3577   if (coding->eol_type == CODING_EOL_UNDECIDED)
3578     detect_eol (coding, source, src_bytes);
3579
3580   switch (coding->type)
3581     {
3582     case coding_type_emacs_mule:
3583     case coding_type_undecided:
3584     case coding_type_raw_text:
3585       if (coding->eol_type == CODING_EOL_LF
3586           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3587         goto label_no_conversion;
3588       result = decode_eol (coding, source, destination, src_bytes, dst_bytes);
3589       break;
3590
3591     case coding_type_sjis:
3592       result = decode_coding_sjis_big5 (coding, source, destination,
3593                                         src_bytes, dst_bytes, 1);
3594       break;
3595
3596     case coding_type_iso2022:
3597       result = decode_coding_iso2022 (coding, source, destination,
3598                                       src_bytes, dst_bytes);
3599       break;
3600
3601     case coding_type_big5:
3602       result = decode_coding_sjis_big5 (coding, source, destination,
3603                                         src_bytes, dst_bytes, 0);
3604       break;
3605
3606     case coding_type_ccl:
3607       result = ccl_coding_driver (coding, source, destination,
3608                                   src_bytes, dst_bytes, 0);
3609       break;
3610
3611     default:                    /* i.e. case coding_type_no_conversion: */
3612     label_no_conversion:
3613       if (dst_bytes && src_bytes > dst_bytes)
3614         {
3615           coding->produced = dst_bytes;
3616           result = CODING_FINISH_INSUFFICIENT_DST;
3617         }
3618       else
3619         {
3620           coding->produced = src_bytes;
3621           result = CODING_FINISH_NORMAL;
3622         }
3623       if (dst_bytes)
3624         bcopy (source, destination, coding->produced);
3625       else
3626         safe_bcopy (source, destination, coding->produced);
3627       coding->fake_multibyte = 1;
3628       coding->consumed
3629         = coding->consumed_char = coding->produced_char = coding->produced;
3630       break;
3631     }
3632
3633   return result;
3634 }
3635
3636 /* See "GENERAL NOTES about `encode_coding_XXX ()' functions".  */
3637
3638 int
3639 encode_coding (coding, source, destination, src_bytes, dst_bytes)
3640      struct coding_system *coding;
3641      unsigned char *source, *destination;
3642      int src_bytes, dst_bytes;
3643 {
3644   int result;
3645
3646   if (src_bytes <= 0)
3647     {
3648       coding->produced = coding->produced_char = 0;
3649       coding->consumed = coding->consumed_char = 0;
3650       coding->fake_multibyte = 0;
3651       return CODING_FINISH_NORMAL;
3652     }
3653
3654   switch (coding->type)
3655     {
3656     case coding_type_emacs_mule:
3657     case coding_type_undecided:
3658     case coding_type_raw_text:
3659       if (coding->eol_type == CODING_EOL_LF
3660           ||  coding->eol_type == CODING_EOL_UNDECIDED)
3661         goto label_no_conversion;
3662       result = encode_eol (coding, source, destination, src_bytes, dst_bytes);
3663       break;
3664
3665     case coding_type_sjis:
3666       result = encode_coding_sjis_big5 (coding, source, destination,
3667                                         src_bytes, dst_bytes, 1);
3668       break;
3669
3670     case coding_type_iso2022:
3671       result = encode_coding_iso2022 (coding, source, destination,
3672                                       src_bytes, dst_bytes);
3673       break;
3674
3675     case coding_type_big5:
3676       result = encode_coding_sjis_big5 (coding, source, destination,
3677                                         src_bytes, dst_bytes, 0);
3678       break;
3679
3680     case coding_type_ccl:
3681       result = ccl_coding_driver (coding, source, destination,
3682                                   src_bytes, dst_bytes, 1);
3683       break;
3684
3685     default:                    /* i.e. case coding_type_no_conversion: */
3686     label_no_conversion:
3687       if (dst_bytes && src_bytes > dst_bytes)
3688         {
3689           coding->produced = dst_bytes;
3690           result = CODING_FINISH_INSUFFICIENT_DST;
3691         }
3692       else
3693         {
3694           coding->produced = src_bytes;
3695           result = CODING_FINISH_NORMAL;
3696         }
3697       if (dst_bytes)
3698         bcopy (source, destination, coding->produced);
3699       else
3700         safe_bcopy (source, destination, coding->produced);
3701       if (coding->mode & CODING_MODE_SELECTIVE_DISPLAY)
3702         {
3703           unsigned char *p = destination, *pend = p + coding->produced;
3704           while (p < pend)
3705             if (*p++ == '\015') p[-1] = '\n';
3706         }
3707       coding->fake_multibyte = 1;
3708       coding->consumed
3709         = coding->consumed_char = coding->produced_char = coding->produced;
3710       break;
3711     }
3712
3713   return result;
3714 }
3715
3716 /* Scan text in the region between *BEG and *END (byte positions),
3717    skip characters which we don't have to decode by coding system
3718    CODING at the head and tail, then set *BEG and *END to the region
3719    of the text we actually have to convert.  The caller should move
3720    the gap out of the region in advance.
3721
3722    If STR is not NULL, *BEG and *END are indices into STR.  */
3723
3724 static void
3725 shrink_decoding_region (beg, end, coding, str)
3726      int *beg, *end;
3727      struct coding_system *coding;
3728      unsigned char *str;
3729 {
3730   unsigned char *begp_orig, *begp, *endp_orig, *endp, c;
3731   int eol_conversion;
3732
3733   if (coding->type == coding_type_ccl
3734       || coding->type == coding_type_undecided
3735       || !NILP (coding->post_read_conversion))
3736     {
3737       /* We can't skip any data.  */
3738       return;
3739     }
3740   else if (coding->type == coding_type_no_conversion)
3741     {
3742       /* We need no conversion, but don't have to skip any data here.
3743          Decoding routine handles them effectively anyway.  */
3744       return;
3745     }
3746
3747   eol_conversion = (coding->eol_type != CODING_EOL_LF);
3748
3749   if ((! eol_conversion) && (coding->heading_ascii >= 0))
3750     /* Detection routine has already found how much we can skip at the
3751        head.  */
3752     *beg += coding->heading_ascii;
3753
3754   if (str)
3755     {
3756       begp_orig = begp = str + *beg;
3757       endp_orig = endp = str + *end;
3758     }
3759   else
3760     {
3761       begp_orig = begp = BYTE_POS_ADDR (*beg);
3762       endp_orig = endp = begp + *end - *beg;
3763     }
3764
3765   switch (coding->type)
3766     {
3767     case coding_type_emacs_mule:
3768     case coding_type_raw_text:
3769       if (eol_conversion)
3770         {
3771           if (coding->heading_ascii < 0)
3772             while (begp < endp && *begp != '\r' && *begp < 0x80) begp++;
3773           while (begp < endp && endp[-1] != '\r' && endp[-1] < 0x80)
3774             endp--;
3775           /* Do not consider LF as ascii if preceded by CR, since that
3776              confuses eol decoding. */
3777           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3778             endp++;
3779         }
3780       else
3781         begp = endp;
3782       break;
3783
3784     case coding_type_sjis:
3785     case coding_type_big5:
3786       /* We can skip all ASCII characters at the head.  */
3787       if (coding->heading_ascii < 0)
3788         {
3789           if (eol_conversion)
3790             while (begp < endp && *begp < 0x80 && *begp != '\r') begp++;
3791           else
3792             while (begp < endp && *begp < 0x80) begp++;
3793         }
3794       /* We can skip all ASCII characters at the tail except for the
3795          second byte of SJIS or BIG5 code.  */
3796       if (eol_conversion)
3797         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\r') endp--;
3798       else
3799         while (begp < endp && endp[-1] < 0x80) endp--;
3800       /* Do not consider LF as ascii if preceded by CR, since that
3801          confuses eol decoding. */
3802       if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3803         endp++;
3804       if (begp < endp && endp < endp_orig && endp[-1] >= 0x80)
3805         endp++;
3806       break;
3807
3808     default:            /* i.e. case coding_type_iso2022: */
3809       if (coding->heading_ascii < 0)
3810         {
3811           /* We can skip all ASCII characters at the head except for a
3812              few control codes.  */
3813           while (begp < endp && (c = *begp) < 0x80
3814                  && c != ISO_CODE_CR && c != ISO_CODE_SO
3815                  && c != ISO_CODE_SI && c != ISO_CODE_ESC
3816                  && (!eol_conversion || c != ISO_CODE_LF))
3817             begp++;
3818         }
3819       switch (coding->category_idx)
3820         {
3821         case CODING_CATEGORY_IDX_ISO_8_1:
3822         case CODING_CATEGORY_IDX_ISO_8_2:
3823           /* We can skip all ASCII characters at the tail.  */
3824           if (eol_conversion)
3825             while (begp < endp && (c = endp[-1]) < 0x80 && c != '\r') endp--;
3826           else
3827             while (begp < endp && endp[-1] < 0x80) endp--;
3828           /* Do not consider LF as ascii if preceded by CR, since that
3829              confuses eol decoding. */
3830           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3831             endp++;
3832           break;
3833
3834         case CODING_CATEGORY_IDX_ISO_7:
3835         case CODING_CATEGORY_IDX_ISO_7_TIGHT:
3836           /* We can skip all charactes at the tail except for ESC and
3837              the following 2-byte at the tail.  */
3838           if (eol_conversion)
3839             while (begp < endp
3840                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC && c != '\r')
3841               endp--;
3842           else
3843             while (begp < endp
3844                    && (c = endp[-1]) < 0x80 && c != ISO_CODE_ESC)
3845               endp--;
3846           /* Do not consider LF as ascii if preceded by CR, since that
3847              confuses eol decoding. */
3848           if (begp < endp && endp < endp_orig && endp[-1] == '\r' && endp[0] == '\n')
3849             endp++;
3850           if (begp < endp && endp[-1] == ISO_CODE_ESC)
3851             {
3852               if (endp + 1 < endp_orig && end[0] == '(' && end[1] == 'B')
3853                 /* This is an ASCII designation sequence.  We can
3854                     surely skip the tail.  */
3855                 endp += 2;
3856               else
3857                 /* Hmmm, we can't skip the tail.  */
3858                 endp = endp_orig;
3859             }
3860         }
3861     }
3862   *beg += begp - begp_orig;
3863   *end += endp - endp_orig;
3864   return;
3865 }
3866
3867 /* Like shrink_decoding_region but for encoding.  */
3868
3869 static void
3870 shrink_encoding_region (beg, end, coding, str)
3871      int *beg, *end;
3872      struct coding_system *coding;
3873      unsigned char *str;
3874 {
3875   unsigned char *begp_orig, *begp, *endp_orig, *endp;
3876   int eol_conversion;
3877
3878   if (coding->type == coding_type_ccl)
3879     /* We can't skip any data.  */
3880     return;
3881   else if (coding->type == coding_type_no_conversion)
3882     {
3883       /* We need no conversion.  */
3884       *beg = *end;
3885       return;
3886     }
3887
3888   if (str)
3889     {
3890       begp_orig = begp = str + *beg;
3891       endp_orig = endp = str + *end;
3892     }
3893   else
3894     {
3895       begp_orig = begp = BYTE_POS_ADDR (*beg);
3896       endp_orig = endp = begp + *end - *beg;
3897     }
3898
3899   eol_conversion = (coding->eol_type == CODING_EOL_CR
3900                     || coding->eol_type == CODING_EOL_CRLF);
3901
3902   /* Here, we don't have to check coding->pre_write_conversion because
3903      the caller is expected to have handled it already.  */
3904   switch (coding->type)
3905     {
3906     case coding_type_undecided:
3907     case coding_type_emacs_mule:
3908     case coding_type_raw_text:
3909       if (eol_conversion)
3910         {
3911           while (begp < endp && *begp != '\n') begp++;
3912           while (begp < endp && endp[-1] != '\n') endp--;
3913         }
3914       else
3915         begp = endp;
3916       break;
3917
3918     case coding_type_iso2022:
3919       if (coding->flags & CODING_FLAG_ISO_DESIGNATE_AT_BOL)
3920         {
3921           unsigned char *bol = begp;
3922           while (begp < endp && *begp < 0x80)
3923             {
3924               begp++;
3925               if (begp[-1] == '\n')
3926                 bol = begp;
3927             }
3928           begp = bol;
3929           goto label_skip_tail;
3930         }
3931       /* fall down ... */
3932
3933     default:
3934       /* We can skip all ASCII characters at the head and tail.  */
3935       if (eol_conversion)
3936         while (begp < endp && *begp < 0x80 && *begp != '\n') begp++;
3937       else
3938         while (begp < endp && *begp < 0x80) begp++;
3939     label_skip_tail:
3940       if (eol_conversion)
3941         while (begp < endp && endp[-1] < 0x80 && endp[-1] != '\n') endp--;
3942       else
3943         while (begp < endp && *(endp - 1) < 0x80) endp--;
3944       break;
3945     }
3946
3947   *beg += begp - begp_orig;
3948   *end += endp - endp_orig;
3949   return;
3950 }
3951
3952 /* Decode (if ENCODEP is zero) or encode (if ENCODEP is nonzero) the
3953    text from FROM to TO (byte positions are FROM_BYTE and TO_BYTE) by
3954    coding system CODING, and return the status code of code conversion
3955    (currently, this value has no meaning).
3956
3957    How many characters (and bytes) are converted to how many
3958    characters (and bytes) are recorded in members of the structure
3959    CODING.
3960
3961    If REPLACE is nonzero, we do various things as if the original text
3962    is deleted and a new text is inserted.  See the comments in
3963    replace_range (insdel.c) to know what we are doing.  */
3964
3965 int
3966 code_convert_region (from, from_byte, to, to_byte, coding, encodep, replace)
3967      int from, from_byte, to, to_byte, encodep, replace;
3968      struct coding_system *coding;
3969 {
3970   int len = to - from, len_byte = to_byte - from_byte;
3971   int require, inserted, inserted_byte;
3972   int head_skip, tail_skip, total_skip;
3973   Lisp_Object saved_coding_symbol = Qnil;
3974   int multibyte = !NILP (current_buffer->enable_multibyte_characters);
3975   int first = 1;
3976   int fake_multibyte = 0;
3977   unsigned char *src, *dst;
3978   Lisp_Object deletion = Qnil;
3979
3980   if (from < PT && PT < to)
3981     SET_PT_BOTH (from, from_byte);
3982
3983   if (replace)
3984     {
3985       int saved_from = from;
3986
3987       prepare_to_modify_buffer (from, to, &from);
3988       if (saved_from != from)
3989         {
3990           to = from + len;
3991           if (multibyte)
3992             from_byte = CHAR_TO_BYTE (from), to_byte = CHAR_TO_BYTE (to);
3993           else
3994             from_byte = from, to_byte = to;
3995           len_byte = to_byte - from_byte;
3996         }
3997     }
3998
3999   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4000     {
4001       /* We must detect encoding of text and eol format.  */
4002
4003       if (from < GPT && to > GPT)
4004         move_gap_both (from, from_byte);
4005       if (coding->type == coding_type_undecided)
4006         {
4007           detect_coding (coding, BYTE_POS_ADDR (from_byte), len_byte);
4008           if (coding->type == coding_type_undecided)
4009             /* It seems that the text contains only ASCII, but we
4010                should not left it undecided because the deeper
4011                decoding routine (decode_coding) tries to detect the
4012                encodings again in vain.  */
4013             coding->type = coding_type_emacs_mule;
4014         }
4015       if (coding->eol_type == CODING_EOL_UNDECIDED)
4016         {
4017           saved_coding_symbol = coding->symbol;
4018           detect_eol (coding, BYTE_POS_ADDR (from_byte), len_byte);
4019           if (coding->eol_type == CODING_EOL_UNDECIDED)
4020             coding->eol_type = CODING_EOL_LF;
4021           /* We had better recover the original eol format if we
4022              encounter an inconsitent eol format while decoding.  */
4023           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4024         }
4025     }
4026
4027   coding->consumed_char = len, coding->consumed = len_byte;
4028
4029   if (encodep
4030       ? ! CODING_REQUIRE_ENCODING (coding)
4031       : ! CODING_REQUIRE_DECODING (coding))
4032     {
4033       coding->produced = len_byte;
4034       if (multibyte
4035           && ! replace
4036           /* See the comment of the member heading_ascii in coding.h.  */
4037           && coding->heading_ascii < len_byte)
4038         {
4039           /* We still may have to combine byte at the head and the
4040              tail of the text in the region.  */
4041           if (from < GPT && GPT < to)
4042             move_gap_both (to, to_byte);
4043           len = multibyte_chars_in_text (BYTE_POS_ADDR (from_byte), len_byte);
4044           adjust_after_insert (from, from_byte, to, to_byte, len);
4045           coding->produced_char = len;
4046         }
4047       else
4048         {
4049           if (!replace)
4050             adjust_after_insert (from, from_byte, to, to_byte, len_byte);
4051           coding->produced_char = len_byte;
4052         }
4053       return 0;
4054     }
4055
4056   /* Now we convert the text.  */
4057
4058   /* For encoding, we must process pre-write-conversion in advance.  */
4059   if (encodep
4060       && ! NILP (coding->pre_write_conversion)
4061       && SYMBOLP (coding->pre_write_conversion)
4062       && ! NILP (Ffboundp (coding->pre_write_conversion)))
4063     {
4064       /* The function in pre-write-conversion may put a new text in a
4065          new buffer.  */
4066       struct buffer *prev = current_buffer, *new;
4067
4068       call2 (coding->pre_write_conversion,
4069              make_number (from), make_number (to));
4070       if (current_buffer != prev)
4071         {
4072           len = ZV - BEGV;
4073           new = current_buffer;
4074           set_buffer_internal_1 (prev);
4075           del_range_2 (from, from_byte, to, to_byte);
4076           insert_from_buffer (new, BEG, len, 0);
4077           to = from + len;
4078           to_byte = multibyte ? CHAR_TO_BYTE (to) : to;
4079           len_byte = to_byte - from_byte;
4080         }
4081     }
4082
4083   if (replace)
4084     deletion = make_buffer_string_both (from, from_byte, to, to_byte, 1);
4085
4086   /* Try to skip the heading and tailing ASCIIs.  */
4087   {
4088     int from_byte_orig = from_byte, to_byte_orig = to_byte;
4089
4090     if (from < GPT && GPT < to)
4091       move_gap_both (from, from_byte);
4092     if (encodep)
4093       shrink_encoding_region (&from_byte, &to_byte, coding, NULL);
4094     else
4095       shrink_decoding_region (&from_byte, &to_byte, coding, NULL);
4096     if (from_byte == to_byte)
4097       {
4098         coding->produced = len_byte;
4099         coding->produced_char = multibyte ? len : len_byte;
4100         if (!replace)
4101           /* We must record and adjust for this new text now.  */
4102           adjust_after_insert (from, from_byte_orig, to, to_byte_orig, len);
4103         return 0;
4104       }
4105
4106     head_skip = from_byte - from_byte_orig;
4107     tail_skip = to_byte_orig - to_byte;
4108     total_skip = head_skip + tail_skip;
4109     from += head_skip;
4110     to -= tail_skip;
4111     len -= total_skip; len_byte -= total_skip;
4112   }
4113
4114   /* For converion, we must put the gap before the text in addition to
4115      making the gap larger for efficient decoding.  The required gap
4116      size starts from 2000 which is the magic number used in make_gap.
4117      But, after one batch of conversion, it will be incremented if we
4118      find that it is not enough .  */
4119   require = 2000;
4120
4121   if (GAP_SIZE  < require)
4122     make_gap (require - GAP_SIZE);
4123   move_gap_both (from, from_byte);
4124
4125   if (GPT - BEG < beg_unchanged)
4126     beg_unchanged = GPT - BEG;
4127   if (Z - GPT < end_unchanged)
4128     end_unchanged = Z - GPT;
4129
4130   inserted = inserted_byte = 0;
4131   src = GAP_END_ADDR, dst = GPT_ADDR;
4132
4133   GAP_SIZE += len_byte;
4134   ZV -= len;
4135   Z -= len;
4136   ZV_BYTE -= len_byte;
4137   Z_BYTE -= len_byte;
4138
4139   for (;;)
4140     {
4141       int result;
4142
4143       /* The buffer memory is changed from:
4144          +--------+converted-text+---------+-------original-text------+---+
4145          |<-from->|<--inserted-->|---------|<-----------len---------->|---|
4146                   |<------------------- GAP_SIZE -------------------->|  */
4147       if (encodep)
4148         result = encode_coding (coding, src, dst, len_byte, 0);
4149       else
4150         result = decode_coding (coding, src, dst, len_byte, 0);
4151       /* to:
4152          +--------+-------converted-text--------+--+---original-text--+---+
4153          |<-from->|<--inserted-->|<--produced-->|--|<-(len-consumed)->|---|
4154                   |<------------------- GAP_SIZE -------------------->|  */
4155       if (coding->fake_multibyte)
4156         fake_multibyte = 1;
4157
4158       if (!encodep && !multibyte)
4159         coding->produced_char = coding->produced;
4160       inserted += coding->produced_char;
4161       inserted_byte += coding->produced;
4162       len_byte -= coding->consumed;
4163       src += coding->consumed;
4164       dst += inserted_byte;
4165
4166       if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4167         {
4168           unsigned char *pend = dst, *p = pend - inserted_byte;
4169
4170           /* Encode LFs back to the original eol format (CR or CRLF).  */
4171           if (coding->eol_type == CODING_EOL_CR)
4172             {
4173               while (p < pend) if (*p++ == '\n') p[-1] = '\r';
4174             }
4175           else
4176             {
4177               int count = 0;
4178
4179               while (p < pend) if (*p++ == '\n') count++;
4180               if (src - dst < count)
4181                 {
4182                   /* We don't have sufficient room for putting LFs
4183                      back to CRLF.  We must record converted and
4184                      not-yet-converted text back to the buffer
4185                      content, enlarge the gap, then record them out of
4186                      the buffer contents again.  */
4187                   int add = len_byte + inserted_byte;
4188
4189                   GAP_SIZE -= add;
4190                   ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4191                   GPT += inserted_byte; GPT_BYTE += inserted_byte;
4192                   make_gap (count - GAP_SIZE);
4193                   GAP_SIZE += add;
4194                   ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4195                   GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4196                   /* Don't forget to update SRC, DST, and PEND.  */
4197                   src = GAP_END_ADDR - len_byte;
4198                   dst = GPT_ADDR + inserted_byte;
4199                   pend = dst;
4200                 }
4201               inserted += count;
4202               inserted_byte += count;
4203               coding->produced += count;
4204               p = dst = pend + count;
4205               while (count)
4206                 {
4207                   *--p = *--pend;
4208                   if (*p == '\n') count--, *--p = '\r';
4209                 }
4210             }
4211
4212           /* Suppress eol-format conversion in the further conversion.  */
4213           coding->eol_type = CODING_EOL_LF;
4214
4215           /* Restore the original symbol.  */
4216           coding->symbol = saved_coding_symbol;
4217
4218           continue;
4219         }
4220       if (len_byte <= 0)
4221         break;
4222       if (result == CODING_FINISH_INSUFFICIENT_SRC)
4223         {
4224           /* The source text ends in invalid codes.  Let's just
4225              make them valid buffer contents, and finish conversion.  */
4226           inserted += len_byte;
4227           inserted_byte += len_byte;
4228           while (len_byte--)
4229             *dst++ = *src++;
4230           fake_multibyte = 1;
4231           break;
4232         }
4233       if (first)
4234         {
4235           /* We have just done the first batch of conversion which was
4236              stoped because of insufficient gap.  Let's reconsider the
4237              required gap size (i.e. SRT - DST) now.
4238
4239              We have converted ORIG bytes (== coding->consumed) into
4240              NEW bytes (coding->produced).  To convert the remaining
4241              LEN bytes, we may need REQUIRE bytes of gap, where:
4242                 REQUIRE + LEN_BYTE = LEN_BYTE * (NEW / ORIG)
4243                 REQUIRE = LEN_BYTE * (NEW - ORIG) / ORIG
4244              Here, we are sure that NEW >= ORIG.  */
4245           float ratio = coding->produced - coding->consumed;
4246           ratio /= coding->consumed;
4247           require = len_byte * ratio;
4248           first = 0;
4249         }
4250       if ((src - dst) < (require + 2000))
4251         {
4252           /* See the comment above the previous call of make_gap.  */
4253           int add = len_byte + inserted_byte;
4254
4255           GAP_SIZE -= add;
4256           ZV += add; Z += add; ZV_BYTE += add; Z_BYTE += add;
4257           GPT += inserted_byte; GPT_BYTE += inserted_byte;
4258           make_gap (require + 2000);
4259           GAP_SIZE += add;
4260           ZV -= add; Z -= add; ZV_BYTE -= add; Z_BYTE -= add;
4261           GPT -= inserted_byte; GPT_BYTE -= inserted_byte;
4262           /* Don't forget to update SRC, DST.  */
4263           src = GAP_END_ADDR - len_byte;
4264           dst = GPT_ADDR + inserted_byte;
4265         }
4266     }
4267   if (src - dst > 0) *dst = 0; /* Put an anchor.  */
4268
4269   if (multibyte
4270       && (fake_multibyte
4271           || !encodep && (to - from) != (to_byte - from_byte)))
4272     inserted = multibyte_chars_in_text (GPT_ADDR, inserted_byte);
4273
4274   /* If we have shrinked the conversion area, adjust it now.  */
4275   if (total_skip > 0)
4276     {
4277       if (tail_skip > 0)
4278         safe_bcopy (GAP_END_ADDR, GPT_ADDR + inserted_byte, tail_skip);
4279       inserted += total_skip; inserted_byte += total_skip;
4280       GAP_SIZE += total_skip;
4281       GPT -= head_skip; GPT_BYTE -= head_skip;
4282       ZV -= total_skip; ZV_BYTE -= total_skip;
4283       Z -= total_skip; Z_BYTE -= total_skip;
4284       from -= head_skip; from_byte -= head_skip;
4285       to += tail_skip; to_byte += tail_skip;
4286     }
4287
4288   adjust_after_replace (from, from_byte, deletion, inserted, inserted_byte);
4289
4290   if (! encodep && ! NILP (coding->post_read_conversion))
4291     {
4292       Lisp_Object val;
4293       int orig_inserted = inserted, pos = PT;
4294
4295       if (from != pos)
4296         temp_set_point_both (current_buffer, from, from_byte);
4297       val = call1 (coding->post_read_conversion, make_number (inserted));
4298       if (! NILP (val))
4299         {
4300           CHECK_NUMBER (val, 0);
4301           inserted = XFASTINT (val);
4302         }
4303       if (pos >= from + orig_inserted)
4304         temp_set_point (current_buffer, pos + (inserted - orig_inserted));
4305     }
4306
4307   signal_after_change (from, to - from, inserted);
4308
4309   {
4310     coding->consumed = to_byte - from_byte;
4311     coding->consumed_char = to - from;
4312     coding->produced = inserted_byte;
4313     coding->produced_char = inserted;
4314   }
4315
4316   return 0;
4317 }
4318
4319 Lisp_Object
4320 code_convert_string (str, coding, encodep, nocopy)
4321      Lisp_Object str;
4322      struct coding_system *coding;
4323      int encodep, nocopy;
4324 {
4325   int len;
4326   char *buf;
4327   int from = 0, to = XSTRING (str)->size;
4328   int to_byte = STRING_BYTES (XSTRING (str));
4329   struct gcpro gcpro1;
4330   Lisp_Object saved_coding_symbol = Qnil;
4331   int result;
4332
4333   if (encodep && !NILP (coding->pre_write_conversion)
4334       || !encodep && !NILP (coding->post_read_conversion))
4335     {
4336       /* Since we have to call Lisp functions which assume target text
4337          is in a buffer, after setting a temporary buffer, call
4338          code_convert_region.  */
4339       int count = specpdl_ptr - specpdl;
4340       struct buffer *prev = current_buffer;
4341
4342       record_unwind_protect (Fset_buffer, Fcurrent_buffer ());
4343       temp_output_buffer_setup (" *code-converting-work*");
4344       set_buffer_internal (XBUFFER (Vstandard_output));
4345       if (encodep)
4346         insert_from_string (str, 0, 0, to, to_byte, 0);
4347       else
4348         {
4349           /* We must insert the contents of STR as is without
4350              unibyte<->multibyte conversion.  */
4351           current_buffer->enable_multibyte_characters = Qnil;
4352           insert_from_string (str, 0, 0, to_byte, to_byte, 0);
4353           current_buffer->enable_multibyte_characters = Qt;
4354         }
4355       code_convert_region (BEGV, BEGV_BYTE, ZV, ZV_BYTE, coding, encodep, 1);
4356       if (encodep)
4357         /* We must return the buffer contents as unibyte string.  */
4358         current_buffer->enable_multibyte_characters = Qnil;
4359       str = make_buffer_string (BEGV, ZV, 0);
4360       set_buffer_internal (prev);
4361       return unbind_to (count, str);
4362     }
4363
4364   if (! encodep && CODING_REQUIRE_DETECTION (coding))
4365     {
4366       /* See the comments in code_convert_region.  */
4367       if (coding->type == coding_type_undecided)
4368         {
4369           detect_coding (coding, XSTRING (str)->data, to_byte);
4370           if (coding->type == coding_type_undecided)
4371             coding->type = coding_type_emacs_mule;
4372         }
4373       if (coding->eol_type == CODING_EOL_UNDECIDED)
4374         {
4375           saved_coding_symbol = coding->symbol;
4376           detect_eol (coding, XSTRING (str)->data, to_byte);
4377           if (coding->eol_type == CODING_EOL_UNDECIDED)
4378             coding->eol_type = CODING_EOL_LF;
4379           /* We had better recover the original eol format if we
4380              encounter an inconsitent eol format while decoding.  */
4381           coding->mode |= CODING_MODE_INHIBIT_INCONSISTENT_EOL;
4382         }
4383     }
4384
4385   if (encodep
4386       ? ! CODING_REQUIRE_ENCODING (coding)
4387       : ! CODING_REQUIRE_DECODING (coding))
4388     from = to_byte;
4389   else
4390     {
4391       /* Try to skip the heading and tailing ASCIIs.  */
4392       if (encodep)
4393         shrink_encoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4394       else
4395         shrink_decoding_region (&from, &to_byte, coding, XSTRING (str)->data);
4396     }
4397   if (from == to_byte)
4398     return (nocopy ? str : Fcopy_sequence (str));
4399
4400   if (encodep)
4401     len = encoding_buffer_size (coding, to_byte - from);
4402   else
4403     len = decoding_buffer_size (coding, to_byte - from);
4404   len += from + STRING_BYTES (XSTRING (str)) - to_byte;
4405   GCPRO1 (str);
4406   buf = get_conversion_buffer (len);
4407   UNGCPRO;
4408
4409   if (from > 0)
4410     bcopy (XSTRING (str)->data, buf, from);
4411   result = (encodep
4412             ? encode_coding (coding, XSTRING (str)->data + from,
4413                              buf + from, to_byte - from, len)
4414             : decode_coding (coding, XSTRING (str)->data + from,
4415                              buf + from, to_byte - from, len));
4416   if (! encodep && result == CODING_FINISH_INCONSISTENT_EOL)
4417     {
4418       /* We simple try to decode the whole string again but without
4419          eol-conversion this time.  */
4420       coding->eol_type = CODING_EOL_LF;
4421       coding->symbol = saved_coding_symbol;
4422       return code_convert_string (str, coding, encodep, nocopy);
4423     }
4424
4425   bcopy (XSTRING (str)->data + to_byte, buf + from + coding->produced,
4426          STRING_BYTES (XSTRING (str)) - to_byte);
4427
4428   len = from + STRING_BYTES (XSTRING (str)) - to_byte;
4429   if (encodep)
4430     str = make_unibyte_string (buf, len + coding->produced);
4431   else
4432     str = make_string_from_bytes (buf, len + coding->produced_char,
4433                                   len + coding->produced);
4434   return str;
4435 }
4436
4437 \f
4438 #ifdef emacs
4439 /*** 7. Emacs Lisp library functions ***/
4440
4441 DEFUN ("coding-system-p", Fcoding_system_p, Scoding_system_p, 1, 1, 0,
4442   "Return t if OBJECT is nil or a coding-system.\n\
4443 See the documentation of `make-coding-system' for information\n\
4444 about coding-system objects.")
4445   (obj)
4446      Lisp_Object obj;
4447 {
4448   if (NILP (obj))
4449     return Qt;
4450   if (!SYMBOLP (obj))
4451     return Qnil;
4452   /* Get coding-spec vector for OBJ.  */
4453   obj = Fget (obj, Qcoding_system);
4454   return ((VECTORP (obj) && XVECTOR (obj)->size == 5)
4455           ? Qt : Qnil);
4456 }
4457
4458 DEFUN ("read-non-nil-coding-system", Fread_non_nil_coding_system,
4459        Sread_non_nil_coding_system, 1, 1, 0,
4460   "Read a coding system from the minibuffer, prompting with string PROMPT.")
4461   (prompt)
4462      Lisp_Object prompt;
4463 {
4464   Lisp_Object val;
4465   do
4466     {
4467       val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4468                               Qt, Qnil, Qcoding_system_history, Qnil, Qnil);
4469     }
4470   while (XSTRING (val)->size == 0);
4471   return (Fintern (val, Qnil));
4472 }
4473
4474 DEFUN ("read-coding-system", Fread_coding_system, Sread_coding_system, 1, 2, 0,
4475   "Read a coding system from the minibuffer, prompting with string PROMPT.\n\
4476 If the user enters null input, return second argument DEFAULT-CODING-SYSTEM.")
4477   (prompt, default_coding_system)
4478      Lisp_Object prompt, default_coding_system;
4479 {
4480   Lisp_Object val;
4481   if (SYMBOLP (default_coding_system))
4482     XSETSTRING (default_coding_system, XSYMBOL (default_coding_system)->name);
4483   val = Fcompleting_read (prompt, Vcoding_system_alist, Qnil,
4484                           Qt, Qnil, Qcoding_system_history,
4485                           default_coding_system, Qnil);
4486   return (XSTRING (val)->size == 0 ? Qnil : Fintern (val, Qnil));
4487 }
4488
4489 DEFUN ("check-coding-system", Fcheck_coding_system, Scheck_coding_system,
4490        1, 1, 0,
4491   "Check validity of CODING-SYSTEM.\n\
4492 If valid, return CODING-SYSTEM, else signal a `coding-system-error' error.\n\
4493 It is valid if it is a symbol with a non-nil `coding-system' property.\n\
4494 The value of property should be a vector of length 5.")
4495   (coding_system)
4496      Lisp_Object coding_system;
4497 {
4498   CHECK_SYMBOL (coding_system, 0);
4499   if (!NILP (Fcoding_system_p (coding_system)))
4500     return coding_system;
4501   while (1)
4502     Fsignal (Qcoding_system_error, Fcons (coding_system, Qnil));
4503 }
4504 \f
4505 Lisp_Object
4506 detect_coding_system (src, src_bytes, highest)
4507      unsigned char *src;
4508      int src_bytes, highest;
4509 {
4510   int coding_mask, eol_type;
4511   Lisp_Object val, tmp;
4512   int dummy;
4513
4514   coding_mask = detect_coding_mask (src, src_bytes, NULL, &dummy);
4515   eol_type  = detect_eol_type (src, src_bytes, &dummy);
4516   if (eol_type == CODING_EOL_INCONSISTENT)
4517     eol_type == CODING_EOL_UNDECIDED;
4518
4519   if (!coding_mask)
4520     {
4521       val = Qundecided;
4522       if (eol_type != CODING_EOL_UNDECIDED)
4523         {
4524           Lisp_Object val2;
4525           val2 = Fget (Qundecided, Qeol_type);
4526           if (VECTORP (val2))
4527             val = XVECTOR (val2)->contents[eol_type];
4528         }
4529       return (highest ? val : Fcons (val, Qnil));
4530     }
4531
4532   /* At first, gather possible coding systems in VAL.  */
4533   val = Qnil;
4534   for (tmp = Vcoding_category_list; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4535     {
4536       int idx
4537         = XFASTINT (Fget (XCONS (tmp)->car, Qcoding_category_index));
4538       if (coding_mask & (1 << idx))
4539         {
4540           val = Fcons (Fsymbol_value (XCONS (tmp)->car), val);
4541           if (highest)
4542             break;
4543         }
4544     }
4545   if (!highest)
4546     val = Fnreverse (val);
4547
4548   /* Then, replace the elements with subsidiary coding systems.  */
4549   for (tmp = val; !NILP (tmp); tmp = XCONS (tmp)->cdr)
4550     {
4551       if (eol_type != CODING_EOL_UNDECIDED
4552           && eol_type != CODING_EOL_INCONSISTENT)
4553         {
4554           Lisp_Object eol;
4555           eol = Fget (XCONS (tmp)->car, Qeol_type);
4556           if (VECTORP (eol))
4557             XCONS (tmp)->car = XVECTOR (eol)->contents[eol_type];
4558         }
4559     }
4560   return (highest ? XCONS (val)->car : val);
4561 }
4562
4563 DEFUN ("detect-coding-region", Fdetect_coding_region, Sdetect_coding_region,
4564        2, 3, 0,
4565   "Detect coding system of the text in the region between START and END.\n\
4566 Return a list of possible coding systems ordered by priority.\n\
4567 \n\
4568 If only ASCII characters are found, it returns a list of single element\n\
4569 `undecided' or its subsidiary coding system according to a detected\n\
4570 end-of-line format.\n\
4571 \n\
4572 If optional argument HIGHEST is non-nil, return the coding system of\n\
4573 highest priority.")
4574   (start, end, highest)
4575      Lisp_Object start, end, highest;
4576 {
4577   int from, to;
4578   int from_byte, to_byte;
4579
4580   CHECK_NUMBER_COERCE_MARKER (start, 0);
4581   CHECK_NUMBER_COERCE_MARKER (end, 1);
4582
4583   validate_region (&start, &end);
4584   from = XINT (start), to = XINT (end);
4585   from_byte = CHAR_TO_BYTE (from);
4586   to_byte = CHAR_TO_BYTE (to);
4587
4588   if (from < GPT && to >= GPT)
4589     move_gap_both (to, to_byte);
4590
4591   return detect_coding_system (BYTE_POS_ADDR (from_byte),
4592                                to_byte - from_byte,
4593                                !NILP (highest));
4594 }
4595
4596 DEFUN ("detect-coding-string", Fdetect_coding_string, Sdetect_coding_string,
4597        1, 2, 0,
4598   "Detect coding system of the text in STRING.\n\
4599 Return a list of possible coding systems ordered by priority.\n\
4600 \n\
4601 If only ASCII characters are found, it returns a list of single element\n\
4602 `undecided' or its subsidiary coding system according to a detected\n\
4603 end-of-line format.\n\
4604 \n\
4605 If optional argument HIGHEST is non-nil, return the coding system of\n\
4606 highest priority.")
4607   (string, highest)
4608      Lisp_Object string, highest;
4609 {
4610   CHECK_STRING (string, 0);
4611
4612   return detect_coding_system (XSTRING (string)->data,
4613                                STRING_BYTES (XSTRING (string)),
4614                                !NILP (highest));
4615 }
4616
4617 Lisp_Object
4618 code_convert_region1 (start, end, coding_system, encodep)
4619      Lisp_Object start, end, coding_system;
4620      int encodep;
4621 {
4622   struct coding_system coding;
4623   int from, to, len;
4624
4625   CHECK_NUMBER_COERCE_MARKER (start, 0);
4626   CHECK_NUMBER_COERCE_MARKER (end, 1);
4627   CHECK_SYMBOL (coding_system, 2);
4628
4629   validate_region (&start, &end);
4630   from = XFASTINT (start);
4631   to = XFASTINT (end);
4632
4633   if (NILP (coding_system))
4634     return make_number (to - from);
4635
4636   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4637     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4638
4639   coding.mode |= CODING_MODE_LAST_BLOCK;
4640   code_convert_region (from, CHAR_TO_BYTE (from), to, CHAR_TO_BYTE (to),
4641                        &coding, encodep, 1);
4642   Vlast_coding_system_used = coding.symbol;
4643   return make_number (coding.produced_char);
4644 }
4645
4646 DEFUN ("decode-coding-region", Fdecode_coding_region, Sdecode_coding_region,
4647        3, 3, "r\nzCoding system: ",
4648   "Decode the current region by specified coding system.\n\
4649 When called from a program, takes three arguments:\n\
4650 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4651 This function sets `last-coding-system-used' to the precise coding system\n\
4652 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4653 not fully specified.)\n\
4654 It returns the length of the decoded text.")
4655   (start, end, coding_system)
4656      Lisp_Object start, end, coding_system;
4657 {
4658   return code_convert_region1 (start, end, coding_system, 0);
4659 }
4660
4661 DEFUN ("encode-coding-region", Fencode_coding_region, Sencode_coding_region,
4662        3, 3, "r\nzCoding system: ",
4663   "Encode the current region by specified coding system.\n\
4664 When called from a program, takes three arguments:\n\
4665 START, END, and CODING-SYSTEM.  START and END are buffer positions.\n\
4666 This function sets `last-coding-system-used' to the precise coding system\n\
4667 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4668 not fully specified.)\n\
4669 It returns the length of the encoded text.")
4670   (start, end, coding_system)
4671      Lisp_Object start, end, coding_system;
4672 {
4673   return code_convert_region1 (start, end, coding_system, 1);
4674 }
4675
4676 Lisp_Object
4677 code_convert_string1 (string, coding_system, nocopy, encodep)
4678      Lisp_Object string, coding_system, nocopy;
4679      int encodep;
4680 {
4681   struct coding_system coding;
4682
4683   CHECK_STRING (string, 0);
4684   CHECK_SYMBOL (coding_system, 1);
4685
4686   if (NILP (coding_system))
4687     return (NILP (nocopy) ? Fcopy_sequence (string) : string);
4688
4689   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4690     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4691
4692   coding.mode |= CODING_MODE_LAST_BLOCK;
4693   Vlast_coding_system_used = coding.symbol;
4694   return code_convert_string (string, &coding, encodep, !NILP (nocopy));
4695 }
4696
4697 DEFUN ("decode-coding-string", Fdecode_coding_string, Sdecode_coding_string,
4698        2, 3, 0,
4699   "Decode STRING which is encoded in CODING-SYSTEM, and return the result.\n\
4700 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4701 if the decoding operation is trivial.\n\
4702 This function sets `last-coding-system-used' to the precise coding system\n\
4703 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4704 not fully specified.)")
4705   (string, coding_system, nocopy)
4706      Lisp_Object string, coding_system, nocopy;
4707 {
4708   return code_convert_string1 (string, coding_system, nocopy, 0);
4709 }
4710
4711 DEFUN ("encode-coding-string", Fencode_coding_string, Sencode_coding_string,
4712        2, 3, 0,
4713   "Encode STRING to CODING-SYSTEM, and return the result.\n\
4714 Optional arg NOCOPY non-nil means it is ok to return STRING itself\n\
4715 if the encoding operation is trivial.\n\
4716 This function sets `last-coding-system-used' to the precise coding system\n\
4717 used (which may be different from CODING-SYSTEM if CODING-SYSTEM is\n\
4718 not fully specified.)")
4719   (string, coding_system, nocopy)
4720      Lisp_Object string, coding_system, nocopy;
4721 {
4722   return code_convert_string1 (string, coding_system, nocopy, 1);
4723 }
4724
4725 /* Encode or decode STRING according to CODING_SYSTEM.
4726    Do not set Vlast_coding_system_used.  */
4727
4728 Lisp_Object
4729 code_convert_string_norecord (string, coding_system, encodep)
4730      Lisp_Object string, coding_system;
4731      int encodep;
4732 {
4733   struct coding_system coding;
4734
4735   CHECK_STRING (string, 0);
4736   CHECK_SYMBOL (coding_system, 1);
4737
4738   if (NILP (coding_system))
4739     return string;
4740
4741   if (setup_coding_system (Fcheck_coding_system (coding_system), &coding) < 0)
4742     error ("Invalid coding system: %s", XSYMBOL (coding_system)->name->data);
4743
4744   coding.mode |= CODING_MODE_LAST_BLOCK;
4745   return code_convert_string (string, &coding, encodep, Qt);
4746 }
4747 \f
4748 DEFUN ("decode-sjis-char", Fdecode_sjis_char, Sdecode_sjis_char, 1, 1, 0,
4749   "Decode a JISX0208 character of shift-jis encoding.\n\
4750 CODE is the character code in SJIS.\n\
4751 Return the corresponding character.")
4752   (code)
4753      Lisp_Object code;
4754 {
4755   unsigned char c1, c2, s1, s2;
4756   Lisp_Object val;
4757
4758   CHECK_NUMBER (code, 0);
4759   s1 = (XFASTINT (code)) >> 8, s2 = (XFASTINT (code)) & 0xFF;
4760   DECODE_SJIS (s1, s2, c1, c2);
4761   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset_jisx0208, c1, c2));
4762   return val;
4763 }
4764
4765 DEFUN ("encode-sjis-char", Fencode_sjis_char, Sencode_sjis_char, 1, 1, 0,
4766   "Encode a JISX0208 character CHAR to SJIS coding system.\n\
4767 Return the corresponding character code in SJIS.")
4768   (ch)
4769      Lisp_Object ch;
4770 {
4771   int charset, c1, c2, s1, s2;
4772   Lisp_Object val;
4773
4774   CHECK_NUMBER (ch, 0);
4775   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4776   if (charset == charset_jisx0208)
4777     {
4778       ENCODE_SJIS (c1, c2, s1, s2);
4779       XSETFASTINT (val, (s1 << 8) | s2);
4780     }
4781   else
4782     XSETFASTINT (val, 0);
4783   return val;
4784 }
4785
4786 DEFUN ("decode-big5-char", Fdecode_big5_char, Sdecode_big5_char, 1, 1, 0,
4787   "Decode a Big5 character CODE of BIG5 coding system.\n\
4788 CODE is the character code in BIG5.\n\
4789 Return the corresponding character.")
4790   (code)
4791      Lisp_Object code;
4792 {
4793   int charset;
4794   unsigned char b1, b2, c1, c2;
4795   Lisp_Object val;
4796
4797   CHECK_NUMBER (code, 0);
4798   b1 = (XFASTINT (code)) >> 8, b2 = (XFASTINT (code)) & 0xFF;
4799   DECODE_BIG5 (b1, b2, charset, c1, c2);
4800   XSETFASTINT (val, MAKE_NON_ASCII_CHAR (charset, c1, c2));
4801   return val;
4802 }
4803
4804 DEFUN ("encode-big5-char", Fencode_big5_char, Sencode_big5_char, 1, 1, 0,
4805   "Encode the Big5 character CHAR to BIG5 coding system.\n\
4806 Return the corresponding character code in Big5.")
4807   (ch)
4808      Lisp_Object ch;
4809 {
4810   int charset, c1, c2, b1, b2;
4811   Lisp_Object val;
4812
4813   CHECK_NUMBER (ch, 0);
4814   SPLIT_CHAR (XFASTINT (ch), charset, c1, c2);
4815   if (charset == charset_big5_1 || charset == charset_big5_2)
4816     {
4817       ENCODE_BIG5 (charset, c1, c2, b1, b2);
4818       XSETFASTINT (val, (b1 << 8) | b2);
4819     }
4820   else
4821     XSETFASTINT (val, 0);
4822   return val;
4823 }
4824 \f
4825 DEFUN ("set-terminal-coding-system-internal",
4826        Fset_terminal_coding_system_internal,
4827        Sset_terminal_coding_system_internal, 1, 1, 0, "")
4828   (coding_system)
4829      Lisp_Object coding_system;
4830 {
4831   CHECK_SYMBOL (coding_system, 0);
4832   setup_coding_system (Fcheck_coding_system (coding_system), &terminal_coding);
4833   /* We had better not send unsafe characters to terminal.  */
4834   terminal_coding.flags |= CODING_FLAG_ISO_SAFE;
4835
4836   return Qnil;
4837 }
4838
4839 DEFUN ("set-safe-terminal-coding-system-internal",
4840        Fset_safe_terminal_coding_system_internal,
4841        Sset_safe_terminal_coding_system_internal, 1, 1, 0, "")
4842   (coding_system)
4843      Lisp_Object coding_system;
4844 {
4845   CHECK_SYMBOL (coding_system, 0);
4846   setup_coding_system (Fcheck_coding_system (coding_system),
4847                        &safe_terminal_coding);
4848   return Qnil;
4849 }
4850
4851 DEFUN ("terminal-coding-system",
4852        Fterminal_coding_system, Sterminal_coding_system, 0, 0, 0,
4853   "Return coding system specified for terminal output.")
4854   ()
4855 {
4856   return terminal_coding.symbol;
4857 }
4858
4859 DEFUN ("set-keyboard-coding-system-internal",
4860        Fset_keyboard_coding_system_internal,
4861        Sset_keyboard_coding_system_internal, 1, 1, 0, "")
4862   (coding_system)
4863      Lisp_Object coding_system;
4864 {
4865   CHECK_SYMBOL (coding_system, 0);
4866   setup_coding_system (Fcheck_coding_system (coding_system), &keyboard_coding);
4867   return Qnil;
4868 }
4869
4870 DEFUN ("keyboard-coding-system",
4871        Fkeyboard_coding_system, Skeyboard_coding_system, 0, 0, 0,
4872   "Return coding system specified for decoding keyboard input.")
4873   ()
4874 {
4875   return keyboard_coding.symbol;
4876 }
4877
4878 \f
4879 DEFUN ("find-operation-coding-system", Ffind_operation_coding_system,
4880        Sfind_operation_coding_system,  1, MANY, 0,
4881   "Choose a coding system for an operation based on the target name.\n\
4882 The value names a pair of coding systems: (DECODING-SYSTEM ENCODING-SYSTEM).\n\
4883 DECODING-SYSTEM is the coding system to use for decoding\n\
4884 \(in case OPERATION does decoding), and ENCODING-SYSTEM is the coding system\n\
4885 for encoding (in case OPERATION does encoding).\n\
4886 \n\
4887 The first argument OPERATION specifies an I/O primitive:\n\
4888   For file I/O, `insert-file-contents' or `write-region'.\n\
4889   For process I/O, `call-process', `call-process-region', or `start-process'.\n\
4890   For network I/O, `open-network-stream'.\n\
4891 \n\
4892 The remaining arguments should be the same arguments that were passed\n\
4893 to the primitive.  Depending on which primitive, one of those arguments\n\
4894 is selected as the TARGET.  For example, if OPERATION does file I/O,\n\
4895 whichever argument specifies the file name is TARGET.\n\
4896 \n\
4897 TARGET has a meaning which depends on OPERATION:\n\
4898   For file I/O, TARGET is a file name.\n\
4899   For process I/O, TARGET is a process name.\n\
4900   For network I/O, TARGET is a service name or a port number\n\
4901 \n\
4902 This function looks up what specified for TARGET in,\n\
4903 `file-coding-system-alist', `process-coding-system-alist',\n\
4904 or `network-coding-system-alist' depending on OPERATION.\n\
4905 They may specify a coding system, a cons of coding systems,\n\
4906 or a function symbol to call.\n\
4907 In the last case, we call the function with one argument,\n\
4908 which is a list of all the arguments given to this function.")
4909   (nargs, args)
4910      int nargs;
4911      Lisp_Object *args;
4912 {
4913   Lisp_Object operation, target_idx, target, val;
4914   register Lisp_Object chain;
4915
4916   if (nargs < 2)
4917     error ("Too few arguments");
4918   operation = args[0];
4919   if (!SYMBOLP (operation)
4920       || !INTEGERP (target_idx = Fget (operation, Qtarget_idx)))
4921     error ("Invalid first arguement");
4922   if (nargs < 1 + XINT (target_idx))
4923     error ("Too few arguments for operation: %s",
4924            XSYMBOL (operation)->name->data);
4925   target = args[XINT (target_idx) + 1];
4926   if (!(STRINGP (target)
4927         || (EQ (operation, Qopen_network_stream) && INTEGERP (target))))
4928     error ("Invalid %dth argument", XINT (target_idx) + 1);
4929
4930   chain = ((EQ (operation, Qinsert_file_contents)
4931             || EQ (operation, Qwrite_region))
4932            ? Vfile_coding_system_alist
4933            : (EQ (operation, Qopen_network_stream)
4934               ? Vnetwork_coding_system_alist
4935               : Vprocess_coding_system_alist));
4936   if (NILP (chain))
4937     return Qnil;
4938
4939   for (; CONSP (chain); chain = XCONS (chain)->cdr)
4940     {
4941       Lisp_Object elt;
4942       elt = XCONS (chain)->car;
4943
4944       if (CONSP (elt)
4945           && ((STRINGP (target)
4946                && STRINGP (XCONS (elt)->car)
4947                && fast_string_match (XCONS (elt)->car, target) >= 0)
4948               || (INTEGERP (target) && EQ (target, XCONS (elt)->car))))
4949         {
4950           val = XCONS (elt)->cdr;
4951           /* Here, if VAL is both a valid coding system and a valid
4952              function symbol, we return VAL as a coding system.  */
4953           if (CONSP (val))
4954             return val;
4955           if (! SYMBOLP (val))
4956             return Qnil;
4957           if (! NILP (Fcoding_system_p (val)))
4958             return Fcons (val, val);
4959           if (! NILP (Ffboundp (val)))
4960             {
4961               val = call1 (val, Flist (nargs, args));
4962               if (CONSP (val))
4963                 return val;
4964               if (SYMBOLP (val) && ! NILP (Fcoding_system_p (val)))
4965                 return Fcons (val, val);
4966             }
4967           return Qnil;
4968         }
4969     }
4970   return Qnil;
4971 }
4972
4973 DEFUN ("update-iso-coding-systems", Fupdate_iso_coding_systems,
4974        Supdate_iso_coding_systems, 0, 0, 0,
4975   "Update internal database for ISO2022 based coding systems.\n\
4976 When values of the following coding categories are changed, you must\n\
4977 call this function:\n\
4978   coding-category-iso-7, coding-category-iso-7-tight,\n\
4979   coding-category-iso-8-1, coding-category-iso-8-2,\n\
4980   coding-category-iso-7-else, coding-category-iso-8-else")
4981   ()
4982 {
4983   int i;
4984
4985   for (i = CODING_CATEGORY_IDX_ISO_7; i <= CODING_CATEGORY_IDX_ISO_8_ELSE;
4986        i++)
4987     {
4988       if (! coding_system_table[i])
4989         coding_system_table[i]
4990           = (struct coding_system *) xmalloc (sizeof (struct coding_system));
4991       setup_coding_system
4992         (XSYMBOL (XVECTOR (Vcoding_category_table)->contents[i])->value,
4993          coding_system_table[i]);
4994     }
4995   return Qnil;
4996 }
4997
4998 DEFUN ("set-coding-priority-internal", Fset_coding_priority_internal,
4999        Sset_coding_priority_internal, 0, 0, 0,
5000   "Update internal database for the current value of `coding-category-list'.\n\
5001 This function is internal use only.")
5002   ()
5003 {
5004   int i = 0, idx;
5005   Lisp_Object val = Vcoding_category_list;
5006
5007   while (CONSP (val) && i < CODING_CATEGORY_IDX_MAX)
5008     {
5009       if (! SYMBOLP (XCONS (val)->car))
5010         break;
5011       idx = XFASTINT (Fget (XCONS (val)->car, Qcoding_category_index));
5012       if (idx >= CODING_CATEGORY_IDX_MAX)
5013         break;
5014       coding_priorities[i++] = (1 << idx);
5015       val = XCONS (val)->cdr;
5016     }
5017   /* If coding-category-list is valid and contains all coding
5018      categories, `i' should be CODING_CATEGORY_IDX_MAX now.  If not,
5019      the following code saves Emacs from craching.  */
5020   while (i < CODING_CATEGORY_IDX_MAX)
5021     coding_priorities[i++] = CODING_CATEGORY_MASK_RAW_TEXT;
5022
5023   return Qnil;
5024 }
5025
5026 #endif /* emacs */
5027
5028 \f
5029 /*** 8. Post-amble ***/
5030
5031 void
5032 init_coding_once ()
5033 {
5034   int i;
5035
5036   /* Emacs' internal format specific initialize routine.  */
5037   for (i = 0; i <= 0x20; i++)
5038     emacs_code_class[i] = EMACS_control_code;
5039   emacs_code_class[0x0A] = EMACS_linefeed_code;
5040   emacs_code_class[0x0D] = EMACS_carriage_return_code;
5041   for (i = 0x21 ; i < 0x7F; i++)
5042     emacs_code_class[i] = EMACS_ascii_code;
5043   emacs_code_class[0x7F] = EMACS_control_code;
5044   emacs_code_class[0x80] = EMACS_leading_code_composition;
5045   for (i = 0x81; i < 0xFF; i++)
5046     emacs_code_class[i] = EMACS_invalid_code;
5047   emacs_code_class[LEADING_CODE_PRIVATE_11] = EMACS_leading_code_3;
5048   emacs_code_class[LEADING_CODE_PRIVATE_12] = EMACS_leading_code_3;
5049   emacs_code_class[LEADING_CODE_PRIVATE_21] = EMACS_leading_code_4;
5050   emacs_code_class[LEADING_CODE_PRIVATE_22] = EMACS_leading_code_4;
5051
5052   /* ISO2022 specific initialize routine.  */
5053   for (i = 0; i < 0x20; i++)
5054     iso_code_class[i] = ISO_control_code;
5055   for (i = 0x21; i < 0x7F; i++)
5056     iso_code_class[i] = ISO_graphic_plane_0;
5057   for (i = 0x80; i < 0xA0; i++)
5058     iso_code_class[i] = ISO_control_code;
5059   for (i = 0xA1; i < 0xFF; i++)
5060     iso_code_class[i] = ISO_graphic_plane_1;
5061   iso_code_class[0x20] = iso_code_class[0x7F] = ISO_0x20_or_0x7F;
5062   iso_code_class[0xA0] = iso_code_class[0xFF] = ISO_0xA0_or_0xFF;
5063   iso_code_class[ISO_CODE_CR] = ISO_carriage_return;
5064   iso_code_class[ISO_CODE_SO] = ISO_shift_out;
5065   iso_code_class[ISO_CODE_SI] = ISO_shift_in;
5066   iso_code_class[ISO_CODE_SS2_7] = ISO_single_shift_2_7;
5067   iso_code_class[ISO_CODE_ESC] = ISO_escape;
5068   iso_code_class[ISO_CODE_SS2] = ISO_single_shift_2;
5069   iso_code_class[ISO_CODE_SS3] = ISO_single_shift_3;
5070   iso_code_class[ISO_CODE_CSI] = ISO_control_sequence_introducer;
5071
5072   conversion_buffer_size = MINIMUM_CONVERSION_BUFFER_SIZE;
5073   conversion_buffer = (char *) xmalloc (MINIMUM_CONVERSION_BUFFER_SIZE);
5074
5075   setup_coding_system (Qnil, &keyboard_coding);
5076   setup_coding_system (Qnil, &terminal_coding);
5077   setup_coding_system (Qnil, &safe_terminal_coding);
5078
5079   bzero (coding_system_table, sizeof coding_system_table);
5080
5081   bzero (ascii_skip_code, sizeof ascii_skip_code);
5082   for (i = 0; i < 128; i++)
5083     ascii_skip_code[i] = 1;
5084
5085 #if defined (MSDOS) || defined (WINDOWSNT)
5086   system_eol_type = CODING_EOL_CRLF;
5087 #else
5088   system_eol_type = CODING_EOL_LF;
5089 #endif
5090 }
5091
5092 #ifdef emacs
5093
5094 void
5095 syms_of_coding ()
5096 {
5097   Qtarget_idx = intern ("target-idx");
5098   staticpro (&Qtarget_idx);
5099
5100   Qcoding_system_history = intern ("coding-system-history");
5101   staticpro (&Qcoding_system_history);
5102   Fset (Qcoding_system_history, Qnil);
5103
5104   /* Target FILENAME is the first argument.  */
5105   Fput (Qinsert_file_contents, Qtarget_idx, make_number (0));
5106   /* Target FILENAME is the third argument.  */
5107   Fput (Qwrite_region, Qtarget_idx, make_number (2));
5108
5109   Qcall_process = intern ("call-process");
5110   staticpro (&Qcall_process);
5111   /* Target PROGRAM is the first argument.  */
5112   Fput (Qcall_process, Qtarget_idx, make_number (0));
5113
5114   Qcall_process_region = intern ("call-process-region");
5115   staticpro (&Qcall_process_region);
5116   /* Target PROGRAM is the third argument.  */
5117   Fput (Qcall_process_region, Qtarget_idx, make_number (2));
5118
5119   Qstart_process = intern ("start-process");
5120   staticpro (&Qstart_process);
5121   /* Target PROGRAM is the third argument.  */
5122   Fput (Qstart_process, Qtarget_idx, make_number (2));
5123
5124   Qopen_network_stream = intern ("open-network-stream");
5125   staticpro (&Qopen_network_stream);
5126   /* Target SERVICE is the fourth argument.  */
5127   Fput (Qopen_network_stream, Qtarget_idx, make_number (3));
5128
5129   Qcoding_system = intern ("coding-system");
5130   staticpro (&Qcoding_system);
5131
5132   Qeol_type = intern ("eol-type");
5133   staticpro (&Qeol_type);
5134
5135   Qbuffer_file_coding_system = intern ("buffer-file-coding-system");
5136   staticpro (&Qbuffer_file_coding_system);
5137
5138   Qpost_read_conversion = intern ("post-read-conversion");
5139   staticpro (&Qpost_read_conversion);
5140
5141   Qpre_write_conversion = intern ("pre-write-conversion");
5142   staticpro (&Qpre_write_conversion);
5143
5144   Qno_conversion = intern ("no-conversion");
5145   staticpro (&Qno_conversion);
5146
5147   Qundecided = intern ("undecided");
5148   staticpro (&Qundecided);
5149
5150   Qcoding_system_p = intern ("coding-system-p");
5151   staticpro (&Qcoding_system_p);
5152
5153   Qcoding_system_error = intern ("coding-system-error");
5154   staticpro (&Qcoding_system_error);
5155
5156   Fput (Qcoding_system_error, Qerror_conditions,
5157         Fcons (Qcoding_system_error, Fcons (Qerror, Qnil)));
5158   Fput (Qcoding_system_error, Qerror_message,
5159         build_string ("Invalid coding system"));
5160
5161   Qcoding_category = intern ("coding-category");
5162   staticpro (&Qcoding_category);
5163   Qcoding_category_index = intern ("coding-category-index");
5164   staticpro (&Qcoding_category_index);
5165
5166   Vcoding_category_table
5167     = Fmake_vector (make_number (CODING_CATEGORY_IDX_MAX), Qnil);
5168   staticpro (&Vcoding_category_table);
5169   {
5170     int i;
5171     for (i = 0; i < CODING_CATEGORY_IDX_MAX; i++)
5172       {
5173         XVECTOR (Vcoding_category_table)->contents[i]
5174           = intern (coding_category_name[i]);
5175         Fput (XVECTOR (Vcoding_category_table)->contents[i],
5176               Qcoding_category_index, make_number (i));
5177       }
5178   }
5179
5180   Qtranslation_table = intern ("translation-table");
5181   staticpro (&Qtranslation_table);
5182   Fput (Qtranslation_table, Qchar_table_extra_slots, make_number (0));
5183
5184   Qtranslation_table_id = intern ("translation-table-id");
5185   staticpro (&Qtranslation_table_id);
5186
5187   Qtranslation_table_for_decode = intern ("translation-table-for-decode");
5188   staticpro (&Qtranslation_table_for_decode);
5189
5190   Qtranslation_table_for_encode = intern ("translation-table-for-encode");
5191   staticpro (&Qtranslation_table_for_encode);
5192
5193   Qsafe_charsets = intern ("safe-charsets");
5194   staticpro (&Qsafe_charsets);
5195
5196   Qemacs_mule = intern ("emacs-mule");
5197   staticpro (&Qemacs_mule);
5198
5199   Qraw_text = intern ("raw-text");
5200   staticpro (&Qraw_text);
5201
5202   defsubr (&Scoding_system_p);
5203   defsubr (&Sread_coding_system);
5204   defsubr (&Sread_non_nil_coding_system);
5205   defsubr (&Scheck_coding_system);
5206   defsubr (&Sdetect_coding_region);
5207   defsubr (&Sdetect_coding_string);
5208   defsubr (&Sdecode_coding_region);
5209   defsubr (&Sencode_coding_region);
5210   defsubr (&Sdecode_coding_string);
5211   defsubr (&Sencode_coding_string);
5212   defsubr (&Sdecode_sjis_char);
5213   defsubr (&Sencode_sjis_char);
5214   defsubr (&Sdecode_big5_char);
5215   defsubr (&Sencode_big5_char);
5216   defsubr (&Sset_terminal_coding_system_internal);
5217   defsubr (&Sset_safe_terminal_coding_system_internal);
5218   defsubr (&Sterminal_coding_system);
5219   defsubr (&Sset_keyboard_coding_system_internal);
5220   defsubr (&Skeyboard_coding_system);
5221   defsubr (&Sfind_operation_coding_system);
5222   defsubr (&Supdate_iso_coding_systems);
5223   defsubr (&Sset_coding_priority_internal);
5224
5225   DEFVAR_LISP ("coding-system-list", &Vcoding_system_list,
5226     "List of coding systems.\n\
5227 \n\
5228 Do not alter the value of this variable manually.  This variable should be\n\
5229 updated by the functions `make-coding-system' and\n\
5230 `define-coding-system-alias'.");
5231   Vcoding_system_list = Qnil;
5232
5233   DEFVAR_LISP ("coding-system-alist", &Vcoding_system_alist,
5234     "Alist of coding system names.\n\
5235 Each element is one element list of coding system name.\n\
5236 This variable is given to `completing-read' as TABLE argument.\n\
5237 \n\
5238 Do not alter the value of this variable manually.  This variable should be\n\
5239 updated by the functions `make-coding-system' and\n\
5240 `define-coding-system-alias'.");
5241   Vcoding_system_alist = Qnil;
5242
5243   DEFVAR_LISP ("coding-category-list", &Vcoding_category_list,
5244     "List of coding-categories (symbols) ordered by priority.");
5245   {
5246     int i;
5247
5248     Vcoding_category_list = Qnil;
5249     for (i = CODING_CATEGORY_IDX_MAX - 1; i >= 0; i--)
5250       Vcoding_category_list
5251         = Fcons (XVECTOR (Vcoding_category_table)->contents[i],
5252                  Vcoding_category_list);
5253   }
5254
5255   DEFVAR_LISP ("coding-system-for-read", &Vcoding_system_for_read,
5256     "Specify the coding system for read operations.\n\
5257 It is useful to bind this variable with `let', but do not set it globally.\n\
5258 If the value is a coding system, it is used for decoding on read operation.\n\
5259 If not, an appropriate element is used from one of the coding system alists:\n\
5260 There are three such tables, `file-coding-system-alist',\n\
5261 `process-coding-system-alist', and `network-coding-system-alist'.");
5262   Vcoding_system_for_read = Qnil;
5263
5264   DEFVAR_LISP ("coding-system-for-write", &Vcoding_system_for_write,
5265     "Specify the coding system for write operations.\n\
5266 It is useful to bind this variable with `let', but do not set it globally.\n\
5267 If the value is a coding system, it is used for encoding on write operation.\n\
5268 If not, an appropriate element is used from one of the coding system alists:\n\
5269 There are three such tables, `file-coding-system-alist',\n\
5270 `process-coding-system-alist', and `network-coding-system-alist'.");
5271   Vcoding_system_for_write = Qnil;
5272
5273   DEFVAR_LISP ("last-coding-system-used", &Vlast_coding_system_used,
5274     "Coding system used in the latest file or process I/O.");
5275   Vlast_coding_system_used = Qnil;
5276
5277   DEFVAR_BOOL ("inhibit-eol-conversion", &inhibit_eol_conversion,
5278     "*Non-nil inhibit code conversion of end-of-line format in any cases.");
5279   inhibit_eol_conversion = 0;
5280
5281   DEFVAR_BOOL ("inherit-process-coding-system", &inherit_process_coding_system,
5282     "Non-nil means process buffer inherits coding system of process output.\n\
5283 Bind it to t if the process output is to be treated as if it were a file\n\
5284 read from some filesystem.");
5285   inherit_process_coding_system = 0;
5286
5287   DEFVAR_LISP ("file-coding-system-alist", &Vfile_coding_system_alist,
5288     "Alist to decide a coding system to use for a file I/O operation.\n\
5289 The format is ((PATTERN . VAL) ...),\n\
5290 where PATTERN is a regular expression matching a file name,\n\
5291 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5292 If VAL is a coding system, it is used for both decoding and encoding\n\
5293 the file contents.\n\
5294 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5295 and the cdr part is used for encoding.\n\
5296 If VAL is a function symbol, the function must return a coding system\n\
5297 or a cons of coding systems which are used as above.\n\
5298 \n\
5299 See also the function `find-operation-coding-system'.");
5300   Vfile_coding_system_alist = Qnil;
5301
5302   DEFVAR_LISP ("process-coding-system-alist", &Vprocess_coding_system_alist,
5303     "Alist to decide a coding system to use for a process I/O operation.\n\
5304 The format is ((PATTERN . VAL) ...),\n\
5305 where PATTERN is a regular expression matching a program name,\n\
5306 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5307 If VAL is a coding system, it is used for both decoding what received\n\
5308 from the program and encoding what sent to the program.\n\
5309 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5310 and the cdr part is used for encoding.\n\
5311 If VAL is a function symbol, the function must return a coding system\n\
5312 or a cons of coding systems which are used as above.\n\
5313 \n\
5314 See also the function `find-operation-coding-system'.");
5315   Vprocess_coding_system_alist = Qnil;
5316
5317   DEFVAR_LISP ("network-coding-system-alist", &Vnetwork_coding_system_alist,
5318     "Alist to decide a coding system to use for a network I/O operation.\n\
5319 The format is ((PATTERN . VAL) ...),\n\
5320 where PATTERN is a regular expression matching a network service name\n\
5321 or is a port number to connect to,\n\
5322 VAL is a coding system, a cons of coding systems, or a function symbol.\n\
5323 If VAL is a coding system, it is used for both decoding what is received\n\
5324 from the network stream and encoding what is sent to the network stream.\n\
5325 If VAL is a cons of coding systems, the car part is used for decoding,\n\
5326 and the cdr part is used for encoding.\n\
5327 If VAL is a function symbol, the function must return a coding system\n\
5328 or a cons of coding systems which are used as above.\n\
5329 \n\
5330 See also the function `find-operation-coding-system'.");
5331   Vnetwork_coding_system_alist = Qnil;
5332
5333   DEFVAR_INT ("eol-mnemonic-unix", &eol_mnemonic_unix,
5334     "Mnemonic character indicating UNIX-like end-of-line format (i.e. LF).");
5335   eol_mnemonic_unix = ':';
5336
5337   DEFVAR_INT ("eol-mnemonic-dos", &eol_mnemonic_dos,
5338     "Mnemonic character indicating DOS-like end-of-line format (i.e. CRLF).");
5339   eol_mnemonic_dos = '\\';
5340
5341   DEFVAR_INT ("eol-mnemonic-mac", &eol_mnemonic_mac,
5342     "Mnemonic character indicating MAC-like end-of-line format (i.e. CR).");
5343   eol_mnemonic_mac = '/';
5344
5345   DEFVAR_INT ("eol-mnemonic-undecided", &eol_mnemonic_undecided,
5346     "Mnemonic character indicating end-of-line format is not yet decided.");
5347   eol_mnemonic_undecided = ':';
5348
5349   DEFVAR_LISP ("enable-character-translation", &Venable_character_translation,
5350     "*Non-nil enables character translation while encoding and decoding.");
5351   Venable_character_translation = Qt;
5352
5353   DEFVAR_LISP ("standard-translation-table-for-decode",
5354     &Vstandard_translation_table_for_decode,
5355     "Table for translating characters while decoding.");
5356   Vstandard_translation_table_for_decode = Qnil;
5357
5358   DEFVAR_LISP ("standard-translation-table-for-encode",
5359     &Vstandard_translation_table_for_encode,
5360     "Table for translating characters while encoding.");
5361   Vstandard_translation_table_for_encode = Qnil;
5362
5363   DEFVAR_LISP ("charset-revision-table", &Vcharset_revision_alist,
5364     "Alist of charsets vs revision numbers.\n\
5365 While encoding, if a charset (car part of an element) is found,\n\
5366 designate it with the escape sequence identifying revision (cdr part of the element).");
5367   Vcharset_revision_alist = Qnil;
5368
5369   DEFVAR_LISP ("default-process-coding-system",
5370                &Vdefault_process_coding_system,
5371     "Cons of coding systems used for process I/O by default.\n\
5372 The car part is used for decoding a process output,\n\
5373 the cdr part is used for encoding a text to be sent to a process.");
5374   Vdefault_process_coding_system = Qnil;
5375
5376   DEFVAR_LISP ("latin-extra-code-table", &Vlatin_extra_code_table,
5377     "Table of extra Latin codes in the range 128..159 (inclusive).\n\
5378 This is a vector of length 256.\n\
5379 If Nth element is non-nil, the existence of code N in a file\n\
5380 \(or output of subprocess) doesn't prevent it from being detected as\n\
5381 a coding system of ISO 2022 variant which has a flag\n\
5382 `accept-latin-extra-code' t (e.g. iso-latin-1) on reading a file\n\
5383 or reading output of a subprocess.\n\
5384 Only 128th through 159th elements have a meaning.");
5385   Vlatin_extra_code_table = Fmake_vector (make_number (256), Qnil);
5386
5387   DEFVAR_LISP ("select-safe-coding-system-function",
5388                &Vselect_safe_coding_system_function,
5389     "Function to call to select safe coding system for encoding a text.\n\
5390 \n\
5391 If set, this function is called to force a user to select a proper\n\
5392 coding system which can encode the text in the case that a default\n\
5393 coding system used in each operation can't encode the text.\n\
5394 \n\
5395 The default value is `select-safe-coding-system' (which see).");
5396   Vselect_safe_coding_system_function = Qnil;
5397
5398 }
5399
5400 #endif /* emacs */